aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-12 06:39:21 -0500
committerIngo Molnar <mingo@elte.hu>2008-11-12 06:39:21 -0500
commit708b8eae0fd532af73ea8350e6dcc10255ff7376 (patch)
treef336436934fd79bc91aff7112a9beb10bc4e839f /fs
parentd98d38f2014ab79f28c126ff175d034891f7aefc (diff)
parentf21f237cf55494c3a4209de323281a3b0528da10 (diff)
Merge branch 'linus' into core/locking
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/autofs4/dev-ioctl.c5
-rw-r--r--fs/autofs4/expire.c19
-rw-r--r--fs/block_dev.c23
-rw-r--r--fs/cifs/CHANGES6
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c50
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/transport.c48
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/ecryptfs/crypto.c15
-rw-r--r--fs/ext3/super.c21
-rw-r--r--fs/ext4/balloc.c77
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/mballoc.c1
-rw-r--r--fs/ext4/super.c35
-rw-r--r--fs/fat/Makefile6
-rw-r--r--fs/fat/cache.c25
-rw-r--r--fs/fat/dir.c20
-rw-r--r--fs/fat/fat.h329
-rw-r--r--fs/fat/fatent.c24
-rw-r--r--fs/fat/file.c49
-rw-r--r--fs/fat/inode.c133
-rw-r--r--fs/fat/misc.c155
-rw-r--r--fs/fat/namei_msdos.c (renamed from fs/msdos/namei.c)42
-rw-r--r--fs/fat/namei_vfat.c (renamed from fs/vfat/namei.c)161
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/inotify_user.c3
-rw-r--r--fs/jbd/checkpoint.c31
-rw-r--r--fs/jbd/transaction.c1
-rw-r--r--fs/jbd2/checkpoint.c32
-rw-r--r--fs/jbd2/commit.c8
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/background.c10
-rw-r--r--fs/jffs2/compr_lzo.c15
-rw-r--r--fs/jffs2/nodemgmt.c2
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/svc4proc.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/msdos/Makefile7
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/vfs.c4
-rw-r--r--fs/ocfs2/file.c30
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/journal.c1
-rw-r--r--fs/ocfs2/mmap.c6
-rw-r--r--fs/ocfs2/namei.c8
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/ocfs2_fs.h17
-rw-r--r--fs/ocfs2/xattr.c372
-rw-r--r--fs/ocfs2/xattr.h38
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/uptime.c38
-rw-r--r--fs/splice.c4
-rw-r--r--fs/vfat/Makefile7
-rw-r--r--fs/xfs/xfs_da_btree.c5
-rw-r--r--fs/xfs/xfs_dir2.c6
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_log.c39
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_mount.c5
69 files changed, 1328 insertions, 683 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 2168c902d5ca..d9f8afe6f0c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
81obj-$(CONFIG_CODA_FS) += coda/ 81obj-$(CONFIG_CODA_FS) += coda/
82obj-$(CONFIG_MINIX_FS) += minix/ 82obj-$(CONFIG_MINIX_FS) += minix/
83obj-$(CONFIG_FAT_FS) += fat/ 83obj-$(CONFIG_FAT_FS) += fat/
84obj-$(CONFIG_MSDOS_FS) += msdos/
85obj-$(CONFIG_VFAT_FS) += vfat/
86obj-$(CONFIG_BFS_FS) += bfs/ 84obj-$(CONFIG_BFS_FS) += bfs/
87obj-$(CONFIG_ISO9660_FS) += isofs/ 85obj-$(CONFIG_ISO9660_FS) += isofs/
88obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ 86obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 625abf5422e2..33bf8cbfd051 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
128 */ 128 */
129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) 129static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
130{ 130{
131 int err = -EINVAL; 131 int err;
132 132
133 if (check_dev_ioctl_version(cmd, param)) { 133 err = check_dev_ioctl_version(cmd, param);
134 if (err) {
134 AUTOFS_WARN("invalid device control module version " 135 AUTOFS_WARN("invalid device control module version "
135 "supplied for cmd(0x%08x)", cmd); 136 "supplied for cmd(0x%08x)", cmd);
136 goto out; 137 goto out;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cde2f8e8935a..4b6fb3f628c0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
56 mntget(mnt); 56 mntget(mnt);
57 dget(dentry); 57 dget(dentry);
58 58
59 if (!autofs4_follow_mount(&mnt, &dentry)) 59 if (!follow_down(&mnt, &dentry))
60 goto done; 60 goto done;
61 61
62 /* This is an autofs submount, we can't expire it */ 62 if (is_autofs4_dentry(dentry)) {
63 if (is_autofs4_dentry(dentry)) 63 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
64 goto done; 64
65 /* This is an autofs submount, we can't expire it */
66 if (sbi->type == AUTOFS_TYPE_INDIRECT)
67 goto done;
68
69 /*
70 * Otherwise it's an offset mount and we need to check
71 * if we can umount its mount, if there is one.
72 */
73 if (!d_mountpoint(dentry))
74 goto done;
75 }
65 76
66 /* Update the expiry counter if fs is busy */ 77 /* Update the expiry counter if fs is busy */
67 if (!may_umount_tree(mnt)) { 78 if (!may_umount_tree(mnt)) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 88a776fa0ef6..db831efbdbbd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
986static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) 986static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
987{ 987{
988 struct gendisk *disk; 988 struct gendisk *disk;
989 struct hd_struct *part = NULL;
990 int ret; 989 int ret;
991 int partno; 990 int partno;
992 int perm = 0; 991 int perm = 0;
@@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1004 return ret; 1003 return ret;
1005 } 1004 }
1006 1005
1007 ret = -ENXIO;
1008
1009 lock_kernel(); 1006 lock_kernel();
1010 1007
1008 ret = -ENXIO;
1011 disk = get_gendisk(bdev->bd_dev, &partno); 1009 disk = get_gendisk(bdev->bd_dev, &partno);
1012 if (!disk) 1010 if (!disk)
1013 goto out_unlock_kernel; 1011 goto out_unlock_kernel;
1014 part = disk_get_part(disk, partno);
1015 if (!part)
1016 goto out_unlock_kernel;
1017 1012
1018 mutex_lock_nested(&bdev->bd_mutex, for_part); 1013 mutex_lock_nested(&bdev->bd_mutex, for_part);
1019 if (!bdev->bd_openers) { 1014 if (!bdev->bd_openers) {
1020 bdev->bd_disk = disk; 1015 bdev->bd_disk = disk;
1021 bdev->bd_part = part;
1022 bdev->bd_contains = bdev; 1016 bdev->bd_contains = bdev;
1023 if (!partno) { 1017 if (!partno) {
1024 struct backing_dev_info *bdi; 1018 struct backing_dev_info *bdi;
1019
1020 ret = -ENXIO;
1021 bdev->bd_part = disk_get_part(disk, partno);
1022 if (!bdev->bd_part)
1023 goto out_clear;
1024
1025 if (disk->fops->open) { 1025 if (disk->fops->open) {
1026 ret = disk->fops->open(bdev, mode); 1026 ret = disk->fops->open(bdev, mode);
1027 if (ret) 1027 if (ret)
@@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1049 bdev->bd_contains = whole; 1049 bdev->bd_contains = whole;
1050 bdev->bd_inode->i_data.backing_dev_info = 1050 bdev->bd_inode->i_data.backing_dev_info =
1051 whole->bd_inode->i_data.backing_dev_info; 1051 whole->bd_inode->i_data.backing_dev_info;
1052 bdev->bd_part = disk_get_part(disk, partno);
1052 if (!(disk->flags & GENHD_FL_UP) || 1053 if (!(disk->flags & GENHD_FL_UP) ||
1053 !part || !part->nr_sects) { 1054 !bdev->bd_part || !bdev->bd_part->nr_sects) {
1054 ret = -ENXIO; 1055 ret = -ENXIO;
1055 goto out_clear; 1056 goto out_clear;
1056 } 1057 }
1057 bd_set_size(bdev, (loff_t)part->nr_sects << 9); 1058 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1058 } 1059 }
1059 } else { 1060 } else {
1060 disk_put_part(part);
1061 put_disk(disk); 1061 put_disk(disk);
1062 module_put(disk->fops->owner); 1062 module_put(disk->fops->owner);
1063 part = NULL;
1064 disk = NULL; 1063 disk = NULL;
1065 if (bdev->bd_contains == bdev) { 1064 if (bdev->bd_contains == bdev) {
1066 if (bdev->bd_disk->fops->open) { 1065 if (bdev->bd_disk->fops->open) {
@@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1080 return 0; 1079 return 0;
1081 1080
1082 out_clear: 1081 out_clear:
1082 disk_put_part(bdev->bd_part);
1083 bdev->bd_disk = NULL; 1083 bdev->bd_disk = NULL;
1084 bdev->bd_part = NULL; 1084 bdev->bd_part = NULL;
1085 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1085 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
@@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1091 out_unlock_kernel: 1091 out_unlock_kernel:
1092 unlock_kernel(); 1092 unlock_kernel();
1093 1093
1094 disk_put_part(part);
1095 if (disk) 1094 if (disk)
1096 module_put(disk->fops->owner); 1095 module_put(disk->fops->owner);
1097 put_disk(disk); 1096 put_disk(disk);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8f528ea24c48..8855331b2fba 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable
4(when delete of an open file fails we mark the file as "delete-on-close" 4(when delete of an open file fails we mark the file as "delete-on-close"
5in a way that more servers accept, but only if we can first rename the 5in a way that more servers accept, but only if we can first rename the
6file to a temporary name). Add experimental support for more safely 6file to a temporary name). Add experimental support for more safely
7handling fcntl(F_SETLEASE). 7handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
8sends, and also let tcp autotune the socket send and receive buffers.
9This reduces the number of EAGAIN errors returned by TCP/IP in
10high stress workloads (and the number of retries on socket writes
11when sending large SMBWriteX requests).
8 12
9Version 1.54 13Version 1.54
10------------ 14------------
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c791e5b5a914..1cb1189f24e0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -141,6 +141,8 @@ struct TCP_Server_Info {
141 char versionMajor; 141 char versionMajor;
142 char versionMinor; 142 char versionMinor;
143 bool svlocal:1; /* local server or remote */ 143 bool svlocal:1; /* local server or remote */
144 bool noblocksnd; /* use blocking sendmsg */
145 bool noautotune; /* do not autotune send buf sizes */
144 atomic_t socketUseCount; /* number of open cifs sessions on socket */ 146 atomic_t socketUseCount; /* number of open cifs sessions on socket */
145 atomic_t inFlight; /* number of requests on the wire to server */ 147 atomic_t inFlight; /* number of requests on the wire to server */
146#ifdef CONFIG_CIFS_STATS2 148#ifdef CONFIG_CIFS_STATS2
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 0cff7fe986e8..6f21ecb85ce5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,7 +36,7 @@ extern void cifs_buf_release(void *);
36extern struct smb_hdr *cifs_small_buf_get(void); 36extern struct smb_hdr *cifs_small_buf_get(void);
37extern void cifs_small_buf_release(void *); 37extern void cifs_small_buf_release(void *);
38extern int smb_send(struct socket *, struct smb_hdr *, 38extern int smb_send(struct socket *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *); 39 unsigned int /* length */ , struct sockaddr *, bool);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 843a85fb8b9a..d5eac48fc415 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1536,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1536 __u32 bytes_sent; 1536 __u32 bytes_sent;
1537 __u16 byte_count; 1537 __u16 byte_count;
1538 1538
1539 /* cFYI(1,("write at %lld %d bytes",offset,count));*/ 1539 /* cFYI(1, ("write at %lld %d bytes", offset, count));*/
1540 if (tcon->ses == NULL) 1540 if (tcon->ses == NULL)
1541 return -ECONNABORTED; 1541 return -ECONNABORTED;
1542 1542
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71b7661e2260..e9f9248cb3fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -92,6 +92,8 @@ struct smb_vol {
92 bool seal:1; /* request transport encryption on share */ 92 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */ 93 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */ 94 bool local_lease:1; /* check leases only on local system, not remote */
95 bool noblocksnd:1;
96 bool noautotune:1;
95 unsigned int rsize; 97 unsigned int rsize;
96 unsigned int wsize; 98 unsigned int wsize;
97 unsigned int sockopt; 99 unsigned int sockopt;
@@ -102,9 +104,11 @@ struct smb_vol {
102static int ipv4_connect(struct sockaddr_in *psin_server, 104static int ipv4_connect(struct sockaddr_in *psin_server,
103 struct socket **csocket, 105 struct socket **csocket,
104 char *netb_name, 106 char *netb_name,
105 char *server_netb_name); 107 char *server_netb_name,
108 bool noblocksnd,
109 bool nosndbuf); /* ipv6 never set sndbuf size */
106static int ipv6_connect(struct sockaddr_in6 *psin_server, 110static int ipv6_connect(struct sockaddr_in6 *psin_server,
107 struct socket **csocket); 111 struct socket **csocket, bool noblocksnd);
108 112
109 113
110 /* 114 /*
@@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server)
191 try_to_freeze(); 195 try_to_freeze();
192 if (server->protocolType == IPV6) { 196 if (server->protocolType == IPV6) {
193 rc = ipv6_connect(&server->addr.sockAddr6, 197 rc = ipv6_connect(&server->addr.sockAddr6,
194 &server->ssocket); 198 &server->ssocket, server->noautotune);
195 } else { 199 } else {
196 rc = ipv4_connect(&server->addr.sockAddr, 200 rc = ipv4_connect(&server->addr.sockAddr,
197 &server->ssocket, 201 &server->ssocket,
198 server->workstation_RFC1001_name, 202 server->workstation_RFC1001_name,
199 server->server_RFC1001_name); 203 server->server_RFC1001_name,
204 server->noblocksnd, server->noautotune);
200 } 205 }
201 if (rc) { 206 if (rc) {
202 cFYI(1, ("reconnect error %d", rc)); 207 cFYI(1, ("reconnect error %d", rc));
@@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1192 /* ignore */ 1197 /* ignore */
1193 } else if (strnicmp(data, "rw", 2) == 0) { 1198 } else if (strnicmp(data, "rw", 2) == 0) {
1194 vol->rw = true; 1199 vol->rw = true;
1200 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1201 vol->noblocksnd = 1;
1202 } else if (strnicmp(data, "noautotune", 10) == 0) {
1203 vol->noautotune = 1;
1195 } else if ((strnicmp(data, "suid", 4) == 0) || 1204 } else if ((strnicmp(data, "suid", 4) == 0) ||
1196 (strnicmp(data, "nosuid", 6) == 0) || 1205 (strnicmp(data, "nosuid", 6) == 0) ||
1197 (strnicmp(data, "exec", 4) == 0) || 1206 (strnicmp(data, "exec", 4) == 0) ||
@@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
1518 1527
1519static int 1528static int
1520ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 1529ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1521 char *netbios_name, char *target_name) 1530 char *netbios_name, char *target_name,
1531 bool noblocksnd, bool noautotune)
1522{ 1532{
1523 int rc = 0; 1533 int rc = 0;
1524 int connected = 0; 1534 int connected = 0;
@@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1590 (*csocket)->sk->sk_sndbuf, 1600 (*csocket)->sk->sk_sndbuf,
1591 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); 1601 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
1592 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1602 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1603 if (!noblocksnd)
1604 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1605
1593 /* make the bufsizes depend on wsize/rsize and max requests */ 1606 /* make the bufsizes depend on wsize/rsize and max requests */
1594 if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) 1607 if (noautotune) {
1595 (*csocket)->sk->sk_sndbuf = 200 * 1024; 1608 if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
1596 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) 1609 (*csocket)->sk->sk_sndbuf = 200 * 1024;
1597 (*csocket)->sk->sk_rcvbuf = 140 * 1024; 1610 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
1611 (*csocket)->sk->sk_rcvbuf = 140 * 1024;
1612 }
1598 1613
1599 /* send RFC1001 sessinit */ 1614 /* send RFC1001 sessinit */
1600 if (psin_server->sin_port == htons(RFC1001_PORT)) { 1615 if (psin_server->sin_port == htons(RFC1001_PORT)) {
@@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1631 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1646 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1632 smb_buf->smb_buf_length = 0x81000044; 1647 smb_buf->smb_buf_length = 0x81000044;
1633 rc = smb_send(*csocket, smb_buf, 0x44, 1648 rc = smb_send(*csocket, smb_buf, 0x44,
1634 (struct sockaddr *)psin_server); 1649 (struct sockaddr *)psin_server, noblocksnd);
1635 kfree(ses_init_buf); 1650 kfree(ses_init_buf);
1636 msleep(1); /* RFC1001 layer in at least one server 1651 msleep(1); /* RFC1001 layer in at least one server
1637 requires very short break before negprot 1652 requires very short break before negprot
@@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1651} 1666}
1652 1667
1653static int 1668static int
1654ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) 1669ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket,
1670 bool noblocksnd)
1655{ 1671{
1656 int rc = 0; 1672 int rc = 0;
1657 int connected = 0; 1673 int connected = 0;
@@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1720 the default. sock_setsockopt not used because it expects 1736 the default. sock_setsockopt not used because it expects
1721 user space buffer */ 1737 user space buffer */
1722 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1738 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1739 if (!noblocksnd)
1740 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1741
1723 1742
1724 return rc; 1743 return rc;
1725} 1744}
@@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1983 cFYI(1, ("attempting ipv6 connect")); 2002 cFYI(1, ("attempting ipv6 connect"));
1984 /* BB should we allow ipv6 on port 139? */ 2003 /* BB should we allow ipv6 on port 139? */
1985 /* other OS never observed in Wild doing 139 with v6 */ 2004 /* other OS never observed in Wild doing 139 with v6 */
1986 rc = ipv6_connect(&sin_server6, &csocket); 2005 rc = ipv6_connect(&sin_server6, &csocket,
2006 volume_info.noblocksnd);
1987 } else 2007 } else
1988 rc = ipv4_connect(&sin_server, &csocket, 2008 rc = ipv4_connect(&sin_server, &csocket,
1989 volume_info.source_rfc1001_name, 2009 volume_info.source_rfc1001_name,
1990 volume_info.target_rfc1001_name); 2010 volume_info.target_rfc1001_name,
2011 volume_info.noblocksnd,
2012 volume_info.noautotune);
1991 if (rc < 0) { 2013 if (rc < 0) {
1992 cERROR(1, ("Error connecting to IPv4 socket. " 2014 cERROR(1, ("Error connecting to IPv4 socket. "
1993 "Aborting operation")); 2015 "Aborting operation"));
@@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2002 sock_release(csocket); 2024 sock_release(csocket);
2003 goto out; 2025 goto out;
2004 } else { 2026 } else {
2027 srvTcp->noblocksnd = volume_info.noblocksnd;
2028 srvTcp->noautotune = volume_info.noautotune;
2005 memcpy(&srvTcp->addr.sockAddr, &sin_server, 2029 memcpy(&srvTcp->addr.sockAddr, &sin_server,
2006 sizeof(struct sockaddr_in)); 2030 sizeof(struct sockaddr_in));
2007 atomic_set(&srvTcp->inFlight, 0); 2031 atomic_set(&srvTcp->inFlight, 0);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 62d8bd8f14c0..ead1a3bb0256 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1824 pTcon = cifs_sb->tcon; 1824 pTcon = cifs_sb->tcon;
1825 1825
1826 pagevec_init(&lru_pvec, 0); 1826 pagevec_init(&lru_pvec, 0);
1827 cFYI(DBG2, ("rpages: num pages %d", num_pages)); 1827 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1828 for (i = 0; i < num_pages; ) { 1828 for (i = 0; i < num_pages; ) {
1829 unsigned contig_pages; 1829 unsigned contig_pages;
1830 struct page *tmp_page; 1830 struct page *tmp_page;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d54fa8aeaea9..ff8c68de4a92 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1361 CIFS_MOUNT_MAP_SPECIAL_CHR); 1361 CIFS_MOUNT_MAP_SPECIAL_CHR);
1362 1362
1363 if (tmprc == 0 && (info_buf_source->UniqueId == 1363 if (tmprc == 0 && (info_buf_source->UniqueId ==
1364 info_buf_target->UniqueId)) 1364 info_buf_target->UniqueId)) {
1365 /* same file, POSIX says that this is a noop */ 1365 /* same file, POSIX says that this is a noop */
1366 rc = 0;
1366 goto cifs_rename_exit; 1367 goto cifs_rename_exit;
1368 }
1367 } /* else ... BB we could add the same check for Windows by 1369 } /* else ... BB we could add the same check for Windows by
1368 checking the UniqueId via FILE_INTERNAL_INFO */ 1370 checking the UniqueId via FILE_INTERNAL_INFO */
1369 1371
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bf0e6d8e382a..ff8243a8fe3e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
161 161
162int 162int
163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, 163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
164 unsigned int smb_buf_length, struct sockaddr *sin) 164 unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd)
165{ 165{
166 int rc = 0; 166 int rc = 0;
167 int i = 0; 167 int i = 0;
@@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
178 smb_msg.msg_namelen = sizeof(struct sockaddr); 178 smb_msg.msg_namelen = sizeof(struct sockaddr);
179 smb_msg.msg_control = NULL; 179 smb_msg.msg_control = NULL;
180 smb_msg.msg_controllen = 0; 180 smb_msg.msg_controllen = 0;
181 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 181 if (noblocksnd)
182 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
183 else
184 smb_msg.msg_flags = MSG_NOSIGNAL;
182 185
183 /* smb header is converted in header_assemble. bcc and rest of SMB word 186 /* smb header is converted in header_assemble. bcc and rest of SMB word
184 area, and byte area if necessary, is converted to littleendian in 187 area, and byte area if necessary, is converted to littleendian in
@@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
229} 232}
230 233
231static int 234static int
232smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, 235smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
233 struct sockaddr *sin) 236 struct sockaddr *sin, bool noblocksnd)
234{ 237{
235 int rc = 0; 238 int rc = 0;
236 int i = 0; 239 int i = 0;
@@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
240 unsigned int total_len; 243 unsigned int total_len;
241 int first_vec = 0; 244 int first_vec = 0;
242 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 245 unsigned int smb_buf_length = smb_buffer->smb_buf_length;
246 struct socket *ssocket = server->ssocket;
243 247
244 if (ssocket == NULL) 248 if (ssocket == NULL)
245 return -ENOTSOCK; /* BB eventually add reconnect code here */ 249 return -ENOTSOCK; /* BB eventually add reconnect code here */
@@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
248 smb_msg.msg_namelen = sizeof(struct sockaddr); 252 smb_msg.msg_namelen = sizeof(struct sockaddr);
249 smb_msg.msg_control = NULL; 253 smb_msg.msg_control = NULL;
250 smb_msg.msg_controllen = 0; 254 smb_msg.msg_controllen = 0;
251 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 255 if (noblocksnd)
256 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
257 else
258 smb_msg.msg_flags = MSG_NOSIGNAL;
252 259
253 /* smb header is converted in header_assemble. bcc and rest of SMB word 260 /* smb header is converted in header_assemble. bcc and rest of SMB word
254 area, and byte area if necessary, is converted to littleendian in 261 area, and byte area if necessary, is converted to littleendian in
@@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
283 if (rc < 0) 290 if (rc < 0)
284 break; 291 break;
285 292
286 if (rc >= total_len) { 293 if (rc == total_len) {
287 WARN_ON(rc > total_len); 294 total_len = 0;
295 break;
296 } else if (rc > total_len) {
297 cERROR(1, ("sent %d requested %d", rc, total_len));
288 break; 298 break;
289 } 299 }
290 if (rc == 0) { 300 if (rc == 0) {
@@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
312 i = 0; /* in case we get ENOSPC on the next send */ 322 i = 0; /* in case we get ENOSPC on the next send */
313 } 323 }
314 324
325 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
326 cFYI(1, ("partial send (%d remaining), terminating session",
327 total_len));
328 /* If we have only sent part of an SMB then the next SMB
329 could be taken as the remainder of this one. We need
330 to kill the socket so the server throws away the partial
331 SMB */
332 server->tcpStatus = CifsNeedReconnect;
333 }
334
315 if (rc < 0) { 335 if (rc < 0) {
316 cERROR(1, ("Error %d sending data on socket to server", rc)); 336 cERROR(1, ("Error %d sending data on socket to server", rc));
317 } else 337 } else
@@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
518#ifdef CONFIG_CIFS_STATS2 538#ifdef CONFIG_CIFS_STATS2
519 atomic_inc(&ses->server->inSend); 539 atomic_inc(&ses->server->inSend);
520#endif 540#endif
521 rc = smb_send2(ses->server->ssocket, iov, n_vec, 541 rc = smb_send2(ses->server, iov, n_vec,
522 (struct sockaddr *) &(ses->server->addr.sockAddr)); 542 (struct sockaddr *) &(ses->server->addr.sockAddr),
543 ses->server->noblocksnd);
523#ifdef CONFIG_CIFS_STATS2 544#ifdef CONFIG_CIFS_STATS2
524 atomic_dec(&ses->server->inSend); 545 atomic_dec(&ses->server->inSend);
525 midQ->when_sent = jiffies; 546 midQ->when_sent = jiffies;
@@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
711 atomic_inc(&ses->server->inSend); 732 atomic_inc(&ses->server->inSend);
712#endif 733#endif
713 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 734 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
714 (struct sockaddr *) &(ses->server->addr.sockAddr)); 735 (struct sockaddr *) &(ses->server->addr.sockAddr),
736 ses->server->noblocksnd);
715#ifdef CONFIG_CIFS_STATS2 737#ifdef CONFIG_CIFS_STATS2
716 atomic_dec(&ses->server->inSend); 738 atomic_dec(&ses->server->inSend);
717 midQ->when_sent = jiffies; 739 midQ->when_sent = jiffies;
@@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
851 return rc; 873 return rc;
852 } 874 }
853 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 875 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
854 (struct sockaddr *) &(ses->server->addr.sockAddr)); 876 (struct sockaddr *) &(ses->server->addr.sockAddr),
877 ses->server->noblocksnd);
855 up(&ses->server->tcpSem); 878 up(&ses->server->tcpSem);
856 return rc; 879 return rc;
857} 880}
@@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
941 atomic_inc(&ses->server->inSend); 964 atomic_inc(&ses->server->inSend);
942#endif 965#endif
943 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 966 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
944 (struct sockaddr *) &(ses->server->addr.sockAddr)); 967 (struct sockaddr *) &(ses->server->addr.sockAddr),
968 ses->server->noblocksnd);
945#ifdef CONFIG_CIFS_STATS2 969#ifdef CONFIG_CIFS_STATS2
946 atomic_dec(&ses->server->inSend); 970 atomic_dec(&ses->server->inSend);
947 midQ->when_sent = jiffies; 971 midQ->when_sent = jiffies;
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index cfd29da714d1..0376ac66c44a 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -2,7 +2,7 @@
2 * An implementation of a loadable kernel mode driver providing 2 * An implementation of a loadable kernel mode driver providing
3 * multiple kernel/user space bidirectional communications links. 3 * multiple kernel/user space bidirectional communications links.
4 * 4 *
5 * Author: Alan Cox <alan@redhat.com> 5 * Author: Alan Cox <alan@lxorguk.ukuu.org.uk>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 06db79d05c12..6046239465a1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1251,6 +1251,7 @@ struct kmem_cache *ecryptfs_header_cache_2;
1251/** 1251/**
1252 * ecryptfs_write_headers_virt 1252 * ecryptfs_write_headers_virt
1253 * @page_virt: The virtual address to write the headers to 1253 * @page_virt: The virtual address to write the headers to
1254 * @max: The size of memory allocated at page_virt
1254 * @size: Set to the number of bytes written by this function 1255 * @size: Set to the number of bytes written by this function
1255 * @crypt_stat: The cryptographic context 1256 * @crypt_stat: The cryptographic context
1256 * @ecryptfs_dentry: The eCryptfs dentry 1257 * @ecryptfs_dentry: The eCryptfs dentry
@@ -1278,7 +1279,8 @@ struct kmem_cache *ecryptfs_header_cache_2;
1278 * 1279 *
1279 * Returns zero on success 1280 * Returns zero on success
1280 */ 1281 */
1281static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, 1282static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1283 size_t *size,
1282 struct ecryptfs_crypt_stat *crypt_stat, 1284 struct ecryptfs_crypt_stat *crypt_stat,
1283 struct dentry *ecryptfs_dentry) 1285 struct dentry *ecryptfs_dentry)
1284{ 1286{
@@ -1296,7 +1298,7 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
1296 offset += written; 1298 offset += written;
1297 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, 1299 rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
1298 ecryptfs_dentry, &written, 1300 ecryptfs_dentry, &written,
1299 PAGE_CACHE_SIZE - offset); 1301 max - offset);
1300 if (rc) 1302 if (rc)
1301 ecryptfs_printk(KERN_WARNING, "Error generating key packet " 1303 ecryptfs_printk(KERN_WARNING, "Error generating key packet "
1302 "set; rc = [%d]\n", rc); 1304 "set; rc = [%d]\n", rc);
@@ -1368,14 +1370,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1368 goto out; 1370 goto out;
1369 } 1371 }
1370 /* Released in this function */ 1372 /* Released in this function */
1371 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1373 virt = (char *)get_zeroed_page(GFP_KERNEL);
1372 if (!virt) { 1374 if (!virt) {
1373 printk(KERN_ERR "%s: Out of memory\n", __func__); 1375 printk(KERN_ERR "%s: Out of memory\n", __func__);
1374 rc = -ENOMEM; 1376 rc = -ENOMEM;
1375 goto out; 1377 goto out;
1376 } 1378 }
1377 rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, 1379 rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size,
1378 ecryptfs_dentry); 1380 crypt_stat, ecryptfs_dentry);
1379 if (unlikely(rc)) { 1381 if (unlikely(rc)) {
1380 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1381 __func__, rc); 1383 __func__, rc);
@@ -1393,8 +1395,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1393 goto out_free; 1395 goto out_free;
1394 } 1396 }
1395out_free: 1397out_free:
1396 memset(virt, 0, crypt_stat->num_header_bytes_at_front); 1398 free_page((unsigned long)virt);
1397 kfree(virt);
1398out: 1399out:
1399 return rc; 1400 return rc;
1400} 1401}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 18eaa78ecb4e..5dec6d1356c4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -281,7 +281,8 @@ void ext3_abort (struct super_block * sb, const char * function,
281 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 281 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
282 sb->s_flags |= MS_RDONLY; 282 sb->s_flags |= MS_RDONLY;
283 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 283 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
284 journal_abort(EXT3_SB(sb)->s_journal, -EIO); 284 if (EXT3_SB(sb)->s_journal)
285 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
285} 286}
286 287
287void ext3_warning (struct super_block * sb, const char * function, 288void ext3_warning (struct super_block * sb, const char * function,
@@ -390,11 +391,14 @@ static void ext3_put_super (struct super_block * sb)
390{ 391{
391 struct ext3_sb_info *sbi = EXT3_SB(sb); 392 struct ext3_sb_info *sbi = EXT3_SB(sb);
392 struct ext3_super_block *es = sbi->s_es; 393 struct ext3_super_block *es = sbi->s_es;
393 int i; 394 int i, err;
394 395
395 ext3_xattr_put_super(sb); 396 ext3_xattr_put_super(sb);
396 if (journal_destroy(sbi->s_journal) < 0) 397 err = journal_destroy(sbi->s_journal);
398 sbi->s_journal = NULL;
399 if (err < 0)
397 ext3_abort(sb, __func__, "Couldn't clean up the journal"); 400 ext3_abort(sb, __func__, "Couldn't clean up the journal");
401
398 if (!(sb->s_flags & MS_RDONLY)) { 402 if (!(sb->s_flags & MS_RDONLY)) {
399 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 403 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
400 es->s_state = cpu_to_le16(sbi->s_mount_state); 404 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -2386,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb)
2386 2390
2387static int ext3_sync_fs(struct super_block *sb, int wait) 2391static int ext3_sync_fs(struct super_block *sb, int wait)
2388{ 2392{
2389 tid_t target;
2390
2391 sb->s_dirt = 0; 2393 sb->s_dirt = 0;
2392 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2394 if (wait)
2393 if (wait) 2395 ext3_force_commit(sb);
2394 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2396 else
2395 } 2397 journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
2398
2396 return 0; 2399 return 0;
2397} 2400}
2398 2401
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b9821be709bd..d2003cdc36aa 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -589,21 +589,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
589 return; 589 return;
590} 590}
591 591
592int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 592/**
593 s64 nblocks) 593 * ext4_has_free_blocks()
594 * @sbi: in-core super block structure.
595 * @nblocks: number of needed blocks
596 *
597 * Check if filesystem has nblocks free & available for allocation.
598 * On success return 1, return 0 on failure.
599 */
600int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
594{ 601{
595 s64 free_blocks, dirty_blocks; 602 s64 free_blocks, dirty_blocks, root_blocks;
596 s64 root_blocks = 0;
597 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 603 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
598 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 604 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
599 605
600 free_blocks = percpu_counter_read_positive(fbc); 606 free_blocks = percpu_counter_read_positive(fbc);
601 dirty_blocks = percpu_counter_read_positive(dbc); 607 dirty_blocks = percpu_counter_read_positive(dbc);
602 608 root_blocks = ext4_r_blocks_count(sbi->s_es);
603 if (!capable(CAP_SYS_RESOURCE) &&
604 sbi->s_resuid != current->fsuid &&
605 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
606 root_blocks = ext4_r_blocks_count(sbi->s_es);
607 609
608 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
609 EXT4_FREEBLOCKS_WATERMARK) { 611 EXT4_FREEBLOCKS_WATERMARK) {
@@ -616,57 +618,32 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
616 } 618 }
617 } 619 }
618 /* Check whether we have space after 620 /* Check whether we have space after
619 * accounting for current dirty blocks 621 * accounting for current dirty blocks & root reserved blocks.
620 */ 622 */
621 if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) 623 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
622 /* we don't have free space */ 624 return 1;
623 return -ENOSPC; 625
626 /* Hm, nope. Are (enough) root reserved blocks available? */
627 if (sbi->s_resuid == current->fsuid ||
628 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
629 capable(CAP_SYS_RESOURCE)) {
630 if (free_blocks >= (nblocks + dirty_blocks))
631 return 1;
632 }
624 633
625 /* Add the blocks to nblocks */
626 percpu_counter_add(dbc, nblocks);
627 return 0; 634 return 0;
628} 635}
629 636
630/** 637int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
631 * ext4_has_free_blocks()
632 * @sbi: in-core super block structure.
633 * @nblocks: number of neeed blocks
634 *
635 * Check if filesystem has free blocks available for allocation.
636 * Return the number of blocks avaible for allocation for this request
637 * On success, return nblocks
638 */
639ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
640 s64 nblocks) 638 s64 nblocks)
641{ 639{
642 s64 free_blocks, dirty_blocks; 640 if (ext4_has_free_blocks(sbi, nblocks)) {
643 s64 root_blocks = 0; 641 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
644 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
645 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
646
647 free_blocks = percpu_counter_read_positive(fbc);
648 dirty_blocks = percpu_counter_read_positive(dbc);
649
650 if (!capable(CAP_SYS_RESOURCE) &&
651 sbi->s_resuid != current->fsuid &&
652 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
653 root_blocks = ext4_r_blocks_count(sbi->s_es);
654
655 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
656 EXT4_FREEBLOCKS_WATERMARK) {
657 free_blocks = percpu_counter_sum(fbc);
658 dirty_blocks = percpu_counter_sum(dbc);
659 }
660 if (free_blocks <= (root_blocks + dirty_blocks))
661 /* we don't have free space */
662 return 0; 642 return 0;
663 643 } else
664 if (free_blocks - (root_blocks + dirty_blocks) < nblocks) 644 return -ENOSPC;
665 return free_blocks - (root_blocks + dirty_blocks);
666 return nblocks;
667} 645}
668 646
669
670/** 647/**
671 * ext4_should_retry_alloc() 648 * ext4_should_retry_alloc()
672 * @sb: super block 649 * @sb: super block
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4880cc3e6727..b0537c827024 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1003,8 +1003,7 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
1003 ext4_lblk_t iblock, ext4_fsblk_t goal, 1003 ext4_lblk_t iblock, ext4_fsblk_t goal,
1004 unsigned long *count, int *errp); 1004 unsigned long *count, int *errp);
1005extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1005extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1006extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 1006extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1007 s64 nblocks);
1008extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1007extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1009 ext4_fsblk_t block, unsigned long count, int metadata); 1008 ext4_fsblk_t block, unsigned long count, int metadata);
1010extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 1009extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fe34d74cfb19..2a117e286e54 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -718,6 +718,8 @@ got:
718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
719 free = ext4_free_blocks_after_init(sb, group, gdp); 719 free = ext4_free_blocks_after_init(sb, group, gdp);
720 gdp->bg_free_blocks_count = cpu_to_le16(free); 720 gdp->bg_free_blocks_count = cpu_to_le16(free);
721 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
722 gdp);
721 } 723 }
722 spin_unlock(sb_bgl_lock(sbi, group)); 724 spin_unlock(sb_bgl_lock(sbi, group));
723 725
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dbf6953845b..be21a5ae33cb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page,
2329 unlock_page(page); 2329 unlock_page(page);
2330 return 0; 2330 return 0;
2331 } 2331 }
2332 /* now mark the buffer_heads as dirty and uptodate */
2333 block_commit_write(page, 0, PAGE_CACHE_SIZE);
2332 } 2334 }
2333 2335
2334 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2336 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -4580,9 +4582,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4580static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 4582static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4581{ 4583{
4582 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 4584 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4583 return ext4_indirect_trans_blocks(inode, nrblocks, 0); 4585 return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
4584 return ext4_ext_index_trans_blocks(inode, nrblocks, 0); 4586 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
4585} 4587}
4588
4586/* 4589/*
4587 * Account for index blocks, block groups bitmaps and block group 4590 * Account for index blocks, block groups bitmaps and block group
4588 * descriptor blocks if modify datablocks and index blocks 4591 * descriptor blocks if modify datablocks and index blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dfe17a134052..444ad998f72e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4441 else if (block >= (entry->start_blk + entry->count)) 4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right; 4442 n = &(*n)->rb_right;
4443 else { 4443 else {
4444 ext4_unlock_group(sb, group);
4444 ext4_error(sb, __func__, 4445 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n", 4446 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count); 4447 block, entry->start_blk, entry->count);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bdddea14e782..e4a241c65dbe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -333,7 +333,8 @@ void ext4_abort(struct super_block *sb, const char *function,
333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
334 sb->s_flags |= MS_RDONLY; 334 sb->s_flags |= MS_RDONLY;
335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
336 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 336 if (EXT4_SB(sb)->s_journal)
337 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
337} 338}
338 339
339void ext4_warning(struct super_block *sb, const char *function, 340void ext4_warning(struct super_block *sb, const char *function,
@@ -442,14 +443,16 @@ static void ext4_put_super(struct super_block *sb)
442{ 443{
443 struct ext4_sb_info *sbi = EXT4_SB(sb); 444 struct ext4_sb_info *sbi = EXT4_SB(sb);
444 struct ext4_super_block *es = sbi->s_es; 445 struct ext4_super_block *es = sbi->s_es;
445 int i; 446 int i, err;
446 447
447 ext4_mb_release(sb); 448 ext4_mb_release(sb);
448 ext4_ext_release(sb); 449 ext4_ext_release(sb);
449 ext4_xattr_put_super(sb); 450 ext4_xattr_put_super(sb);
450 if (jbd2_journal_destroy(sbi->s_journal) < 0) 451 err = jbd2_journal_destroy(sbi->s_journal);
451 ext4_abort(sb, __func__, "Couldn't clean up the journal");
452 sbi->s_journal = NULL; 452 sbi->s_journal = NULL;
453 if (err < 0)
454 ext4_abort(sb, __func__, "Couldn't clean up the journal");
455
453 if (!(sb->s_flags & MS_RDONLY)) { 456 if (!(sb->s_flags & MS_RDONLY)) {
454 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 457 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
455 es->s_state = cpu_to_le16(sbi->s_mount_state); 458 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -1455,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
1455 1458
1456 /* We allocate both existing and potentially added groups */ 1459 /* We allocate both existing and potentially added groups */
1457 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1458 ((sbi->s_es->s_reserved_gdt_blocks +1 ) << 1461 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1459 EXT4_DESC_PER_BLOCK_BITS(sb))) / 1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1460 groups_per_flex;
1461 sbi->s_flex_groups = kzalloc(flex_group_count * 1463 sbi->s_flex_groups = kzalloc(flex_group_count *
1462 sizeof(struct flex_groups), GFP_KERNEL); 1464 sizeof(struct flex_groups), GFP_KERNEL);
1463 if (sbi->s_flex_groups == NULL) { 1465 if (sbi->s_flex_groups == NULL) {
@@ -2882,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb)
2882/* 2884/*
2883 * Ext4 always journals updates to the superblock itself, so we don't 2885 * Ext4 always journals updates to the superblock itself, so we don't
2884 * have to propagate any other updates to the superblock on disk at this 2886 * have to propagate any other updates to the superblock on disk at this
2885 * point. Just start an async writeback to get the buffers on their way 2887 * point. (We can probably nuke this function altogether, and remove
2886 * to the disk. 2888 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
2887 *
2888 * This implicitly triggers the writebehind on sync().
2889 */ 2889 */
2890
2891static void ext4_write_super(struct super_block *sb) 2890static void ext4_write_super(struct super_block *sb)
2892{ 2891{
2893 if (mutex_trylock(&sb->s_lock) != 0) 2892 if (mutex_trylock(&sb->s_lock) != 0)
@@ -2897,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb)
2897 2896
2898static int ext4_sync_fs(struct super_block *sb, int wait) 2897static int ext4_sync_fs(struct super_block *sb, int wait)
2899{ 2898{
2900 tid_t target; 2899 int ret = 0;
2901 2900
2902 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 2901 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2903 sb->s_dirt = 0; 2902 sb->s_dirt = 0;
2904 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2903 if (wait)
2905 if (wait) 2904 ret = ext4_force_commit(sb);
2906 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2905 else
2907 } 2906 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
2908 return 0; 2907 return ret;
2909} 2908}
2910 2909
2911/* 2910/*
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index bfb5f06cf2c8..e06190322c1c 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -3,5 +3,9 @@
3# 3#
4 4
5obj-$(CONFIG_FAT_FS) += fat.o 5obj-$(CONFIG_FAT_FS) += fat.o
6obj-$(CONFIG_VFAT_FS) += vfat.o
7obj-$(CONFIG_MSDOS_FS) += msdos.o
6 8
7fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o 9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o
10vfat-y := namei_vfat.o
11msdos-y := namei_msdos.o
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3222f51c41cf..b42602298087 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -9,8 +9,8 @@
9 */ 9 */
10 10
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/msdos_fs.h>
13#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include "fat.h"
14 14
15/* this must be > 0. */ 15/* this must be > 0. */
16#define FAT_MAX_CACHE 8 16#define FAT_MAX_CACHE 8
@@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
293} 293}
294 294
295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, 295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
296 unsigned long *mapped_blocks) 296 unsigned long *mapped_blocks, int create)
297{ 297{
298 struct super_block *sb = inode->i_sb; 298 struct super_block *sb = inode->i_sb;
299 struct msdos_sb_info *sbi = MSDOS_SB(sb); 299 struct msdos_sb_info *sbi = MSDOS_SB(sb);
300 const unsigned long blocksize = sb->s_blocksize;
301 const unsigned char blocksize_bits = sb->s_blocksize_bits;
300 sector_t last_block; 302 sector_t last_block;
301 int cluster, offset; 303 int cluster, offset;
302 304
@@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
309 } 311 }
310 return 0; 312 return 0;
311 } 313 }
312 last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) 314
313 >> sb->s_blocksize_bits; 315 last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
314 if (sector >= last_block) 316 if (sector >= last_block) {
315 return 0; 317 if (!create)
318 return 0;
319
320 /*
321 * ->mmu_private can access on only allocation path.
322 * (caller must hold ->i_mutex)
323 */
324 last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
325 >> blocksize_bits;
326 if (sector >= last_block)
327 return 0;
328 }
316 329
317 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); 330 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
318 offset = sector & (sbi->sec_per_clus - 1); 331 offset = sector & (sbi->sec_per_clus - 1);
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index bae1c3292522..67e058357098 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,11 +16,11 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h>
20#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
21#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
22#include <linux/compat.h> 21#include <linux/compat.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include "fat.h"
24 24
25static inline loff_t fat_make_i_pos(struct super_block *sb, 25static inline loff_t fat_make_i_pos(struct super_block *sb,
26 struct buffer_head *bh, 26 struct buffer_head *bh,
@@ -77,7 +77,7 @@ next:
77 77
78 *bh = NULL; 78 *bh = NULL;
79 iblock = *pos >> sb->s_blocksize_bits; 79 iblock = *pos >> sb->s_blocksize_bits;
80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks); 80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0);
81 if (err || !phys) 81 if (err || !phys)
82 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
83 83
@@ -86,7 +86,7 @@ next:
86 *bh = sb_bread(sb, phys); 86 *bh = sb_bread(sb, phys);
87 if (*bh == NULL) { 87 if (*bh == NULL) {
88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
89 (unsigned long long)phys); 89 (llu)phys);
90 /* skip this block */ 90 /* skip this block */
91 *pos = (iblock + 1) << sb->s_blocksize_bits; 91 *pos = (iblock + 1) << sb->s_blocksize_bits;
92 goto next; 92 goto next;
@@ -373,9 +373,10 @@ parse_record:
373 if (de->attr == ATTR_EXT) { 373 if (de->attr == ATTR_EXT) {
374 int status = fat_parse_long(inode, &cpos, &bh, &de, 374 int status = fat_parse_long(inode, &cpos, &bh, &de,
375 &unicode, &nr_slots); 375 &unicode, &nr_slots);
376 if (status < 0) 376 if (status < 0) {
377 return status; 377 err = status;
378 else if (status == PARSE_INVALID) 378 goto end_of_dir;
379 } else if (status == PARSE_INVALID)
379 continue; 380 continue;
380 else if (status == PARSE_NOT_LONGNAME) 381 else if (status == PARSE_NOT_LONGNAME)
381 goto parse_record; 382 goto parse_record;
@@ -832,6 +833,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
832#endif /* CONFIG_COMPAT */ 833#endif /* CONFIG_COMPAT */
833 834
834const struct file_operations fat_dir_operations = { 835const struct file_operations fat_dir_operations = {
836 .llseek = generic_file_llseek,
835 .read = generic_read_dir, 837 .read = generic_read_dir,
836 .readdir = fat_readdir, 838 .readdir = fat_readdir,
837 .ioctl = fat_dir_ioctl, 839 .ioctl = fat_dir_ioctl,
@@ -1089,6 +1091,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1089 struct msdos_dir_entry *de; 1091 struct msdos_dir_entry *de;
1090 sector_t blknr; 1092 sector_t blknr;
1091 __le16 date, time; 1093 __le16 date, time;
1094 u8 time_cs;
1092 int err, cluster; 1095 int err, cluster;
1093 1096
1094 err = fat_alloc_clusters(dir, &cluster, 1); 1097 err = fat_alloc_clusters(dir, &cluster, 1);
@@ -1102,7 +1105,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1102 goto error_free; 1105 goto error_free;
1103 } 1106 }
1104 1107
1105 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 1108 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
1106 1109
1107 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1110 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1108 /* filling the new directory slots ("." and ".." entries) */ 1111 /* filling the new directory slots ("." and ".." entries) */
@@ -1112,13 +1115,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1112 de[0].lcase = de[1].lcase = 0; 1115 de[0].lcase = de[1].lcase = 0;
1113 de[0].time = de[1].time = time; 1116 de[0].time = de[1].time = time;
1114 de[0].date = de[1].date = date; 1117 de[0].date = de[1].date = date;
1115 de[0].ctime_cs = de[1].ctime_cs = 0;
1116 if (sbi->options.isvfat) { 1118 if (sbi->options.isvfat) {
1117 /* extra timestamps */ 1119 /* extra timestamps */
1118 de[0].ctime = de[1].ctime = time; 1120 de[0].ctime = de[1].ctime = time;
1121 de[0].ctime_cs = de[1].ctime_cs = time_cs;
1119 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; 1122 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date;
1120 } else { 1123 } else {
1121 de[0].ctime = de[1].ctime = 0; 1124 de[0].ctime = de[1].ctime = 0;
1125 de[0].ctime_cs = de[1].ctime_cs = 0;
1122 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; 1126 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0;
1123 } 1127 }
1124 de[0].start = cpu_to_le16(cluster); 1128 de[0].start = cpu_to_le16(cluster);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
new file mode 100644
index 000000000000..ea440d65819c
--- /dev/null
+++ b/fs/fat/fat.h
@@ -0,0 +1,329 @@
1#ifndef _FAT_H
2#define _FAT_H
3
4#include <linux/buffer_head.h>
5#include <linux/string.h>
6#include <linux/nls.h>
7#include <linux/fs.h>
8#include <linux/mutex.h>
9#include <linux/msdos_fs.h>
10
11/*
12 * vfat shortname flags
13 */
14#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
15#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
16#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
17#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
18#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
19
20struct fat_mount_options {
21 uid_t fs_uid;
22 gid_t fs_gid;
23 unsigned short fs_fmask;
24 unsigned short fs_dmask;
25 unsigned short codepage; /* Codepage for shortname conversions */
26 char *iocharset; /* Charset used for filename input/display */
27 unsigned short shortname; /* flags for shortname display/create rule */
28 unsigned char name_check; /* r = relaxed, n = normal, s = strict */
29 unsigned short allow_utime;/* permission for setting the [am]time */
30 unsigned quiet:1, /* set = fake successful chmods and chowns */
31 showexec:1, /* set = only set x bit for com/exe/bat */
32 sys_immutable:1, /* set = system files are immutable */
33 dotsOK:1, /* set = hidden and system files are named '.filename' */
34 isvfat:1, /* 0=no vfat long filename support, 1=vfat support */
35 utf8:1, /* Use of UTF-8 character set (Default) */
36 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
37 numtail:1, /* Does first alias have a numeric '~1' type tail? */
38 flush:1, /* write things quickly */
39 nocase:1, /* Does this need case conversion? 0=need case conversion*/
40 usefree:1, /* Use free_clusters for FAT32 */
41 tz_utc:1, /* Filesystem timestamps are in UTC */
42 rodir:1; /* allow ATTR_RO for directory */
43};
44
45#define FAT_HASH_BITS 8
46#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS)
47
48/*
49 * MS-DOS file system in-core superblock data
50 */
51struct msdos_sb_info {
52 unsigned short sec_per_clus; /* sectors/cluster */
53 unsigned short cluster_bits; /* log2(cluster_size) */
54 unsigned int cluster_size; /* cluster size */
55 unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */
56 unsigned short fat_start;
57 unsigned long fat_length; /* FAT start & length (sec.) */
58 unsigned long dir_start;
59 unsigned short dir_entries; /* root dir start & entries */
60 unsigned long data_start; /* first data sector */
61 unsigned long max_cluster; /* maximum cluster number */
62 unsigned long root_cluster; /* first cluster of the root directory */
63 unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
64 struct mutex fat_lock;
65 unsigned int prev_free; /* previously allocated cluster number */
66 unsigned int free_clusters; /* -1 if undefined */
67 unsigned int free_clus_valid; /* is free_clusters valid? */
68 struct fat_mount_options options;
69 struct nls_table *nls_disk; /* Codepage used on disk */
70 struct nls_table *nls_io; /* Charset used for input and display */
71 const void *dir_ops; /* Opaque; default directory operations */
72 int dir_per_block; /* dir entries per block */
73 int dir_per_block_bits; /* log2(dir_per_block) */
74
75 int fatent_shift;
76 struct fatent_operations *fatent_ops;
77
78 spinlock_t inode_hash_lock;
79 struct hlist_head inode_hashtable[FAT_HASH_SIZE];
80};
81
82#define FAT_CACHE_VALID 0 /* special case for valid cache */
83
84/*
85 * MS-DOS file system inode data in memory
86 */
87struct msdos_inode_info {
88 spinlock_t cache_lru_lock;
89 struct list_head cache_lru;
90 int nr_caches;
91 /* for avoiding the race between fat_free() and fat_get_cluster() */
92 unsigned int cache_valid_id;
93
94 /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
95 loff_t mmu_private; /* physically allocated size */
96
97 int i_start; /* first cluster or 0 */
98 int i_logstart; /* logical first cluster */
99 int i_attrs; /* unused attribute bits */
100 loff_t i_pos; /* on-disk position of directory entry or 0 */
101 struct hlist_node i_fat_hash; /* hash by i_location */
102 struct inode vfs_inode;
103};
104
105struct fat_slot_info {
106 loff_t i_pos; /* on-disk position of directory entry */
107 loff_t slot_off; /* offset for slot or de start */
108 int nr_slots; /* number of slots + 1(de) in filename */
109 struct msdos_dir_entry *de;
110 struct buffer_head *bh;
111};
112
113static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
114{
115 return sb->s_fs_info;
116}
117
118static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
119{
120 return container_of(inode, struct msdos_inode_info, vfs_inode);
121}
122
123/*
124 * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
125 * save ATTR_RO instead of ->i_mode.
126 *
127 * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only
128 * bit, it's just used as flag for app.
129 */
130static inline int fat_mode_can_hold_ro(struct inode *inode)
131{
132 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
133 mode_t mask;
134
135 if (S_ISDIR(inode->i_mode)) {
136 if (!sbi->options.rodir)
137 return 0;
138 mask = ~sbi->options.fs_dmask;
139 } else
140 mask = ~sbi->options.fs_fmask;
141
142 if (!(mask & S_IWUGO))
143 return 0;
144 return 1;
145}
146
147/* Convert attribute bits and a mask to the UNIX mode. */
148static inline mode_t fat_make_mode(struct msdos_sb_info *sbi,
149 u8 attrs, mode_t mode)
150{
151 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
152 mode &= ~S_IWUGO;
153
154 if (attrs & ATTR_DIR)
155 return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
156 else
157 return (mode & ~sbi->options.fs_fmask) | S_IFREG;
158}
159
160/* Return the FAT attribute byte for this inode */
161static inline u8 fat_make_attrs(struct inode *inode)
162{
163 u8 attrs = MSDOS_I(inode)->i_attrs;
164 if (S_ISDIR(inode->i_mode))
165 attrs |= ATTR_DIR;
166 if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
167 attrs |= ATTR_RO;
168 return attrs;
169}
170
171static inline void fat_save_attrs(struct inode *inode, u8 attrs)
172{
173 if (fat_mode_can_hold_ro(inode))
174 MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED;
175 else
176 MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO);
177}
178
179static inline unsigned char fat_checksum(const __u8 *name)
180{
181 unsigned char s = name[0];
182 s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2];
183 s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4];
184 s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6];
185 s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8];
186 s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10];
187 return s;
188}
189
190static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus)
191{
192 return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus
193 + sbi->data_start;
194}
195
196static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
197{
198#ifdef __BIG_ENDIAN
199 while (len--) {
200 *dst++ = src[0] | (src[1] << 8);
201 src += 2;
202 }
203#else
204 memcpy(dst, src, len * 2);
205#endif
206}
207
208static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
209{
210#ifdef __BIG_ENDIAN
211 while (len--) {
212 dst[0] = *src & 0x00FF;
213 dst[1] = (*src & 0xFF00) >> 8;
214 dst += 2;
215 src++;
216 }
217#else
218 memcpy(dst, src, len * 2);
219#endif
220}
221
222/* fat/cache.c */
223extern void fat_cache_inval_inode(struct inode *inode);
224extern int fat_get_cluster(struct inode *inode, int cluster,
225 int *fclus, int *dclus);
226extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
227 unsigned long *mapped_blocks, int create);
228
229/* fat/dir.c */
230extern const struct file_operations fat_dir_operations;
231extern int fat_search_long(struct inode *inode, const unsigned char *name,
232 int name_len, struct fat_slot_info *sinfo);
233extern int fat_dir_empty(struct inode *dir);
234extern int fat_subdirs(struct inode *dir);
235extern int fat_scan(struct inode *dir, const unsigned char *name,
236 struct fat_slot_info *sinfo);
237extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
238 struct msdos_dir_entry **de, loff_t *i_pos);
239extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
240extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
241 struct fat_slot_info *sinfo);
242extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
243
244/* fat/fatent.c */
245struct fat_entry {
246 int entry;
247 union {
248 u8 *ent12_p[2];
249 __le16 *ent16_p;
250 __le32 *ent32_p;
251 } u;
252 int nr_bhs;
253 struct buffer_head *bhs[2];
254};
255
256static inline void fatent_init(struct fat_entry *fatent)
257{
258 fatent->nr_bhs = 0;
259 fatent->entry = 0;
260 fatent->u.ent32_p = NULL;
261 fatent->bhs[0] = fatent->bhs[1] = NULL;
262}
263
264static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
265{
266 fatent->entry = entry;
267 fatent->u.ent32_p = NULL;
268}
269
270static inline void fatent_brelse(struct fat_entry *fatent)
271{
272 int i;
273 fatent->u.ent32_p = NULL;
274 for (i = 0; i < fatent->nr_bhs; i++)
275 brelse(fatent->bhs[i]);
276 fatent->nr_bhs = 0;
277 fatent->bhs[0] = fatent->bhs[1] = NULL;
278}
279
280extern void fat_ent_access_init(struct super_block *sb);
281extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
282 int entry);
283extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent,
284 int new, int wait);
285extern int fat_alloc_clusters(struct inode *inode, int *cluster,
286 int nr_cluster);
287extern int fat_free_clusters(struct inode *inode, int cluster);
288extern int fat_count_free_clusters(struct super_block *sb);
289
290/* fat/file.c */
291extern int fat_generic_ioctl(struct inode *inode, struct file *filp,
292 unsigned int cmd, unsigned long arg);
293extern const struct file_operations fat_file_operations;
294extern const struct inode_operations fat_file_inode_operations;
295extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
296extern void fat_truncate(struct inode *inode);
297extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
298 struct kstat *stat);
299
300/* fat/inode.c */
301extern void fat_attach(struct inode *inode, loff_t i_pos);
302extern void fat_detach(struct inode *inode);
303extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
304extern struct inode *fat_build_inode(struct super_block *sb,
305 struct msdos_dir_entry *de, loff_t i_pos);
306extern int fat_sync_inode(struct inode *inode);
307extern int fat_fill_super(struct super_block *sb, void *data, int silent,
308 const struct inode_operations *fs_dir_inode_ops, int isvfat);
309
310extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
311 struct inode *i2);
312/* fat/misc.c */
313extern void fat_fs_panic(struct super_block *s, const char *fmt, ...)
314 __attribute__ ((format (printf, 2, 3))) __cold;
315extern void fat_clusters_flush(struct super_block *sb);
316extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
317extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
318 __le16 __time, __le16 __date, u8 time_cs);
319extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
320 __le16 *time, __le16 *date, u8 *time_cs);
321extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
322
323int fat_cache_init(void);
324void fat_cache_destroy(void);
325
326/* helper for printk */
327typedef unsigned long long llu;
328
329#endif /* !_FAT_H */
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index fb98b3d847ed..da6eea47872f 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include "fat.h"
10 11
11struct fatent_operations { 12struct fatent_operations {
12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); 13 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -92,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
92err_brelse: 93err_brelse:
93 brelse(bhs[0]); 94 brelse(bhs[0]);
94err: 95err:
95 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 96 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);
96 (unsigned long long)blocknr);
97 return -EIO; 97 return -EIO;
98} 98}
99 99
@@ -106,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
106 fatent->bhs[0] = sb_bread(sb, blocknr); 106 fatent->bhs[0] = sb_bread(sb, blocknr);
107 if (!fatent->bhs[0]) { 107 if (!fatent->bhs[0]) {
108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
109 (unsigned long long)blocknr); 109 (llu)blocknr);
110 return -EIO; 110 return -EIO;
111 } 111 }
112 fatent->nr_bhs = 1; 112 fatent->nr_bhs = 1;
@@ -316,10 +316,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
316 /* Is this fatent's blocks including this entry? */ 316 /* Is this fatent's blocks including this entry? */
317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) 317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr)
318 return 0; 318 return 0;
319 /* Does this entry need the next block? */ 319 if (sbi->fat_bits == 12) {
320 if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { 320 if ((offset + 1) < sb->s_blocksize) {
321 if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) 321 /* This entry is on bhs[0]. */
322 return 0; 322 if (fatent->nr_bhs == 2) {
323 brelse(bhs[1]);
324 fatent->nr_bhs = 1;
325 }
326 } else {
327 /* This entry needs the next block. */
328 if (fatent->nr_bhs != 2)
329 return 0;
330 if (bhs[1]->b_blocknr != (blocknr + 1))
331 return 0;
332 }
323 } 333 }
324 ops->ent_set_ptr(fatent, offset); 334 ops->ent_set_ptr(fatent, offset);
325 return 1; 335 return 1;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index ddde37025ca6..f06a4e525ece 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,13 +10,13 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/msdos_fs.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
15#include <linux/writeback.h> 14#include <linux/writeback.h>
16#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 16#include <linux/blkdev.h>
18#include <linux/fsnotify.h> 17#include <linux/fsnotify.h>
19#include <linux/security.h> 18#include <linux/security.h>
19#include "fat.h"
20 20
21int fat_generic_ioctl(struct inode *inode, struct file *filp, 21int fat_generic_ioctl(struct inode *inode, struct file *filp,
22 unsigned int cmd, unsigned long arg) 22 unsigned int cmd, unsigned long arg)
@@ -29,10 +29,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
29 { 29 {
30 u32 attr; 30 u32 attr;
31 31
32 if (inode->i_ino == MSDOS_ROOT_INO) 32 mutex_lock(&inode->i_mutex);
33 attr = ATTR_DIR; 33 attr = fat_make_attrs(inode);
34 else 34 mutex_unlock(&inode->i_mutex);
35 attr = fat_attr(inode);
36 35
37 return put_user(attr, user_attr); 36 return put_user(attr, user_attr);
38 } 37 }
@@ -62,20 +61,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
62 /* Merge in ATTR_VOLUME and ATTR_DIR */ 61 /* Merge in ATTR_VOLUME and ATTR_DIR */
63 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | 62 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
64 (is_dir ? ATTR_DIR : 0); 63 (is_dir ? ATTR_DIR : 0);
65 oldattr = fat_attr(inode); 64 oldattr = fat_make_attrs(inode);
66 65
67 /* Equivalent to a chmod() */ 66 /* Equivalent to a chmod() */
68 ia.ia_valid = ATTR_MODE | ATTR_CTIME; 67 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
69 ia.ia_ctime = current_fs_time(inode->i_sb); 68 ia.ia_ctime = current_fs_time(inode->i_sb);
70 if (is_dir) { 69 if (is_dir)
71 ia.ia_mode = MSDOS_MKMODE(attr, 70 ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
72 S_IRWXUGO & ~sbi->options.fs_dmask) 71 else {
73 | S_IFDIR; 72 ia.ia_mode = fat_make_mode(sbi, attr,
74 } else { 73 S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
75 ia.ia_mode = MSDOS_MKMODE(attr,
76 (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO))
77 & ~sbi->options.fs_fmask)
78 | S_IFREG;
79 } 74 }
80 75
81 /* The root directory has no attributes */ 76 /* The root directory has no attributes */
@@ -115,7 +110,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
115 inode->i_flags &= S_IMMUTABLE; 110 inode->i_flags &= S_IMMUTABLE;
116 } 111 }
117 112
118 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 113 fat_save_attrs(inode, attr);
119 mark_inode_dirty(inode); 114 mark_inode_dirty(inode);
120up: 115up:
121 mnt_drop_write(filp->f_path.mnt); 116 mnt_drop_write(filp->f_path.mnt);
@@ -274,7 +269,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
274 269
275 /* 270 /*
276 * Note, the basic check is already done by a caller of 271 * Note, the basic check is already done by a caller of
277 * (attr->ia_mode & ~MSDOS_VALID_MODE) 272 * (attr->ia_mode & ~FAT_VALID_MODE)
278 */ 273 */
279 274
280 if (S_ISREG(inode->i_mode)) 275 if (S_ISREG(inode->i_mode))
@@ -287,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
287 /* 282 /*
288 * Of the r and x bits, all (subject to umask) must be present. Of the 283 * Of the r and x bits, all (subject to umask) must be present. Of the
289 * w bits, either all (subject to umask) or none must be present. 284 * w bits, either all (subject to umask) or none must be present.
285 *
286 * If fat_mode_can_hold_ro(inode) is false, can't change w bits.
290 */ 287 */
291 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) 288 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
292 return -EPERM; 289 return -EPERM;
293 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) 290 if (fat_mode_can_hold_ro(inode)) {
294 return -EPERM; 291 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
292 return -EPERM;
293 } else {
294 if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
295 return -EPERM;
296 }
295 297
296 *mode_ptr &= S_IFMT | perm; 298 *mode_ptr &= S_IFMT | perm;
297 299
@@ -314,13 +316,15 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
314} 316}
315 317
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) 318#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
319/* valid file mode bits */
320#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
317 321
318int fat_setattr(struct dentry *dentry, struct iattr *attr) 322int fat_setattr(struct dentry *dentry, struct iattr *attr)
319{ 323{
320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 324 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
321 struct inode *inode = dentry->d_inode; 325 struct inode *inode = dentry->d_inode;
322 int error = 0;
323 unsigned int ia_valid; 326 unsigned int ia_valid;
327 int error;
324 328
325 /* 329 /*
326 * Expand the file. Since inode_setattr() updates ->i_size 330 * Expand the file. Since inode_setattr() updates ->i_size
@@ -356,7 +360,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
356 ((attr->ia_valid & ATTR_GID) && 360 ((attr->ia_valid & ATTR_GID) &&
357 (attr->ia_gid != sbi->options.fs_gid)) || 361 (attr->ia_gid != sbi->options.fs_gid)) ||
358 ((attr->ia_valid & ATTR_MODE) && 362 ((attr->ia_valid & ATTR_MODE) &&
359 (attr->ia_mode & ~MSDOS_VALID_MODE))) 363 (attr->ia_mode & ~FAT_VALID_MODE)))
360 error = -EPERM; 364 error = -EPERM;
361 365
362 if (error) { 366 if (error) {
@@ -374,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
374 attr->ia_valid &= ~ATTR_MODE; 378 attr->ia_valid &= ~ATTR_MODE;
375 } 379 }
376 380
377 error = inode_setattr(inode, attr); 381 if (attr->ia_valid)
382 error = inode_setattr(inode, attr);
378out: 383out:
379 return error; 384 return error;
380} 385}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 19eafbe3c379..bdd8fb7be2ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -16,7 +16,6 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/msdos_fs.h>
20#include <linux/pagemap.h> 19#include <linux/pagemap.h>
21#include <linux/mpage.h> 20#include <linux/mpage.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
@@ -27,7 +26,9 @@
27#include <linux/uio.h> 26#include <linux/uio.h>
28#include <linux/writeback.h> 27#include <linux/writeback.h>
29#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/hash.h>
30#include <asm/unaligned.h> 30#include <asm/unaligned.h>
31#include "fat.h"
31 32
32#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 33#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
33/* if user don't select VFAT, this is undefined. */ 34/* if user don't select VFAT, this is undefined. */
@@ -63,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
63 sector_t phys; 64 sector_t phys;
64 int err, offset; 65 int err, offset;
65 66
66 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 67 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
67 if (err) 68 if (err)
68 return err; 69 return err;
69 if (phys) { 70 if (phys) {
@@ -93,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
93 *max_blocks = min(mapped_blocks, *max_blocks); 94 *max_blocks = min(mapped_blocks, *max_blocks);
94 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; 95 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
95 96
96 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 97 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
97 if (err) 98 if (err)
98 return err; 99 return err;
99 100
@@ -175,7 +176,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
175 176
176 if (rw == WRITE) { 177 if (rw == WRITE) {
177 /* 178 /*
178 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), 179 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
179 * so we need to update the ->mmu_private to block boundary. 180 * so we need to update the ->mmu_private to block boundary.
180 * 181 *
181 * But we must fill the remaining area or hole by nul for 182 * But we must fill the remaining area or hole by nul for
@@ -198,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
198 199
199static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 200static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
200{ 201{
201 return generic_block_bmap(mapping, block, fat_get_block); 202 sector_t blocknr;
203
204 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
205 mutex_lock(&mapping->host->i_mutex);
206 blocknr = generic_block_bmap(mapping, block, fat_get_block);
207 mutex_unlock(&mapping->host->i_mutex);
208
209 return blocknr;
202} 210}
203 211
204static const struct address_space_operations fat_aops = { 212static const struct address_space_operations fat_aops = {
@@ -247,25 +255,21 @@ static void fat_hash_init(struct super_block *sb)
247 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); 255 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
248} 256}
249 257
250static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) 258static inline unsigned long fat_hash(loff_t i_pos)
251{ 259{
252 unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; 260 return hash_32(i_pos, FAT_HASH_BITS);
253 tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
254 return tmp & FAT_HASH_MASK;
255} 261}
256 262
257void fat_attach(struct inode *inode, loff_t i_pos) 263void fat_attach(struct inode *inode, loff_t i_pos)
258{ 264{
259 struct super_block *sb = inode->i_sb; 265 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
260 struct msdos_sb_info *sbi = MSDOS_SB(sb); 266 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
261 267
262 spin_lock(&sbi->inode_hash_lock); 268 spin_lock(&sbi->inode_hash_lock);
263 MSDOS_I(inode)->i_pos = i_pos; 269 MSDOS_I(inode)->i_pos = i_pos;
264 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, 270 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
265 sbi->inode_hashtable + fat_hash(sb, i_pos));
266 spin_unlock(&sbi->inode_hash_lock); 271 spin_unlock(&sbi->inode_hash_lock);
267} 272}
268
269EXPORT_SYMBOL_GPL(fat_attach); 273EXPORT_SYMBOL_GPL(fat_attach);
270 274
271void fat_detach(struct inode *inode) 275void fat_detach(struct inode *inode)
@@ -276,13 +280,12 @@ void fat_detach(struct inode *inode)
276 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 280 hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
277 spin_unlock(&sbi->inode_hash_lock); 281 spin_unlock(&sbi->inode_hash_lock);
278} 282}
279
280EXPORT_SYMBOL_GPL(fat_detach); 283EXPORT_SYMBOL_GPL(fat_detach);
281 284
282struct inode *fat_iget(struct super_block *sb, loff_t i_pos) 285struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
283{ 286{
284 struct msdos_sb_info *sbi = MSDOS_SB(sb); 287 struct msdos_sb_info *sbi = MSDOS_SB(sb);
285 struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); 288 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
286 struct hlist_node *_p; 289 struct hlist_node *_p;
287 struct msdos_inode_info *i; 290 struct msdos_inode_info *i;
288 struct inode *inode = NULL; 291 struct inode *inode = NULL;
@@ -341,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
341 344
342 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { 345 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
343 inode->i_generation &= ~1; 346 inode->i_generation &= ~1;
344 inode->i_mode = MSDOS_MKMODE(de->attr, 347 inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
345 S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
346 inode->i_op = sbi->dir_ops; 348 inode->i_op = sbi->dir_ops;
347 inode->i_fop = &fat_dir_operations; 349 inode->i_fop = &fat_dir_operations;
348 350
@@ -359,10 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
359 inode->i_nlink = fat_subdirs(inode); 361 inode->i_nlink = fat_subdirs(inode);
360 } else { /* not a directory */ 362 } else { /* not a directory */
361 inode->i_generation |= 1; 363 inode->i_generation |= 1;
362 inode->i_mode = MSDOS_MKMODE(de->attr, 364 inode->i_mode = fat_make_mode(sbi, de->attr,
363 ((sbi->options.showexec && !is_exec(de->name + 8)) 365 ((sbi->options.showexec && !is_exec(de->name + 8))
364 ? S_IRUGO|S_IWUGO : S_IRWXUGO) 366 ? S_IRUGO|S_IWUGO : S_IRWXUGO));
365 & ~sbi->options.fs_fmask) | S_IFREG;
366 MSDOS_I(inode)->i_start = le16_to_cpu(de->start); 367 MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
367 if (sbi->fat_bits == 32) 368 if (sbi->fat_bits == 32)
368 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); 369 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
@@ -378,25 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
378 if (sbi->options.sys_immutable) 379 if (sbi->options.sys_immutable)
379 inode->i_flags |= S_IMMUTABLE; 380 inode->i_flags |= S_IMMUTABLE;
380 } 381 }
381 MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; 382 fat_save_attrs(inode, de->attr);
383
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 384 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 385 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 386
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), 387 fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
386 sbi->options.tz_utc);
387 inode->i_mtime.tv_nsec = 0;
388 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
389 int secs = de->ctime_cs / 100; 389 fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
390 int csecs = de->ctime_cs % 100; 390 de->cdate, de->ctime_cs);
391 inode->i_ctime.tv_sec = 391 fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
392 date_dos2unix(le16_to_cpu(de->ctime),
393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
395 inode->i_ctime.tv_nsec = csecs * 10000000;
396 inode->i_atime.tv_sec =
397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
399 inode->i_atime.tv_nsec = 0;
400 } else 392 } else
401 inode->i_ctime = inode->i_atime = inode->i_mtime; 393 inode->i_ctime = inode->i_atime = inode->i_mtime;
402 394
@@ -443,13 +435,8 @@ static void fat_delete_inode(struct inode *inode)
443 435
444static void fat_clear_inode(struct inode *inode) 436static void fat_clear_inode(struct inode *inode)
445{ 437{
446 struct super_block *sb = inode->i_sb;
447 struct msdos_sb_info *sbi = MSDOS_SB(sb);
448
449 spin_lock(&sbi->inode_hash_lock);
450 fat_cache_inval_inode(inode); 438 fat_cache_inval_inode(inode);
451 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 439 fat_detach(inode);
452 spin_unlock(&sbi->inode_hash_lock);
453} 440}
454 441
455static void fat_write_super(struct super_block *sb) 442static void fat_write_super(struct super_block *sb)
@@ -555,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
555 return 0; 542 return 0;
556} 543}
557 544
545static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
546 struct inode *inode)
547{
548 loff_t i_pos;
549#if BITS_PER_LONG == 32
550 spin_lock(&sbi->inode_hash_lock);
551#endif
552 i_pos = MSDOS_I(inode)->i_pos;
553#if BITS_PER_LONG == 32
554 spin_unlock(&sbi->inode_hash_lock);
555#endif
556 return i_pos;
557}
558
558static int fat_write_inode(struct inode *inode, int wait) 559static int fat_write_inode(struct inode *inode, int wait)
559{ 560{
560 struct super_block *sb = inode->i_sb; 561 struct super_block *sb = inode->i_sb;
@@ -564,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait)
564 loff_t i_pos; 565 loff_t i_pos;
565 int err; 566 int err;
566 567
568 if (inode->i_ino == MSDOS_ROOT_INO)
569 return 0;
570
567retry: 571retry:
568 i_pos = MSDOS_I(inode)->i_pos; 572 i_pos = fat_i_pos_read(sbi, inode);
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 573 if (!i_pos)
570 return 0; 574 return 0;
571 575
572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 576 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
@@ -588,19 +592,17 @@ retry:
588 raw_entry->size = 0; 592 raw_entry->size = 0;
589 else 593 else
590 raw_entry->size = cpu_to_le32(inode->i_size); 594 raw_entry->size = cpu_to_le32(inode->i_size);
591 raw_entry->attr = fat_attr(inode); 595 raw_entry->attr = fat_make_attrs(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 596 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 597 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, 598 fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
595 &raw_entry->date, sbi->options.tz_utc); 599 &raw_entry->date, NULL);
596 if (sbi->options.isvfat) { 600 if (sbi->options.isvfat) {
597 __le16 atime; 601 __le16 atime;
598 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, 602 fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
599 &raw_entry->cdate, sbi->options.tz_utc); 603 &raw_entry->cdate, &raw_entry->ctime_cs);
600 fat_date_unix2dos(inode->i_atime.tv_sec, &atime, 604 fat_time_unix2fat(sbi, &inode->i_atime, &atime,
601 &raw_entry->adate, sbi->options.tz_utc); 605 &raw_entry->adate, NULL);
602 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
603 inode->i_ctime.tv_nsec / 10000000;
604 } 606 }
605 spin_unlock(&sbi->inode_hash_lock); 607 spin_unlock(&sbi->inode_hash_lock);
606 mark_buffer_dirty(bh); 608 mark_buffer_dirty(bh);
@@ -819,8 +821,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
819 seq_puts(m, ",uni_xlate"); 821 seq_puts(m, ",uni_xlate");
820 if (!opts->numtail) 822 if (!opts->numtail)
821 seq_puts(m, ",nonumtail"); 823 seq_puts(m, ",nonumtail");
824 if (opts->rodir)
825 seq_puts(m, ",rodir");
822 } 826 }
823 if (sbi->options.flush) 827 if (opts->flush)
824 seq_puts(m, ",flush"); 828 seq_puts(m, ",flush");
825 if (opts->tz_utc) 829 if (opts->tz_utc)
826 seq_puts(m, ",tz=UTC"); 830 seq_puts(m, ",tz=UTC");
@@ -836,7 +840,7 @@ enum {
836 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 840 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
837 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 841 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
838 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 842 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
839 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, 843 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
840}; 844};
841 845
842static const match_table_t fat_tokens = { 846static const match_table_t fat_tokens = {
@@ -908,6 +912,7 @@ static const match_table_t vfat_tokens = {
908 {Opt_nonumtail_yes, "nonumtail=yes"}, 912 {Opt_nonumtail_yes, "nonumtail=yes"},
909 {Opt_nonumtail_yes, "nonumtail=true"}, 913 {Opt_nonumtail_yes, "nonumtail=true"},
910 {Opt_nonumtail_yes, "nonumtail"}, 914 {Opt_nonumtail_yes, "nonumtail"},
915 {Opt_rodir, "rodir"},
911 {Opt_err, NULL} 916 {Opt_err, NULL}
912}; 917};
913 918
@@ -927,10 +932,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
927 opts->allow_utime = -1; 932 opts->allow_utime = -1;
928 opts->codepage = fat_default_codepage; 933 opts->codepage = fat_default_codepage;
929 opts->iocharset = fat_default_iocharset; 934 opts->iocharset = fat_default_iocharset;
930 if (is_vfat) 935 if (is_vfat) {
931 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; 936 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
932 else 937 opts->rodir = 0;
938 } else {
933 opts->shortname = 0; 939 opts->shortname = 0;
940 opts->rodir = 1;
941 }
934 opts->name_check = 'n'; 942 opts->name_check = 'n';
935 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; 943 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
936 opts->utf8 = opts->unicode_xlate = 0; 944 opts->utf8 = opts->unicode_xlate = 0;
@@ -1081,6 +1089,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1081 case Opt_nonumtail_yes: /* empty or 1 or yes or true */ 1089 case Opt_nonumtail_yes: /* empty or 1 or yes or true */
1082 opts->numtail = 0; /* negated option */ 1090 opts->numtail = 0; /* negated option */
1083 break; 1091 break;
1092 case Opt_rodir:
1093 opts->rodir = 1;
1094 break;
1084 1095
1085 /* obsolete mount options */ 1096 /* obsolete mount options */
1086 case Opt_obsolate: 1097 case Opt_obsolate:
@@ -1126,7 +1137,7 @@ static int fat_read_root(struct inode *inode)
1126 inode->i_gid = sbi->options.fs_gid; 1137 inode->i_gid = sbi->options.fs_gid;
1127 inode->i_version++; 1138 inode->i_version++;
1128 inode->i_generation = 0; 1139 inode->i_generation = 0;
1129 inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; 1140 inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
1130 inode->i_op = sbi->dir_ops; 1141 inode->i_op = sbi->dir_ops;
1131 inode->i_fop = &fat_dir_operations; 1142 inode->i_fop = &fat_dir_operations;
1132 if (sbi->fat_bits == 32) { 1143 if (sbi->fat_bits == 32) {
@@ -1143,7 +1154,7 @@ static int fat_read_root(struct inode *inode)
1143 MSDOS_I(inode)->i_logstart = 0; 1154 MSDOS_I(inode)->i_logstart = 0;
1144 MSDOS_I(inode)->mmu_private = inode->i_size; 1155 MSDOS_I(inode)->mmu_private = inode->i_size;
1145 1156
1146 MSDOS_I(inode)->i_attrs = ATTR_NONE; 1157 fat_save_attrs(inode, ATTR_DIR);
1147 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1158 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1148 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1159 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1149 inode->i_nlink = fat_subdirs(inode)+2; 1160 inode->i_nlink = fat_subdirs(inode)+2;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 79fb98ad36d4..ac39ebcc1496 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -8,8 +8,8 @@
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/msdos_fs.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include "fat.h"
13 13
14/* 14/*
15 * fat_fs_panic reports a severe file system problem and sets the file system 15 * fat_fs_panic reports a severe file system problem and sets the file system
@@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
124 mark_inode_dirty(inode); 124 mark_inode_dirty(inode);
125 } 125 }
126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { 126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
127 fat_fs_panic(sb, "clusters badly computed (%d != %lu)", 127 fat_fs_panic(sb, "clusters badly computed (%d != %llu)",
128 new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); 128 new_fclus,
129 (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
129 fat_cache_inval_inode(inode); 130 fat_cache_inval_inode(inode);
130 } 131 }
131 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); 132 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9);
@@ -135,65 +136,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
135 136
136extern struct timezone sys_tz; 137extern struct timezone sys_tz;
137 138
139/*
140 * The epoch of FAT timestamp is 1980.
141 * : bits : value
142 * date: 0 - 4: day (1 - 31)
143 * date: 5 - 8: month (1 - 12)
144 * date: 9 - 15: year (0 - 127) from 1980
145 * time: 0 - 4: sec (0 - 29) 2sec counts
146 * time: 5 - 10: min (0 - 59)
147 * time: 11 - 15: hour (0 - 23)
148 */
149#define SECS_PER_MIN 60
150#define SECS_PER_HOUR (60 * 60)
151#define SECS_PER_DAY (SECS_PER_HOUR * 24)
152#define UNIX_SECS_1980 315532800L
153#if BITS_PER_LONG == 64
154#define UNIX_SECS_2108 4354819200L
155#endif
156/* days between 1.1.70 and 1.1.80 (2 leap days) */
157#define DAYS_DELTA (365 * 10 + 2)
158/* 120 (2100 - 1980) isn't leap year */
159#define YEAR_2100 120
160#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
161
138/* Linear day numbers of the respective 1sts in non-leap years. */ 162/* Linear day numbers of the respective 1sts in non-leap years. */
139static int day_n[] = { 163static time_t days_in_year[] = {
140 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ 164 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
141 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 165 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
142}; 166};
143 167
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 168/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) 169void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
170 __le16 __time, __le16 __date, u8 time_cs)
146{ 171{
147 int month, year, secs; 172 u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
173 time_t second, day, leap_day, month, year;
148 174
149 /* 175 year = date >> 9;
150 * first subtract and mask after that... Otherwise, if 176 month = max(1, (date >> 5) & 0xf);
151 * date == 0, bad things happen 177 day = max(1, date & 0x1f) - 1;
152 */ 178
153 month = ((date >> 5) - 1) & 15; 179 leap_day = (year + 3) / 4;
154 year = date >> 9; 180 if (year > YEAR_2100) /* 2100 isn't leap year */
155 secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* 181 leap_day--;
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 182 if (IS_LEAP_YEAR(year) && month > 2)
157 month < 2 ? 1 : 0)+3653); 183 leap_day++;
158 /* days since 1.1.70 plus 80's leap day */ 184
159 if (!tz_utc) 185 second = (time & 0x1f) << 1;
160 secs += sys_tz.tz_minuteswest*60; 186 second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
161 return secs; 187 second += (time >> 11) * SECS_PER_HOUR;
188 second += (year * 365 + leap_day
189 + days_in_year[month] + day
190 + DAYS_DELTA) * SECS_PER_DAY;
191
192 if (!sbi->options.tz_utc)
193 second += sys_tz.tz_minuteswest * SECS_PER_MIN;
194
195 if (time_cs) {
196 ts->tv_sec = second + (time_cs / 100);
197 ts->tv_nsec = (time_cs % 100) * 10000000;
198 } else {
199 ts->tv_sec = second;
200 ts->tv_nsec = 0;
201 }
162} 202}
163 203
164/* Convert linear UNIX date to a MS-DOS time/date pair. */ 204/* Convert linear UNIX date to a FAT time/date pair. */
165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) 205void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
206 __le16 *time, __le16 *date, u8 *time_cs)
166{ 207{
167 int day, year, nl_day, month; 208 time_t second = ts->tv_sec;
209 time_t day, leap_day, month, year;
168 210
169 if (!tz_utc) 211 if (!sbi->options.tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60; 212 second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
171 213
172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 214 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
173 if (unix_date < 315532800) 215 if (second < UNIX_SECS_1980) {
174 unix_date = 315532800; 216 *time = 0;
175 217 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
176 *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ 218 if (time_cs)
177 (((unix_date/3600) % 24) << 11)); 219 *time_cs = 0;
178 day = unix_date/86400-3652; 220 return;
179 year = day/365; 221 }
180 if ((year+3)/4+365*year > day) 222#if BITS_PER_LONG == 64
223 if (second >= UNIX_SECS_2108) {
224 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
225 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
226 if (time_cs)
227 *time_cs = 199;
228 return;
229 }
230#endif
231
232 day = second / SECS_PER_DAY - DAYS_DELTA;
233 year = day / 365;
234 leap_day = (year + 3) / 4;
235 if (year > YEAR_2100) /* 2100 isn't leap year */
236 leap_day--;
237 if (year * 365 + leap_day > day)
181 year--; 238 year--;
182 day -= (year+3)/4+365*year; 239 leap_day = (year + 3) / 4;
183 if (day == 59 && !(year & 3)) { 240 if (year > YEAR_2100) /* 2100 isn't leap year */
184 nl_day = day; 241 leap_day--;
242 day -= year * 365 + leap_day;
243
244 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
185 month = 2; 245 month = 2;
186 } else { 246 } else {
187 nl_day = (year & 3) || day <= 59 ? day : day-1; 247 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
188 for (month = 0; month < 12; month++) { 248 day--;
189 if (day_n[month] > nl_day) 249 for (month = 1; month < 12; month++) {
250 if (days_in_year[month + 1] > day)
190 break; 251 break;
191 } 252 }
192 } 253 }
193 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); 254 day -= days_in_year[month];
194}
195 255
196EXPORT_SYMBOL_GPL(fat_date_unix2dos); 256 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11
257 | ((second / SECS_PER_MIN) % 60) << 5
258 | (second % SECS_PER_MIN) >> 1);
259 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
260 if (time_cs)
261 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
262}
263EXPORT_SYMBOL_GPL(fat_time_unix2fat);
197 264
198int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 265int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
199{ 266{
diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c
index e844b9809d27..7ba03a4acbe0 100644
--- a/fs/msdos/namei.c
+++ b/fs/fat/namei_msdos.c
@@ -9,8 +9,8 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include "fat.h"
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
@@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
203{ 203{
204 struct super_block *sb = dir->i_sb; 204 struct super_block *sb = dir->i_sb;
205 struct fat_slot_info sinfo; 205 struct fat_slot_info sinfo;
206 struct inode *inode = NULL; 206 struct inode *inode;
207 int res; 207 int err;
208
209 dentry->d_op = &msdos_dentry_operations;
210 208
211 lock_super(sb); 209 lock_super(sb);
212 res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 210
213 if (res == -ENOENT) 211 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
214 goto add; 212 if (err) {
215 if (res < 0) 213 if (err == -ENOENT) {
216 goto out; 214 inode = NULL;
215 goto out;
216 }
217 goto error;
218 }
219
217 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 220 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
218 brelse(sinfo.bh); 221 brelse(sinfo.bh);
219 if (IS_ERR(inode)) { 222 if (IS_ERR(inode)) {
220 res = PTR_ERR(inode); 223 err = PTR_ERR(inode);
221 goto out; 224 goto error;
222 } 225 }
223add: 226out:
224 res = 0; 227 unlock_super(sb);
228 dentry->d_op = &msdos_dentry_operations;
225 dentry = d_splice_alias(inode, dentry); 229 dentry = d_splice_alias(inode, dentry);
226 if (dentry) 230 if (dentry)
227 dentry->d_op = &msdos_dentry_operations; 231 dentry->d_op = &msdos_dentry_operations;
228out: 232 return dentry;
233
234error:
229 unlock_super(sb); 235 unlock_super(sb);
230 if (!res) 236 return ERR_PTR(err);
231 return dentry;
232 return ERR_PTR(res);
233} 237}
234 238
235/***** Creates a directory entry (name is already formatted). */ 239/***** Creates a directory entry (name is already formatted). */
@@ -247,7 +251,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
247 if (is_hid) 251 if (is_hid)
248 de.attr |= ATTR_HIDDEN; 252 de.attr |= ATTR_HIDDEN;
249 de.lcase = 0; 253 de.lcase = 0;
250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 254 fat_time_unix2fat(sbi, ts, &time, &date, NULL);
251 de.cdate = de.adate = 0; 255 de.cdate = de.adate = 0;
252 de.ctime = 0; 256 de.ctime = 0;
253 de.ctime_cs = 0; 257 de.ctime_cs = 0;
diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c
index 155c10b4adbd..bf326d4356a3 100644
--- a/fs/vfat/namei.c
+++ b/fs/fat/namei_vfat.c
@@ -16,36 +16,75 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19
20#include <linux/jiffies.h> 19#include <linux/jiffies.h>
21#include <linux/msdos_fs.h>
22#include <linux/ctype.h> 20#include <linux/ctype.h>
23#include <linux/slab.h> 21#include <linux/slab.h>
24#include <linux/smp_lock.h> 22#include <linux/smp_lock.h>
25#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
26#include <linux/namei.h> 24#include <linux/namei.h>
25#include "fat.h"
27 26
28static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 27/*
28 * If new entry was created in the parent, it could create the 8.3
29 * alias (the shortname of logname). So, the parent may have the
30 * negative-dentry which matches the created 8.3 alias.
31 *
32 * If it happened, the negative dentry isn't actually negative
33 * anymore. So, drop it.
34 */
35static int vfat_revalidate_shortname(struct dentry *dentry)
29{ 36{
30 int ret = 1; 37 int ret = 1;
31 38 spin_lock(&dentry->d_lock);
32 if (!dentry->d_inode && 39 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
33 nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE))
34 /*
35 * negative dentry is dropped, in order to make sure
36 * to use the name which a user desires if this is
37 * create path.
38 */
39 ret = 0; 40 ret = 0;
40 else { 41 spin_unlock(&dentry->d_lock);
41 spin_lock(&dentry->d_lock);
42 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
43 ret = 0;
44 spin_unlock(&dentry->d_lock);
45 }
46 return ret; 42 return ret;
47} 43}
48 44
45static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
46{
47 /* This is not negative dentry. Always valid. */
48 if (dentry->d_inode)
49 return 1;
50 return vfat_revalidate_shortname(dentry);
51}
52
53static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
54{
55 /*
56 * This is not negative dentry. Always valid.
57 *
58 * Note, rename() to existing directory entry will have ->d_inode,
59 * and will use existing name which isn't specified name by user.
60 *
61 * We may be able to drop this positive dentry here. But dropping
62 * positive dentry isn't good idea. So it's unsupported like
63 * rename("filename", "FILENAME") for now.
64 */
65 if (dentry->d_inode)
66 return 1;
67
68 /*
69 * This may be nfsd (or something), anyway, we can't see the
70 * intent of this. So, since this can be for creation, drop it.
71 */
72 if (!nd)
73 return 0;
74
75 /*
76 * Drop the negative dentry, in order to make sure to use the
77 * case sensitive name which is specified by user if this is
78 * for creation.
79 */
80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
81 if (nd->flags & LOOKUP_CREATE)
82 return 0;
83 }
84
85 return vfat_revalidate_shortname(dentry);
86}
87
49/* returns the length of a struct qstr, ignoring trailing dots */ 88/* returns the length of a struct qstr, ignoring trailing dots */
50static unsigned int vfat_striptail_len(struct qstr *qstr) 89static unsigned int vfat_striptail_len(struct qstr *qstr)
51{ 90{
@@ -127,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
127 return 1; 166 return 1;
128} 167}
129 168
130static struct dentry_operations vfat_dentry_ops[4] = { 169static struct dentry_operations vfat_ci_dentry_ops = {
131 { 170 .d_revalidate = vfat_revalidate_ci,
132 .d_hash = vfat_hashi, 171 .d_hash = vfat_hashi,
133 .d_compare = vfat_cmpi, 172 .d_compare = vfat_cmpi,
134 }, 173};
135 { 174
136 .d_revalidate = vfat_revalidate, 175static struct dentry_operations vfat_dentry_ops = {
137 .d_hash = vfat_hashi, 176 .d_revalidate = vfat_revalidate,
138 .d_compare = vfat_cmpi, 177 .d_hash = vfat_hash,
139 }, 178 .d_compare = vfat_cmp,
140 {
141 .d_hash = vfat_hash,
142 .d_compare = vfat_cmp,
143 },
144 {
145 .d_revalidate = vfat_revalidate,
146 .d_hash = vfat_hash,
147 .d_compare = vfat_cmp,
148 }
149}; 179};
150 180
151/* Characters that are undesirable in an MS-DOS file name */ 181/* Characters that are undesirable in an MS-DOS file name */
@@ -569,6 +599,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
569 unsigned char msdos_name[MSDOS_NAME]; 599 unsigned char msdos_name[MSDOS_NAME];
570 wchar_t *uname; 600 wchar_t *uname;
571 __le16 time, date; 601 __le16 time, date;
602 u8 time_cs;
572 int err, ulen, usize, i; 603 int err, ulen, usize, i;
573 loff_t offset; 604 loff_t offset;
574 605
@@ -621,10 +652,10 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 652 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 653 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 654 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 655 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
625 de->time = de->ctime = time; 656 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 657 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 658 de->ctime_cs = time_cs;
628 de->start = cpu_to_le16(cluster); 659 de->start = cpu_to_le16(cluster);
629 de->starthi = cpu_to_le16(cluster >> 16); 660 de->starthi = cpu_to_le16(cluster >> 16);
630 de->size = 0; 661 de->size = 0;
@@ -683,46 +714,58 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
683{ 714{
684 struct super_block *sb = dir->i_sb; 715 struct super_block *sb = dir->i_sb;
685 struct fat_slot_info sinfo; 716 struct fat_slot_info sinfo;
686 struct inode *inode = NULL; 717 struct inode *inode;
687 struct dentry *alias; 718 struct dentry *alias;
688 int err, table; 719 int err;
689 720
690 lock_super(sb); 721 lock_super(sb);
691 table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0;
692 dentry->d_op = &vfat_dentry_ops[table];
693 722
694 err = vfat_find(dir, &dentry->d_name, &sinfo); 723 err = vfat_find(dir, &dentry->d_name, &sinfo);
695 if (err) { 724 if (err) {
696 table++; 725 if (err == -ENOENT) {
726 inode = NULL;
727 goto out;
728 }
697 goto error; 729 goto error;
698 } 730 }
731
699 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 732 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
700 brelse(sinfo.bh); 733 brelse(sinfo.bh);
701 if (IS_ERR(inode)) { 734 if (IS_ERR(inode)) {
702 unlock_super(sb); 735 err = PTR_ERR(inode);
703 return ERR_CAST(inode); 736 goto error;
704 } 737 }
705 alias = d_find_alias(inode);
706 if (alias) {
707 if (d_invalidate(alias) == 0)
708 dput(alias);
709 else {
710 iput(inode);
711 unlock_super(sb);
712 return alias;
713 }
714 738
739 alias = d_find_alias(inode);
740 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
741 /*
742 * This inode has non DCACHE_DISCONNECTED dentry. This
743 * means, the user did ->lookup() by an another name
744 * (longname vs 8.3 alias of it) in past.
745 *
746 * Switch to new one for reason of locality if possible.
747 */
748 BUG_ON(d_unhashed(alias));
749 if (!S_ISDIR(inode->i_mode))
750 d_move(alias, dentry);
751 iput(inode);
752 unlock_super(sb);
753 return alias;
715 } 754 }
716error: 755out:
717 unlock_super(sb); 756 unlock_super(sb);
718 dentry->d_op = &vfat_dentry_ops[table]; 757 dentry->d_op = sb->s_root->d_op;
719 dentry->d_time = dentry->d_parent->d_inode->i_version; 758 dentry->d_time = dentry->d_parent->d_inode->i_version;
720 dentry = d_splice_alias(inode, dentry); 759 dentry = d_splice_alias(inode, dentry);
721 if (dentry) { 760 if (dentry) {
722 dentry->d_op = &vfat_dentry_ops[table]; 761 dentry->d_op = sb->s_root->d_op;
723 dentry->d_time = dentry->d_parent->d_inode->i_version; 762 dentry->d_time = dentry->d_parent->d_inode->i_version;
724 } 763 }
725 return dentry; 764 return dentry;
765
766error:
767 unlock_super(sb);
768 return ERR_PTR(err);
726} 769}
727 770
728static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, 771static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -1014,9 +1057,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1014 return res; 1057 return res;
1015 1058
1016 if (MSDOS_SB(sb)->options.name_check != 's') 1059 if (MSDOS_SB(sb)->options.name_check != 's')
1017 sb->s_root->d_op = &vfat_dentry_ops[0]; 1060 sb->s_root->d_op = &vfat_ci_dentry_ops;
1018 else 1061 else
1019 sb->s_root->d_op = &vfat_dentry_ops[2]; 1062 sb->s_root->d_op = &vfat_dentry_ops;
1020 1063
1021 return 0; 1064 return 0;
1022} 1065}
diff --git a/fs/file_table.c b/fs/file_table.c
index efc06faede6c..5ad0eca6eea2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -269,6 +269,10 @@ void __fput(struct file *file)
269 eventpoll_release(file); 269 eventpoll_release(file);
270 locks_remove_flock(file); 270 locks_remove_flock(file);
271 271
272 if (unlikely(file->f_flags & FASYNC)) {
273 if (file->f_op && file->f_op->fasync)
274 file->f_op->fasync(-1, file, 0);
275 }
272 if (file->f_op && file->f_op->release) 276 if (file->f_op && file->f_op->release)
273 file->f_op->release(inode, file); 277 file->f_op->release(inode, file);
274 security_file_free(file); 278 security_file_free(file);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 87250b6a8682..b72361479be2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1056,7 +1056,6 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
1056 end_requests(fc, &fc->pending); 1056 end_requests(fc, &fc->pending);
1057 end_requests(fc, &fc->processing); 1057 end_requests(fc, &fc->processing);
1058 spin_unlock(&fc->lock); 1058 spin_unlock(&fc->lock);
1059 fasync_helper(-1, file, 0, &fc->fasync);
1060 fuse_conn_put(fc); 1059 fuse_conn_put(fc);
1061 } 1060 }
1062 1061
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index d85c7d931cdf..d367e9b92862 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -537,9 +537,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
537 inotify_dev_event_dequeue(dev); 537 inotify_dev_event_dequeue(dev);
538 mutex_unlock(&dev->ev_mutex); 538 mutex_unlock(&dev->ev_mutex);
539 539
540 if (file->f_flags & FASYNC)
541 inotify_fasync(-1, file, 0);
542
543 /* free this device: the put matching the get in inotify_init() */ 540 /* free this device: the put matching the get in inotify_init() */
544 put_inotify_dev(dev); 541 put_inotify_dev(dev);
545 542
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 1bd8d4acc6f2..61f32f3868cd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -115,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
115 */ 115 */
116void __log_wait_for_space(journal_t *journal) 116void __log_wait_for_space(journal_t *journal)
117{ 117{
118 int nblocks; 118 int nblocks, space_left;
119 assert_spin_locked(&journal->j_state_lock); 119 assert_spin_locked(&journal->j_state_lock);
120 120
121 nblocks = jbd_space_needed(journal); 121 nblocks = jbd_space_needed(journal);
@@ -128,25 +128,42 @@ void __log_wait_for_space(journal_t *journal)
128 /* 128 /*
129 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
130 * were waiting for the checkpoint lock. If there are no 130 * were waiting for the checkpoint lock. If there are no
131 * outstanding transactions there is nothing to checkpoint and 131 * transactions ready to be checkpointed, try to recover
132 * we can't make progress. Abort the journal in this case. 132 * journal space by calling cleanup_journal_tail(), and if
133 * that doesn't work, by waiting for the currently committing
134 * transaction to complete. If there is absolutely no way
135 * to make progress, this is either a BUG or corrupted
136 * filesystem, so abort the journal and leave a stack
137 * trace for forensic evidence.
133 */ 138 */
134 spin_lock(&journal->j_state_lock); 139 spin_lock(&journal->j_state_lock);
135 spin_lock(&journal->j_list_lock); 140 spin_lock(&journal->j_list_lock);
136 nblocks = jbd_space_needed(journal); 141 nblocks = jbd_space_needed(journal);
137 if (__log_space_left(journal) < nblocks) { 142 space_left = __log_space_left(journal);
143 if (space_left < nblocks) {
138 int chkpt = journal->j_checkpoint_transactions != NULL; 144 int chkpt = journal->j_checkpoint_transactions != NULL;
145 tid_t tid = 0;
139 146
147 if (journal->j_committing_transaction)
148 tid = journal->j_committing_transaction->t_tid;
140 spin_unlock(&journal->j_list_lock); 149 spin_unlock(&journal->j_list_lock);
141 spin_unlock(&journal->j_state_lock); 150 spin_unlock(&journal->j_state_lock);
142 if (chkpt) { 151 if (chkpt) {
143 log_do_checkpoint(journal); 152 log_do_checkpoint(journal);
153 } else if (cleanup_journal_tail(journal) == 0) {
154 /* We were able to recover space; yay! */
155 ;
156 } else if (tid) {
157 log_wait_commit(journal, tid);
144 } else { 158 } else {
145 printk(KERN_ERR "%s: no transactions\n", 159 printk(KERN_ERR "%s: needed %d blocks and "
146 __func__); 160 "only had %d space available\n",
161 __func__, nblocks, space_left);
162 printk(KERN_ERR "%s: no way to get more "
163 "journal space\n", __func__);
164 WARN_ON(1);
147 journal_abort(journal, 0); 165 journal_abort(journal, 0);
148 } 166 }
149
150 spin_lock(&journal->j_state_lock); 167 spin_lock(&journal->j_state_lock);
151 } else { 168 } else {
152 spin_unlock(&journal->j_list_lock); 169 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index d15cd6e7251e..60d4c32c8808 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -860,7 +860,6 @@ out:
860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences 860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
861 * @handle: transaction 861 * @handle: transaction
862 * @bh: buffer to undo 862 * @bh: buffer to undo
863 * @credits: store the number of taken credits here (if not NULL)
864 * 863 *
865 * Sometimes there is a need to distinguish between metadata which has 864 * Sometimes there is a need to distinguish between metadata which has
866 * been committed to disk and that which has not. The ext3fs code uses 865 * been committed to disk and that which has not. The ext3fs code uses
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9203c3332f17..9497718fe920 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
116 */ 116 */
117void __jbd2_log_wait_for_space(journal_t *journal) 117void __jbd2_log_wait_for_space(journal_t *journal)
118{ 118{
119 int nblocks; 119 int nblocks, space_left;
120 assert_spin_locked(&journal->j_state_lock); 120 assert_spin_locked(&journal->j_state_lock);
121 121
122 nblocks = jbd_space_needed(journal); 122 nblocks = jbd_space_needed(journal);
@@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal)
129 /* 129 /*
130 * Test again, another process may have checkpointed while we 130 * Test again, another process may have checkpointed while we
131 * were waiting for the checkpoint lock. If there are no 131 * were waiting for the checkpoint lock. If there are no
132 * outstanding transactions there is nothing to checkpoint and 132 * transactions ready to be checkpointed, try to recover
133 * we can't make progress. Abort the journal in this case. 133 * journal space by calling cleanup_journal_tail(), and if
134 * that doesn't work, by waiting for the currently committing
135 * transaction to complete. If there is absolutely no way
136 * to make progress, this is either a BUG or corrupted
137 * filesystem, so abort the journal and leave a stack
138 * trace for forensic evidence.
134 */ 139 */
135 spin_lock(&journal->j_state_lock); 140 spin_lock(&journal->j_state_lock);
136 spin_lock(&journal->j_list_lock); 141 spin_lock(&journal->j_list_lock);
137 nblocks = jbd_space_needed(journal); 142 nblocks = jbd_space_needed(journal);
138 if (__jbd2_log_space_left(journal) < nblocks) { 143 space_left = __jbd2_log_space_left(journal);
144 if (space_left < nblocks) {
139 int chkpt = journal->j_checkpoint_transactions != NULL; 145 int chkpt = journal->j_checkpoint_transactions != NULL;
146 tid_t tid = 0;
140 147
148 if (journal->j_committing_transaction)
149 tid = journal->j_committing_transaction->t_tid;
141 spin_unlock(&journal->j_list_lock); 150 spin_unlock(&journal->j_list_lock);
142 spin_unlock(&journal->j_state_lock); 151 spin_unlock(&journal->j_state_lock);
143 if (chkpt) { 152 if (chkpt) {
144 jbd2_log_do_checkpoint(journal); 153 jbd2_log_do_checkpoint(journal);
154 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
155 /* We were able to recover space; yay! */
156 ;
157 } else if (tid) {
158 jbd2_log_wait_commit(journal, tid);
145 } else { 159 } else {
146 printk(KERN_ERR "%s: no transactions\n", 160 printk(KERN_ERR "%s: needed %d blocks and "
147 __func__); 161 "only had %d space available\n",
162 __func__, nblocks, space_left);
163 printk(KERN_ERR "%s: no way to get more "
164 "journal space in %s\n", __func__,
165 journal->j_devname);
166 WARN_ON(1);
148 jbd2_journal_abort(journal, 0); 167 jbd2_journal_abort(journal, 0);
149 } 168 }
150
151 spin_lock(&journal->j_state_lock); 169 spin_lock(&journal->j_state_lock);
152 } else { 170 } else {
153 spin_unlock(&journal->j_list_lock); 171 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8b119e16aa36..ebc667bc54a8 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -974,6 +974,9 @@ restart_loop:
974 journal->j_committing_transaction = NULL; 974 journal->j_committing_transaction = NULL;
975 spin_unlock(&journal->j_state_lock); 975 spin_unlock(&journal->j_state_lock);
976 976
977 if (journal->j_commit_callback)
978 journal->j_commit_callback(journal, commit_transaction);
979
977 if (commit_transaction->t_checkpoint_list == NULL && 980 if (commit_transaction->t_checkpoint_list == NULL &&
978 commit_transaction->t_checkpoint_io_list == NULL) { 981 commit_transaction->t_checkpoint_io_list == NULL) {
979 __jbd2_journal_drop_transaction(journal, commit_transaction); 982 __jbd2_journal_drop_transaction(journal, commit_transaction);
@@ -995,11 +998,8 @@ restart_loop:
995 } 998 }
996 spin_unlock(&journal->j_list_lock); 999 spin_unlock(&journal->j_list_lock);
997 1000
998 if (journal->j_commit_callback)
999 journal->j_commit_callback(journal, commit_transaction);
1000
1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1001 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
1002 journal->j_devname, commit_transaction->t_tid, 1002 journal->j_devname, journal->j_commit_sequence,
1003 journal->j_tail_sequence); 1003 journal->j_tail_sequence);
1004 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1004 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1005 journal->j_commit_sequence, journal->j_tail_sequence); 1005 journal->j_commit_sequence, journal->j_tail_sequence);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 783de118de92..e70d657a19f8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1089,6 +1089,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1089 if (!journal->j_wbuf) { 1089 if (!journal->j_wbuf) {
1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1091 __func__); 1091 __func__);
1092 jbd2_stats_proc_exit(journal);
1092 kfree(journal); 1093 kfree(journal);
1093 return NULL; 1094 return NULL;
1094 } 1095 }
@@ -1098,6 +1099,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1098 if (err) { 1099 if (err) {
1099 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1100 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1100 __func__); 1101 __func__);
1102 jbd2_stats_proc_exit(journal);
1101 kfree(journal); 1103 kfree(journal);
1102 return NULL; 1104 return NULL;
1103 } 1105 }
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 8adebd3e43c6..3cceef4ad2b7 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c)
85 for (;;) { 85 for (;;) {
86 allow_signal(SIGHUP); 86 allow_signal(SIGHUP);
87 again: 87 again:
88 spin_lock(&c->erase_completion_lock);
88 if (!jffs2_thread_should_wake(c)) { 89 if (!jffs2_thread_should_wake(c)) {
89 set_current_state (TASK_INTERRUPTIBLE); 90 set_current_state (TASK_INTERRUPTIBLE);
91 spin_unlock(&c->erase_completion_lock);
90 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); 92 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n"));
91 /* Yes, there's a race here; we checked jffs2_thread_should_wake()
92 before setting current->state to TASK_INTERRUPTIBLE. But it doesn't
93 matter - We don't care if we miss a wakeup, because the GC thread
94 is only an optimisation anyway. */
95 schedule(); 93 schedule();
96 } 94 } else
95 spin_unlock(&c->erase_completion_lock);
96
97 97
98 /* This thread is purely an optimisation. But if it runs when 98 /* This thread is purely an optimisation. But if it runs when
99 other things could be running, it actually makes things a 99 other things could be running, it actually makes things a
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index 47b045797e42..90cb60d09787 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -19,7 +19,7 @@
19 19
20static void *lzo_mem; 20static void *lzo_mem;
21static void *lzo_compress_buf; 21static void *lzo_compress_buf;
22static DEFINE_MUTEX(deflate_mutex); 22static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */
23 23
24static void free_workspace(void) 24static void free_workspace(void)
25{ 25{
@@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
49 49
50 mutex_lock(&deflate_mutex); 50 mutex_lock(&deflate_mutex);
51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); 51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem);
52 mutex_unlock(&deflate_mutex);
53
54 if (ret != LZO_E_OK) 52 if (ret != LZO_E_OK)
55 return -1; 53 goto fail;
56 54
57 if (compress_size > *dstlen) 55 if (compress_size > *dstlen)
58 return -1; 56 goto fail;
59 57
60 memcpy(cpage_out, lzo_compress_buf, compress_size); 58 memcpy(cpage_out, lzo_compress_buf, compress_size);
61 *dstlen = compress_size; 59 mutex_unlock(&deflate_mutex);
62 60
61 *dstlen = compress_size;
63 return 0; 62 return 0;
63
64 fail:
65 mutex_unlock(&deflate_mutex);
66 return -1;
64} 67}
65 68
66static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, 69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 0875b60b4bf7..21a052915aa9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,9 +261,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
261 261
262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */ 262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */
263 263
264#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
264 /* adjust write buffer offset, else we get a non contiguous write bug */ 265 /* adjust write buffer offset, else we get a non contiguous write bug */
265 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) 266 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
266 c->wbuf_ofs = 0xffffffff; 267 c->wbuf_ofs = 0xffffffff;
268#endif
267 269
268 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); 270 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
269 271
diff --git a/fs/libfs.c b/fs/libfs.c
index 74688598bcf7..e960a8321902 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr);
814EXPORT_SYMBOL(simple_link); 814EXPORT_SYMBOL(simple_link);
815EXPORT_SYMBOL(simple_lookup); 815EXPORT_SYMBOL(simple_lookup);
816EXPORT_SYMBOL(simple_pin_fs); 816EXPORT_SYMBOL(simple_pin_fs);
817EXPORT_SYMBOL(simple_prepare_write); 817EXPORT_UNUSED_SYMBOL(simple_prepare_write);
818EXPORT_SYMBOL(simple_readpage); 818EXPORT_SYMBOL(simple_readpage);
819EXPORT_SYMBOL(simple_release_fs); 819EXPORT_SYMBOL(simple_release_fs);
820EXPORT_SYMBOL(simple_rename); 820EXPORT_SYMBOL(simple_rename);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 014f6ce48172..4dfdcbc6bf68 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -434,6 +434,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
434 * reclaim all locks we hold on this server. 434 * reclaim all locks we hold on this server.
435 */ 435 */
436 memset(&saddr, 0, sizeof(saddr)); 436 memset(&saddr, 0, sizeof(saddr));
437 saddr.sin_family = AF_INET;
437 saddr.sin_addr.s_addr = argp->addr; 438 saddr.sin_addr.s_addr = argp->addr;
438 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); 439 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
439 440
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 548b0bb2b84d..3ca89e2a9381 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -466,6 +466,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
466 * reclaim all locks we hold on this server. 466 * reclaim all locks we hold on this server.
467 */ 467 */
468 memset(&saddr, 0, sizeof(saddr)); 468 memset(&saddr, 0, sizeof(saddr));
469 saddr.sin_family = AF_INET;
469 saddr.sin_addr.s_addr = argp->addr; 470 saddr.sin_addr.s_addr = argp->addr;
470 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); 471 nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
471 472
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile
deleted file mode 100644
index ea67646fcb95..000000000000
--- a/fs/msdos/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the Linux msdos filesystem routines.
3#
4
5obj-$(CONFIG_MSDOS_FS) += msdos.o
6
7msdos-y := namei.o
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b9195c02a863..d22eb383e1cf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * nfs inode and superblock handling functions 6 * nfs inode and superblock handling functions
7 * 7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some 8 * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs. 9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 * 10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. 11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
@@ -908,21 +908,16 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
908 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); 908 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
909} 909}
910 910
911static unsigned long nfs_attr_generation_counter; 911static atomic_long_t nfs_attr_generation_counter;
912 912
913static unsigned long nfs_read_attr_generation_counter(void) 913static unsigned long nfs_read_attr_generation_counter(void)
914{ 914{
915 smp_rmb(); 915 return atomic_long_read(&nfs_attr_generation_counter);
916 return nfs_attr_generation_counter;
917} 916}
918 917
919unsigned long nfs_inc_attr_generation_counter(void) 918unsigned long nfs_inc_attr_generation_counter(void)
920{ 919{
921 unsigned long ret; 920 return atomic_long_inc_return(&nfs_attr_generation_counter);
922 smp_rmb();
923 ret = ++nfs_attr_generation_counter;
924 smp_wmb();
925 return ret;
926} 921}
927 922
928void nfs_fattr_init(struct nfs_fattr *fattr) 923void nfs_fattr_init(struct nfs_fattr *fattr)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a3b0061dfd45..f48db679a1c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * nfs superblock handling functions 6 * nfs superblock handling functions
7 * 7 *
8 * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some 8 * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some
9 * experimental NFS changes. Modularisation taken straight from SYS5 fs. 9 * experimental NFS changes. Modularisation taken straight from SYS5 fs.
10 * 10 *
11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. 11 * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0bc56f6d9276..4433c8f00163 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1875,11 +1875,11 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1875 return -ENOMEM; 1875 return -ENOMEM;
1876 1876
1877 offset = *offsetp; 1877 offset = *offsetp;
1878 cdp->err = nfserr_eof; /* will be cleared on successful read */
1879 1878
1880 while (1) { 1879 while (1) {
1881 unsigned int reclen; 1880 unsigned int reclen;
1882 1881
1882 cdp->err = nfserr_eof; /* will be cleared on successful read */
1883 buf.used = 0; 1883 buf.used = 0;
1884 buf.full = 0; 1884 buf.full = 0;
1885 1885
@@ -1912,8 +1912,6 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1912 de = (struct buffered_dirent *)((char *)de + reclen); 1912 de = (struct buffered_dirent *)((char *)de + reclen);
1913 } 1913 }
1914 offset = vfs_llseek(file, 0, SEEK_CUR); 1914 offset = vfs_llseek(file, 0, SEEK_CUR);
1915 if (!buf.full)
1916 break;
1917 } 1915 }
1918 1916
1919 done: 1917 done:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8d3225a78073..e2570a3bc2b2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -247,8 +247,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
247 mlog_entry_void(); 247 mlog_entry_void();
248 248
249 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 249 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
250 if (handle == NULL) { 250 if (IS_ERR(handle)) {
251 ret = -ENOMEM; 251 ret = PTR_ERR(handle);
252 mlog_errno(ret); 252 mlog_errno(ret);
253 goto out; 253 goto out;
254 } 254 }
@@ -312,8 +312,8 @@ static int ocfs2_simple_size_update(struct inode *inode,
312 handle_t *handle = NULL; 312 handle_t *handle = NULL;
313 313
314 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 314 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
315 if (handle == NULL) { 315 if (IS_ERR(handle)) {
316 ret = -ENOMEM; 316 ret = PTR_ERR(handle);
317 mlog_errno(ret); 317 mlog_errno(ret);
318 goto out; 318 goto out;
319 } 319 }
@@ -679,8 +679,7 @@ leave:
679 679
680/* Some parts of this taken from generic_cont_expand, which turned out 680/* Some parts of this taken from generic_cont_expand, which turned out
681 * to be too fragile to do exactly what we need without us having to 681 * to be too fragile to do exactly what we need without us having to
682 * worry about recursive locking in ->prepare_write() and 682 * worry about recursive locking in ->write_begin() and ->write_end(). */
683 * ->commit_write(). */
684static int ocfs2_write_zero_page(struct inode *inode, 683static int ocfs2_write_zero_page(struct inode *inode,
685 u64 size) 684 u64 size)
686{ 685{
@@ -1056,8 +1055,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1056 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); 1055 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
1057 1056
1058 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1057 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1059 if (handle == NULL) { 1058 if (IS_ERR(handle)) {
1060 ret = -ENOMEM; 1059 ret = PTR_ERR(handle);
1061 mlog_errno(ret); 1060 mlog_errno(ret);
1062 goto out; 1061 goto out;
1063 } 1062 }
@@ -1260,8 +1259,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1260 } 1259 }
1261 1260
1262 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 1261 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1263 if (handle == NULL) { 1262 if (IS_ERR(handle)) {
1264 ret = -ENOMEM; 1263 ret = PTR_ERR(handle);
1265 mlog_errno(ret); 1264 mlog_errno(ret);
1266 goto out; 1265 goto out;
1267 } 1266 }
@@ -1353,8 +1352,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1353 goto out; 1352 goto out;
1354 1353
1355 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1354 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1356 if (handle == NULL) { 1355 if (IS_ERR(handle)) {
1357 ret = -ENOMEM; 1356 ret = PTR_ERR(handle);
1358 mlog_errno(ret); 1357 mlog_errno(ret);
1359 goto out; 1358 goto out;
1360 } 1359 }
@@ -1867,6 +1866,13 @@ relock:
1867 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1866 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1868 ppos, count, ocount); 1867 ppos, count, ocount);
1869 if (written < 0) { 1868 if (written < 0) {
1869 /*
1870 * direct write may have instantiated a few
1871 * blocks outside i_size. Trim these off again.
1872 * Don't need i_size_read because we hold i_mutex.
1873 */
1874 if (*ppos + count > inode->i_size)
1875 vmtruncate(inode, inode->i_size);
1870 ret = written; 1876 ret = written;
1871 goto out_dio; 1877 goto out_dio;
1872 } 1878 }
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4903688f72a9..7aa00d511874 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1106,6 +1106,12 @@ void ocfs2_clear_inode(struct inode *inode)
1106 oi->ip_last_trans = 0; 1106 oi->ip_last_trans = 0;
1107 oi->ip_dir_start_lookup = 0; 1107 oi->ip_dir_start_lookup = 0;
1108 oi->ip_blkno = 0ULL; 1108 oi->ip_blkno = 0ULL;
1109
1110 /*
1111 * ip_jinode is used to track txns against this inode. We ensure that
1112 * the journal is flushed before journal shutdown. Thus it is safe to
1113 * have inodes get cleaned up after journal shutdown.
1114 */
1109 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1115 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1110 &oi->ip_jinode); 1116 &oi->ip_jinode);
1111 1117
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 81e40677eecb..99fe9d584f3c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -690,6 +690,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
690 690
691 /* Shutdown the kernel journal system */ 691 /* Shutdown the kernel journal system */
692 jbd2_journal_destroy(journal->j_journal); 692 jbd2_journal_destroy(journal->j_journal);
693 journal->j_journal = NULL;
693 694
694 OCFS2_I(inode)->ip_open_count--; 695 OCFS2_I(inode)->ip_open_count--;
695 696
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 3dc18d67557c..eea1d24713ea 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -113,7 +113,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
113 * ocfs2_write_begin_nolock(). 113 * ocfs2_write_begin_nolock().
114 */ 114 */
115 if (!PageUptodate(page) || page->mapping != inode->i_mapping) { 115 if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
116 ret = -EINVAL; 116 /*
117 * the page has been umapped in ocfs2_data_downconvert_worker.
118 * So return 0 here and let VFS retry.
119 */
120 ret = 0;
117 goto out; 121 goto out;
118 } 122 }
119 123
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 485a6aa0ad39..f4967e634ffd 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -378,8 +378,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
378 } 378 }
379 379
380 inode = new_inode(dir->i_sb); 380 inode = new_inode(dir->i_sb);
381 if (IS_ERR(inode)) { 381 if (!inode) {
382 status = PTR_ERR(inode); 382 status = -ENOMEM;
383 mlog(ML_ERROR, "new_inode failed!\n"); 383 mlog(ML_ERROR, "new_inode failed!\n");
384 goto leave; 384 goto leave;
385 } 385 }
@@ -491,8 +491,10 @@ leave:
491 brelse(*new_fe_bh); 491 brelse(*new_fe_bh);
492 *new_fe_bh = NULL; 492 *new_fe_bh = NULL;
493 } 493 }
494 if (inode) 494 if (inode) {
495 clear_nlink(inode);
495 iput(inode); 496 iput(inode);
497 }
496 } 498 }
497 499
498 mlog_exit(status); 500 mlog_exit(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a21a465490c4..fef7ece32376 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -473,6 +473,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
473 (____gd)->bg_signature); \ 473 (____gd)->bg_signature); \
474} while (0) 474} while (0)
475 475
476#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
477 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
478
476static inline unsigned long ino_from_blkno(struct super_block *sb, 479static inline unsigned long ino_from_blkno(struct super_block *sb,
477 u64 blkno) 480 u64 blkno)
478{ 481{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index f24ce3d3f956..5f180cf7abbd 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -742,12 +742,12 @@ struct ocfs2_group_desc
742 */ 742 */
743struct ocfs2_xattr_entry { 743struct ocfs2_xattr_entry {
744 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ 744 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
745 __le16 xe_name_offset; /* byte offset from the 1st etnry in the local 745 __le16 xe_name_offset; /* byte offset from the 1st entry in the
746 local xattr storage(inode, xattr block or 746 local xattr storage(inode, xattr block or
747 xattr bucket). */ 747 xattr bucket). */
748 __u8 xe_name_len; /* xattr name len, does't include prefix. */ 748 __u8 xe_name_len; /* xattr name len, does't include prefix. */
749 __u8 xe_type; /* the low 7 bits indicates the name prefix's 749 __u8 xe_type; /* the low 7 bits indicate the name prefix
750 * type and the highest 1 bits indicate whether 750 * type and the highest bit indicates whether
751 * the EA is stored in the local storage. */ 751 * the EA is stored in the local storage. */
752 __le64 xe_value_size; /* real xattr value length. */ 752 __le64 xe_value_size; /* real xattr value length. */
753}; 753};
@@ -766,9 +766,10 @@ struct ocfs2_xattr_header {
766 xattr. */ 766 xattr. */
767 __le16 xh_name_value_len; /* total length of name/value 767 __le16 xh_name_value_len; /* total length of name/value
768 length in this bucket. */ 768 length in this bucket. */
769 __le16 xh_num_buckets; /* bucket nums in one extent 769 __le16 xh_num_buckets; /* Number of xattr buckets
770 record, only valid in the 770 in this extent record,
771 first bucket. */ 771 only valid in the first
772 bucket. */
772 __le64 xh_csum; 773 __le64 xh_csum;
773 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ 774 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
774}; 775};
@@ -776,8 +777,8 @@ struct ocfs2_xattr_header {
776/* 777/*
777 * On disk structure for xattr value root. 778 * On disk structure for xattr value root.
778 * 779 *
779 * It is used when one extended attribute's size is larger, and we will save it 780 * When an xattr's value is large enough, it is stored in an external
780 * in an outside cluster. It will stored in a b-tree like file content. 781 * b-tree like file data. The xattr value root points to this structure.
781 */ 782 */
782struct ocfs2_xattr_value_root { 783struct ocfs2_xattr_value_root {
783/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ 784/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 802c41492214..054e2efb0b7e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3,25 +3,20 @@
3 * 3 *
4 * xattr.c 4 * xattr.c
5 * 5 *
6 * Copyright (C) 2008 Oracle. All rights reserved. 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 * 7 *
8 * CREDITS: 8 * CREDITS:
9 * Lots of code in this file is taken from ext3. 9 * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 * 11 *
11 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public 13 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either 14 * License version 2 as published by the Free Software Foundation.
14 * version 2 of the License, or (at your option) any later version.
15 * 15 *
16 * This program is distributed in the hope that it will be useful, 16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details. 19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */ 20 */
26 21
27#include <linux/capability.h> 22#include <linux/capability.h>
@@ -83,7 +78,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
83 NULL 78 NULL
84}; 79};
85 80
86static struct xattr_handler *ocfs2_xattr_handler_map[] = { 81static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
87 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 82 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
88 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 83 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
89}; 84};
@@ -116,6 +111,10 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
116 int *block_off, 111 int *block_off,
117 int *new_offset); 112 int *new_offset);
118 113
114static int ocfs2_xattr_block_find(struct inode *inode,
115 int name_index,
116 const char *name,
117 struct ocfs2_xattr_search *xs);
119static int ocfs2_xattr_index_block_find(struct inode *inode, 118static int ocfs2_xattr_index_block_find(struct inode *inode,
120 struct buffer_head *root_bh, 119 struct buffer_head *root_bh,
121 int name_index, 120 int name_index,
@@ -137,6 +136,24 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
137static int ocfs2_delete_xattr_index_block(struct inode *inode, 136static int ocfs2_delete_xattr_index_block(struct inode *inode,
138 struct buffer_head *xb_bh); 137 struct buffer_head *xb_bh);
139 138
139static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
140{
141 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
142}
143
144static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
145{
146 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
147}
148
149static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
150{
151 u16 len = sb->s_blocksize -
152 offsetof(struct ocfs2_xattr_header, xh_entries);
153
154 return len / sizeof(struct ocfs2_xattr_entry);
155}
156
140static inline const char *ocfs2_xattr_prefix(int name_index) 157static inline const char *ocfs2_xattr_prefix(int name_index)
141{ 158{
142 struct xattr_handler *handler = NULL; 159 struct xattr_handler *handler = NULL;
@@ -542,14 +559,12 @@ static int ocfs2_xattr_block_list(struct inode *inode,
542 mlog_errno(ret); 559 mlog_errno(ret);
543 return ret; 560 return ret;
544 } 561 }
545 /*Verify the signature of xattr block*/
546 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
547 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
548 ret = -EFAULT;
549 goto cleanup;
550 }
551 562
552 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 563 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
564 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
565 ret = -EIO;
566 goto cleanup;
567 }
553 568
554 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 569 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
555 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 570 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
@@ -749,47 +764,25 @@ static int ocfs2_xattr_block_get(struct inode *inode,
749 size_t buffer_size, 764 size_t buffer_size,
750 struct ocfs2_xattr_search *xs) 765 struct ocfs2_xattr_search *xs)
751{ 766{
752 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
753 struct buffer_head *blk_bh = NULL;
754 struct ocfs2_xattr_block *xb; 767 struct ocfs2_xattr_block *xb;
755 struct ocfs2_xattr_value_root *xv; 768 struct ocfs2_xattr_value_root *xv;
756 size_t size; 769 size_t size;
757 int ret = -ENODATA, name_offset, name_len, block_off, i; 770 int ret = -ENODATA, name_offset, name_len, block_off, i;
758 771
759 if (!di->i_xattr_loc)
760 return ret;
761
762 memset(&xs->bucket, 0, sizeof(xs->bucket)); 772 memset(&xs->bucket, 0, sizeof(xs->bucket));
763 773
764 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 774 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
765 if (ret < 0) { 775 if (ret) {
766 mlog_errno(ret); 776 mlog_errno(ret);
767 return ret;
768 }
769 /*Verify the signature of xattr block*/
770 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
771 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
772 ret = -EFAULT;
773 goto cleanup; 777 goto cleanup;
774 } 778 }
775 779
776 xs->xattr_bh = blk_bh; 780 if (xs->not_found) {
777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 781 ret = -ENODATA;
778
779 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
780 xs->header = &xb->xb_attrs.xb_header;
781 xs->base = (void *)xs->header;
782 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
783 xs->here = xs->header->xh_entries;
784
785 ret = ocfs2_xattr_find_entry(name_index, name, xs);
786 } else
787 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
788 name_index,
789 name, xs);
790
791 if (ret)
792 goto cleanup; 782 goto cleanup;
783 }
784
785 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
793 size = le64_to_cpu(xs->here->xe_value_size); 786 size = le64_to_cpu(xs->here->xe_value_size);
794 if (buffer) { 787 if (buffer) {
795 ret = -ERANGE; 788 ret = -ERANGE;
@@ -828,7 +821,8 @@ cleanup:
828 brelse(xs->bucket.bhs[i]); 821 brelse(xs->bucket.bhs[i]);
829 memset(&xs->bucket, 0, sizeof(xs->bucket)); 822 memset(&xs->bucket, 0, sizeof(xs->bucket));
830 823
831 brelse(blk_bh); 824 brelse(xs->xattr_bh);
825 xs->xattr_bh = NULL;
832 return ret; 826 return ret;
833} 827}
834 828
@@ -837,11 +831,11 @@ cleanup:
837 * Copy an extended attribute into the buffer provided. 831 * Copy an extended attribute into the buffer provided.
838 * Buffer is NULL to compute the size of buffer required. 832 * Buffer is NULL to compute the size of buffer required.
839 */ 833 */
840int ocfs2_xattr_get(struct inode *inode, 834static int ocfs2_xattr_get(struct inode *inode,
841 int name_index, 835 int name_index,
842 const char *name, 836 const char *name,
843 void *buffer, 837 void *buffer,
844 size_t buffer_size) 838 size_t buffer_size)
845{ 839{
846 int ret; 840 int ret;
847 struct ocfs2_dinode *di = NULL; 841 struct ocfs2_dinode *di = NULL;
@@ -871,7 +865,7 @@ int ocfs2_xattr_get(struct inode *inode,
871 down_read(&oi->ip_xattr_sem); 865 down_read(&oi->ip_xattr_sem);
872 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 866 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
873 buffer_size, &xis); 867 buffer_size, &xis);
874 if (ret == -ENODATA) 868 if (ret == -ENODATA && di->i_xattr_loc)
875 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 869 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
876 buffer_size, &xbs); 870 buffer_size, &xbs);
877 up_read(&oi->ip_xattr_sem); 871 up_read(&oi->ip_xattr_sem);
@@ -1229,7 +1223,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1229 1223
1230 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 1224 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1231 if (free < 0) 1225 if (free < 0)
1232 return -EFAULT; 1226 return -EIO;
1233 1227
1234 if (!xs->not_found) { 1228 if (!xs->not_found) {
1235 size_t size = 0; 1229 size_t size = 0;
@@ -1514,10 +1508,9 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1514 goto out; 1508 goto out;
1515 } 1509 }
1516 1510
1517 /*Verify the signature of xattr block*/ 1511 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1518 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, 1512 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1519 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { 1513 ret = -EIO;
1520 ret = -EFAULT;
1521 goto out; 1514 goto out;
1522 } 1515 }
1523 1516
@@ -1527,7 +1520,6 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1527 goto out; 1520 goto out;
1528 } 1521 }
1529 1522
1530 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1531 blk = le64_to_cpu(xb->xb_blkno); 1523 blk = le64_to_cpu(xb->xb_blkno);
1532 bit = le16_to_cpu(xb->xb_suballoc_bit); 1524 bit = le16_to_cpu(xb->xb_suballoc_bit);
1533 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1525 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1771,15 +1763,14 @@ static int ocfs2_xattr_block_find(struct inode *inode,
1771 mlog_errno(ret); 1763 mlog_errno(ret);
1772 return ret; 1764 return ret;
1773 } 1765 }
1774 /*Verify the signature of xattr block*/ 1766
1775 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, 1767 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1776 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { 1768 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1777 ret = -EFAULT; 1769 ret = -EIO;
1778 goto cleanup; 1770 goto cleanup;
1779 } 1771 }
1780 1772
1781 xs->xattr_bh = blk_bh; 1773 xs->xattr_bh = blk_bh;
1782 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1783 1774
1784 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1775 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1785 xs->header = &xb->xb_attrs.xb_header; 1776 xs->header = &xb->xb_attrs.xb_header;
@@ -1806,52 +1797,6 @@ cleanup:
1806} 1797}
1807 1798
1808/* 1799/*
1809 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1810 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1811 * re-initialized.
1812 */
1813static int ocfs2_restore_xattr_block(struct inode *inode,
1814 struct ocfs2_xattr_search *xs)
1815{
1816 int ret;
1817 handle_t *handle;
1818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1819 struct ocfs2_xattr_block *xb =
1820 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1821 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1822 u16 xb_flags = le16_to_cpu(xb->xb_flags);
1823
1824 BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1825 le16_to_cpu(el->l_next_free_rec) != 0);
1826
1827 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1828 if (IS_ERR(handle)) {
1829 ret = PTR_ERR(handle);
1830 handle = NULL;
1831 goto out;
1832 }
1833
1834 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1835 OCFS2_JOURNAL_ACCESS_WRITE);
1836 if (ret < 0) {
1837 mlog_errno(ret);
1838 goto out_commit;
1839 }
1840
1841 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1842 offsetof(struct ocfs2_xattr_block, xb_attrs));
1843
1844 xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1845
1846 ocfs2_journal_dirty(handle, xs->xattr_bh);
1847
1848out_commit:
1849 ocfs2_commit_trans(osb, handle);
1850out:
1851 return ret;
1852}
1853
1854/*
1855 * ocfs2_xattr_block_set() 1800 * ocfs2_xattr_block_set()
1856 * 1801 *
1857 * Set, replace or remove an extended attribute into external block. 1802 * Set, replace or remove an extended attribute into external block.
@@ -1961,8 +1906,6 @@ out:
1961 } 1906 }
1962 1907
1963 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); 1908 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1964 if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1965 ret = ocfs2_restore_xattr_block(inode, xs);
1966 1909
1967end: 1910end:
1968 1911
@@ -2398,7 +2341,8 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
2398 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 2341 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2399 2342
2400 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " 2343 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2401 "in the rec is %u\n", num_clusters, p_blkno, first_hash); 2344 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2345 first_hash);
2402 2346
2403 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 2347 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2404 p_blkno, first_hash, num_clusters, xs); 2348 p_blkno, first_hash, num_clusters, xs);
@@ -2422,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2422 memset(&bucket, 0, sizeof(bucket)); 2366 memset(&bucket, 0, sizeof(bucket));
2423 2367
2424 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 2368 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2425 clusters, blkno); 2369 clusters, (unsigned long long)blkno);
2426 2370
2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { 2371 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, 2372 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
@@ -2440,7 +2384,8 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2440 if (i == 0) 2384 if (i == 0)
2441 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); 2385 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2442 2386
2443 mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, 2387 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2388 (unsigned long long)blkno,
2444 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); 2389 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
2445 if (func) { 2390 if (func) {
2446 ret = func(inode, &bucket, para); 2391 ret = func(inode, &bucket, para);
@@ -2776,7 +2721,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2776 */ 2721 */
2777 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 2722 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2778 2723
2779 mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); 2724 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2725 (unsigned long long)blkno);
2780 2726
2781 xh_bh = sb_getblk(inode->i_sb, blkno); 2727 xh_bh = sb_getblk(inode->i_sb, blkno);
2782 if (!xh_bh) { 2728 if (!xh_bh) {
@@ -2818,7 +2764,11 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2818 if (data_bh) 2764 if (data_bh)
2819 ocfs2_journal_dirty(handle, data_bh); 2765 ocfs2_journal_dirty(handle, data_bh);
2820 2766
2821 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); 2767 ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2768 if (ret) {
2769 mlog_errno(ret);
2770 goto out_commit;
2771 }
2822 2772
2823 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 2773 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2824 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 2774 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2941,8 +2891,8 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2941 2891
2942 mlog(0, "adjust xattr bucket in %llu, count = %u, " 2892 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2943 "xh_free_start = %u, xh_name_value_len = %u.\n", 2893 "xh_free_start = %u, xh_name_value_len = %u.\n",
2944 blkno, le16_to_cpu(xh->xh_count), xh_free_start, 2894 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
2945 le16_to_cpu(xh->xh_name_value_len)); 2895 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
2946 2896
2947 /* 2897 /*
2948 * sort all the entries by their offset. 2898 * sort all the entries by their offset.
@@ -3058,7 +3008,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3058 prev_blkno += (num_clusters - 1) * bpc + bpc / 2; 3008 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3059 3009
3060 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 3010 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3061 prev_blkno, new_blkno); 3011 (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3062 3012
3063 /* 3013 /*
3064 * We need to update the 1st half of the new cluster and 3014 * We need to update the 1st half of the new cluster and
@@ -3168,26 +3118,74 @@ static int ocfs2_read_xattr_bucket(struct inode *inode,
3168} 3118}
3169 3119
3170/* 3120/*
3171 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). 3121 * Find the suitable pos when we divide a bucket into 2.
3122 * We have to make sure the xattrs with the same hash value exist
3123 * in the same bucket.
3124 *
3125 * If this ocfs2_xattr_header covers more than one hash value, find a
3126 * place where the hash value changes. Try to find the most even split.
3127 * The most common case is that all entries have different hash values,
3128 * and the first check we make will find a place to split.
3129 */
3130static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3131{
3132 struct ocfs2_xattr_entry *entries = xh->xh_entries;
3133 int count = le16_to_cpu(xh->xh_count);
3134 int delta, middle = count / 2;
3135
3136 /*
3137 * We start at the middle. Each step gets farther away in both
3138 * directions. We therefore hit the change in hash value
3139 * nearest to the middle. Note that this loop does not execute for
3140 * count < 2.
3141 */
3142 for (delta = 0; delta < middle; delta++) {
3143 /* Let's check delta earlier than middle */
3144 if (cmp_xe(&entries[middle - delta - 1],
3145 &entries[middle - delta]))
3146 return middle - delta;
3147
3148 /* For even counts, don't walk off the end */
3149 if ((middle + delta + 1) == count)
3150 continue;
3151
3152 /* Now try delta past middle */
3153 if (cmp_xe(&entries[middle + delta],
3154 &entries[middle + delta + 1]))
3155 return middle + delta + 1;
3156 }
3157
3158 /* Every entry had the same hash */
3159 return count;
3160}
3161
3162/*
3163 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3172 * first_hash will record the 1st hash of the new bucket. 3164 * first_hash will record the 1st hash of the new bucket.
3165 *
3166 * Normally half of the xattrs will be moved. But we have to make
3167 * sure that the xattrs with the same hash value are stored in the
3168 * same bucket. If all the xattrs in this bucket have the same hash
3169 * value, the new bucket will be initialized as an empty one and the
3170 * first_hash will be initialized as (hash_value+1).
3173 */ 3171 */
3174static int ocfs2_half_xattr_bucket(struct inode *inode, 3172static int ocfs2_divide_xattr_bucket(struct inode *inode,
3175 handle_t *handle, 3173 handle_t *handle,
3176 u64 blk, 3174 u64 blk,
3177 u64 new_blk, 3175 u64 new_blk,
3178 u32 *first_hash, 3176 u32 *first_hash,
3179 int new_bucket_head) 3177 int new_bucket_head)
3180{ 3178{
3181 int ret, i; 3179 int ret, i;
3182 u16 count, start, len, name_value_len, xe_len, name_offset; 3180 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3183 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3181 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3184 struct buffer_head **s_bhs, **t_bhs = NULL; 3182 struct buffer_head **s_bhs, **t_bhs = NULL;
3185 struct ocfs2_xattr_header *xh; 3183 struct ocfs2_xattr_header *xh;
3186 struct ocfs2_xattr_entry *xe; 3184 struct ocfs2_xattr_entry *xe;
3187 int blocksize = inode->i_sb->s_blocksize; 3185 int blocksize = inode->i_sb->s_blocksize;
3188 3186
3189 mlog(0, "move half of xattrs from bucket %llu to %llu\n", 3187 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3190 blk, new_blk); 3188 (unsigned long long)blk, (unsigned long long)new_blk);
3191 3189
3192 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3190 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3193 if (!s_bhs) 3191 if (!s_bhs)
@@ -3220,21 +3218,44 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3220 3218
3221 for (i = 0; i < blk_per_bucket; i++) { 3219 for (i = 0; i < blk_per_bucket; i++) {
3222 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3220 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3223 OCFS2_JOURNAL_ACCESS_CREATE); 3221 new_bucket_head ?
3222 OCFS2_JOURNAL_ACCESS_CREATE :
3223 OCFS2_JOURNAL_ACCESS_WRITE);
3224 if (ret) { 3224 if (ret) {
3225 mlog_errno(ret); 3225 mlog_errno(ret);
3226 goto out; 3226 goto out;
3227 } 3227 }
3228 } 3228 }
3229 3229
3230 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3231 count = le16_to_cpu(xh->xh_count);
3232 start = ocfs2_xattr_find_divide_pos(xh);
3233
3234 if (start == count) {
3235 xe = &xh->xh_entries[start-1];
3236
3237 /*
3238 * initialized a new empty bucket here.
3239 * The hash value is set as one larger than
3240 * that of the last entry in the previous bucket.
3241 */
3242 for (i = 0; i < blk_per_bucket; i++)
3243 memset(t_bhs[i]->b_data, 0, blocksize);
3244
3245 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3246 xh->xh_free_start = cpu_to_le16(blocksize);
3247 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3248 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3249
3250 goto set_num_buckets;
3251 }
3252
3230 /* copy the whole bucket to the new first. */ 3253 /* copy the whole bucket to the new first. */
3231 for (i = 0; i < blk_per_bucket; i++) 3254 for (i = 0; i < blk_per_bucket; i++)
3232 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); 3255 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3233 3256
3234 /* update the new bucket. */ 3257 /* update the new bucket. */
3235 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3258 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3236 count = le16_to_cpu(xh->xh_count);
3237 start = count / 2;
3238 3259
3239 /* 3260 /*
3240 * Calculate the total name/value len and xh_free_start for 3261 * Calculate the total name/value len and xh_free_start for
@@ -3291,6 +3312,7 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3291 xh->xh_free_start = xe->xe_name_offset; 3312 xh->xh_free_start = xe->xe_name_offset;
3292 } 3313 }
3293 3314
3315set_num_buckets:
3294 /* set xh->xh_num_buckets for the new xh. */ 3316 /* set xh->xh_num_buckets for the new xh. */
3295 if (new_bucket_head) 3317 if (new_bucket_head)
3296 xh->xh_num_buckets = cpu_to_le16(1); 3318 xh->xh_num_buckets = cpu_to_le16(1);
@@ -3308,9 +3330,13 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3308 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 3330 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3309 3331
3310 /* 3332 /*
3311 * Now only update the 1st block of the old bucket. 3333 * Now only update the 1st block of the old bucket. If we
3312 * Please note that the entry has been sorted already above. 3334 * just added a new empty bucket, there is no need to modify
3335 * it.
3313 */ 3336 */
3337 if (start == count)
3338 goto out;
3339
3314 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3340 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3315 memset(&xh->xh_entries[start], 0, 3341 memset(&xh->xh_entries[start], 0,
3316 sizeof(struct ocfs2_xattr_entry) * (count - start)); 3342 sizeof(struct ocfs2_xattr_entry) * (count - start));
@@ -3358,7 +3384,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3358 BUG_ON(s_blkno == t_blkno); 3384 BUG_ON(s_blkno == t_blkno);
3359 3385
3360 mlog(0, "cp bucket %llu to %llu, target is %d\n", 3386 mlog(0, "cp bucket %llu to %llu, target is %d\n",
3361 s_blkno, t_blkno, t_is_new); 3387 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3388 t_is_new);
3362 3389
3363 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3390 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3364 GFP_NOFS); 3391 GFP_NOFS);
@@ -3382,6 +3409,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3382 3409
3383 for (i = 0; i < blk_per_bucket; i++) { 3410 for (i = 0; i < blk_per_bucket; i++) {
3384 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3411 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3412 t_is_new ?
3413 OCFS2_JOURNAL_ACCESS_CREATE :
3385 OCFS2_JOURNAL_ACCESS_WRITE); 3414 OCFS2_JOURNAL_ACCESS_WRITE);
3386 if (ret) 3415 if (ret)
3387 goto out; 3416 goto out;
@@ -3428,7 +3457,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
3428 struct ocfs2_xattr_header *xh; 3457 struct ocfs2_xattr_header *xh;
3429 u64 to_blk_start = to_blk; 3458 u64 to_blk_start = to_blk;
3430 3459
3431 mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); 3460 mlog(0, "cp xattrs from cluster %llu to %llu\n",
3461 (unsigned long long)src_blk, (unsigned long long)to_blk);
3432 3462
3433 /* 3463 /*
3434 * We need to update the new cluster and 1 more for the update of 3464 * We need to update the new cluster and 1 more for the update of
@@ -3493,15 +3523,15 @@ out:
3493} 3523}
3494 3524
3495/* 3525/*
3496 * Move half of the xattrs in this cluster to the new cluster. 3526 * Move some xattrs in this cluster to the new cluster.
3497 * This function should only be called when bucket size == cluster size. 3527 * This function should only be called when bucket size == cluster size.
3498 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 3528 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3499 */ 3529 */
3500static int ocfs2_half_xattr_cluster(struct inode *inode, 3530static int ocfs2_divide_xattr_cluster(struct inode *inode,
3501 handle_t *handle, 3531 handle_t *handle,
3502 u64 prev_blk, 3532 u64 prev_blk,
3503 u64 new_blk, 3533 u64 new_blk,
3504 u32 *first_hash) 3534 u32 *first_hash)
3505{ 3535{
3506 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3536 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3507 int ret, credits = 2 * blk_per_bucket; 3537 int ret, credits = 2 * blk_per_bucket;
@@ -3515,8 +3545,8 @@ static int ocfs2_half_xattr_cluster(struct inode *inode,
3515 } 3545 }
3516 3546
3517 /* Move half of the xattr in start_blk to the next bucket. */ 3547 /* Move half of the xattr in start_blk to the next bucket. */
3518 return ocfs2_half_xattr_bucket(inode, handle, prev_blk, 3548 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3519 new_blk, first_hash, 1); 3549 new_blk, first_hash, 1);
3520} 3550}
3521 3551
3522/* 3552/*
@@ -3559,7 +3589,8 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3559 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 3589 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3560 3590
3561 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 3591 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3562 prev_blk, prev_clusters, new_blk); 3592 (unsigned long long)prev_blk, prev_clusters,
3593 (unsigned long long)new_blk);
3563 3594
3564 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) 3595 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3565 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 3596 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
@@ -3578,9 +3609,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3578 last_blk, new_blk, 3609 last_blk, new_blk,
3579 v_start); 3610 v_start);
3580 else { 3611 else {
3581 ret = ocfs2_half_xattr_cluster(inode, handle, 3612 ret = ocfs2_divide_xattr_cluster(inode, handle,
3582 last_blk, new_blk, 3613 last_blk, new_blk,
3583 v_start); 3614 v_start);
3584 3615
3585 if ((*header_bh)->b_blocknr == last_blk && extend) 3616 if ((*header_bh)->b_blocknr == last_blk && extend)
3586 *extend = 0; 3617 *extend = 0;
@@ -3629,7 +3660,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3629 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 3660 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3630 "previous xattr blkno = %llu\n", 3661 "previous xattr blkno = %llu\n",
3631 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3662 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3632 prev_cpos, prev_blkno); 3663 prev_cpos, (unsigned long long)prev_blkno);
3633 3664
3634 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 3665 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3635 3666
@@ -3716,7 +3747,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3716 } 3747 }
3717 } 3748 }
3718 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 3749 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3719 num_bits, block, v_start); 3750 num_bits, (unsigned long long)block, v_start);
3720 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 3751 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3721 num_bits, 0, meta_ac); 3752 num_bits, 0, meta_ac);
3722 if (ret < 0) { 3753 if (ret < 0) {
@@ -3761,7 +3792,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
3761 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); 3792 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3762 3793
3763 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 3794 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3764 "from %llu, len = %u\n", start_blk, 3795 "from %llu, len = %u\n", (unsigned long long)start_blk,
3765 (unsigned long long)first_bh->b_blocknr, num_clusters); 3796 (unsigned long long)first_bh->b_blocknr, num_clusters);
3766 3797
3767 BUG_ON(bucket >= num_buckets); 3798 BUG_ON(bucket >= num_buckets);
@@ -3797,8 +3828,8 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
3797 } 3828 }
3798 3829
3799 /* Move half of the xattr in start_blk to the next bucket. */ 3830 /* Move half of the xattr in start_blk to the next bucket. */
3800 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, 3831 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
3801 start_blk + blk_per_bucket, NULL, 0); 3832 start_blk + blk_per_bucket, NULL, 0);
3802 3833
3803 le16_add_cpu(&first_xh->xh_num_buckets, 1); 3834 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3804 ocfs2_journal_dirty(handle, first_bh); 3835 ocfs2_journal_dirty(handle, first_bh);
@@ -4146,7 +4177,7 @@ static int ocfs2_xattr_value_update_size(struct inode *inode,
4146 handle_t *handle = NULL; 4177 handle_t *handle = NULL;
4147 4178
4148 handle = ocfs2_start_trans(osb, 1); 4179 handle = ocfs2_start_trans(osb, 1);
4149 if (handle == NULL) { 4180 if (IS_ERR(handle)) {
4150 ret = -ENOMEM; 4181 ret = -ENOMEM;
4151 mlog_errno(ret); 4182 mlog_errno(ret);
4152 goto out; 4183 goto out;
@@ -4313,7 +4344,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4313 } 4344 }
4314 4345
4315 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 4346 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4316 if (handle == NULL) { 4347 if (IS_ERR(handle)) {
4317 ret = -ENOMEM; 4348 ret = -ENOMEM;
4318 mlog_errno(ret); 4349 mlog_errno(ret);
4319 goto out; 4350 goto out;
@@ -4489,11 +4520,21 @@ out:
4489 return ret; 4520 return ret;
4490} 4521}
4491 4522
4492/* check whether the xattr bucket is filled up with the same hash value. */ 4523/*
4524 * check whether the xattr bucket is filled up with the same hash value.
4525 * If we want to insert the xattr with the same hash, return -ENOSPC.
4526 * If we want to insert a xattr with different hash value, go ahead
4527 * and ocfs2_divide_xattr_bucket will handle this.
4528 */
4493static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 4529static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4494 struct ocfs2_xattr_bucket *bucket) 4530 struct ocfs2_xattr_bucket *bucket,
4531 const char *name)
4495{ 4532{
4496 struct ocfs2_xattr_header *xh = bucket->xh; 4533 struct ocfs2_xattr_header *xh = bucket->xh;
4534 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4535
4536 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4537 return 0;
4497 4538
4498 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 4539 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4499 xh->xh_entries[0].xe_name_hash) { 4540 xh->xh_entries[0].xe_name_hash) {
@@ -4616,7 +4657,9 @@ try_again:
4616 * one bucket's worth, so check it here whether we need to 4657 * one bucket's worth, so check it here whether we need to
4617 * add a new bucket for the insert. 4658 * add a new bucket for the insert.
4618 */ 4659 */
4619 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); 4660 ret = ocfs2_check_xattr_bucket_collision(inode,
4661 &xs->bucket,
4662 xi->name);
4620 if (ret) { 4663 if (ret) {
4621 mlog_errno(ret); 4664 mlog_errno(ret);
4622 goto out; 4665 goto out;
@@ -4727,14 +4770,11 @@ out:
4727/* 4770/*
4728 * 'trusted' attributes support 4771 * 'trusted' attributes support
4729 */ 4772 */
4730
4731#define XATTR_TRUSTED_PREFIX "trusted."
4732
4733static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 4773static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4734 size_t list_size, const char *name, 4774 size_t list_size, const char *name,
4735 size_t name_len) 4775 size_t name_len)
4736{ 4776{
4737 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; 4777 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
4738 const size_t total_len = prefix_len + name_len + 1; 4778 const size_t total_len = prefix_len + name_len + 1;
4739 4779
4740 if (list && total_len <= list_size) { 4780 if (list && total_len <= list_size) {
@@ -4771,18 +4811,14 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
4771 .set = ocfs2_xattr_trusted_set, 4811 .set = ocfs2_xattr_trusted_set,
4772}; 4812};
4773 4813
4774
4775/* 4814/*
4776 * 'user' attributes support 4815 * 'user' attributes support
4777 */ 4816 */
4778
4779#define XATTR_USER_PREFIX "user."
4780
4781static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 4817static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4782 size_t list_size, const char *name, 4818 size_t list_size, const char *name,
4783 size_t name_len) 4819 size_t name_len)
4784{ 4820{
4785 const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; 4821 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
4786 const size_t total_len = prefix_len + name_len + 1; 4822 const size_t total_len = prefix_len + name_len + 1;
4787 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4823 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4788 4824
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index c25c7c62a059..1d8314c7656d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -3,24 +3,16 @@
3 * 3 *
4 * xattr.h 4 * xattr.h
5 * 5 *
6 * Function prototypes 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 * 7 *
10 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 9 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 10 * License version 2 as published by the Free Software Foundation.
13 * version 2 of the License, or (at your option) any later version.
14 * 11 *
15 * This program is distributed in the hope that it will be useful, 12 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 15 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 */ 16 */
25 17
26#ifndef OCFS2_XATTR_H 18#ifndef OCFS2_XATTR_H
@@ -40,29 +32,11 @@ enum ocfs2_xattr_type {
40 32
41extern struct xattr_handler ocfs2_xattr_user_handler; 33extern struct xattr_handler ocfs2_xattr_user_handler;
42extern struct xattr_handler ocfs2_xattr_trusted_handler; 34extern struct xattr_handler ocfs2_xattr_trusted_handler;
43
44extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
45extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t);
46extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
47 size_t, int);
48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
49extern struct xattr_handler *ocfs2_xattr_handlers[]; 35extern struct xattr_handler *ocfs2_xattr_handlers[];
50 36
51static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 37ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
52{ 38int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
53 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 39 size_t, int);
54} 40int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
55
56static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59}
60
61static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
62{
63 u16 len = sb->s_blocksize -
64 offsetof(struct ocfs2_xattr_header, xh_entries);
65 41
66 return len / sizeof(struct ocfs2_xattr_entry);
67}
68#endif /* OCFS2_XATTR_H */ 42#endif /* OCFS2_XATTR_H */
diff --git a/fs/pipe.c b/fs/pipe.c
index fcba6542b8d0..7aea8b89baac 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -717,14 +717,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
717static int 717static int
718pipe_read_release(struct inode *inode, struct file *filp) 718pipe_read_release(struct inode *inode, struct file *filp)
719{ 719{
720 pipe_read_fasync(-1, filp, 0);
721 return pipe_release(inode, 1, 0); 720 return pipe_release(inode, 1, 0);
722} 721}
723 722
724static int 723static int
725pipe_write_release(struct inode *inode, struct file *filp) 724pipe_write_release(struct inode *inode, struct file *filp)
726{ 725{
727 pipe_write_fasync(-1, filp, 0);
728 return pipe_release(inode, 0, 1); 726 return pipe_release(inode, 0, 1);
729} 727}
730 728
@@ -733,7 +731,6 @@ pipe_rdwr_release(struct inode *inode, struct file *filp)
733{ 731{
734 int decr, decw; 732 int decr, decw;
735 733
736 pipe_rdwr_fasync(-1, filp, 0);
737 decr = (filp->f_mode & FMODE_READ) != 0; 734 decr = (filp->f_mode & FMODE_READ) != 0;
738 decw = (filp->f_mode & FMODE_WRITE) != 0; 735 decw = (filp->f_mode & FMODE_WRITE) != 0;
739 return pipe_release(inode, decr, decw); 736 return pipe_release(inode, decr, decw);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index bb9f4b05703d..6af7fba7abb1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -40,7 +40,7 @@
40 * 40 *
41 * 41 *
42 * Alan Cox : security fixes. 42 * Alan Cox : security fixes.
43 * <Alan.Cox@linux.org> 43 * <alan@lxorguk.ukuu.org.uk>
44 * 44 *
45 * Al Viro : safe handling of mm_struct 45 * Al Viro : safe handling of mm_struct
46 * 46 *
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 0c10a0b3f146..df26aa88fa47 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -1,43 +1,45 @@
1#include <linux/fs.h>
2#include <linux/init.h> 1#include <linux/init.h>
3#include <linux/proc_fs.h> 2#include <linux/proc_fs.h>
4#include <linux/sched.h> 3#include <linux/sched.h>
5#include <linux/seq_file.h>
6#include <linux/time.h> 4#include <linux/time.h>
7#include <asm/cputime.h> 5#include <asm/cputime.h>
8 6
9static int uptime_proc_show(struct seq_file *m, void *v) 7static int proc_calc_metrics(char *page, char **start, off_t off,
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
10{ 23{
11 struct timespec uptime; 24 struct timespec uptime;
12 struct timespec idle; 25 struct timespec idle;
26 int len;
13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
14 28
15 do_posix_clock_monotonic_gettime(&uptime); 29 do_posix_clock_monotonic_gettime(&uptime);
16 monotonic_to_bootbased(&uptime); 30 monotonic_to_bootbased(&uptime);
17 cputime_to_timespec(idletime, &idle); 31 cputime_to_timespec(idletime, &idle);
18 seq_printf(m, "%lu.%02lu %lu.%02lu\n", 32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n",
19 (unsigned long) uptime.tv_sec, 33 (unsigned long) uptime.tv_sec,
20 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
21 (unsigned long) idle.tv_sec, 35 (unsigned long) idle.tv_sec,
22 (idle.tv_nsec / (NSEC_PER_SEC / 100))); 36 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
23 return 0; 37 return proc_calc_metrics(page, start, off, count, eof, len);
24} 38}
25 39
26static int uptime_proc_open(struct inode *inode, struct file *file)
27{
28 return single_open(file, uptime_proc_show, NULL);
29}
30
31static const struct file_operations uptime_proc_fops = {
32 .open = uptime_proc_open,
33 .read = seq_read,
34 .llseek = seq_lseek,
35 .release = single_release,
36};
37
38static int __init proc_uptime_init(void) 40static int __init proc_uptime_init(void)
39{ 41{
40 proc_create("uptime", 0, NULL, &uptime_proc_fops); 42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL);
41 return 0; 43 return 0;
42} 44}
43module_init(proc_uptime_init); 45module_init(proc_uptime_init);
diff --git a/fs/splice.c b/fs/splice.c
index a1e701c27156..1abab5cee4ba 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
731 }; 731 };
732 732
733 /* 733 /*
734 * The actor worker might be calling ->prepare_write and 734 * The actor worker might be calling ->write_begin and
735 * ->commit_write. Most of the time, these expect i_mutex to 735 * ->write_end. Most of the time, these expect i_mutex to
736 * be held. Since this may result in an ABBA deadlock with 736 * be held. Since this may result in an ABBA deadlock with
737 * pipe->inode, we have to order lock acquiry here. 737 * pipe->inode, we have to order lock acquiry here.
738 */ 738 */
diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile
deleted file mode 100644
index 40f2798a4f08..000000000000
--- a/fs/vfat/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the linux vfat-filesystem routines.
3#
4
5obj-$(CONFIG_VFAT_FS) += vfat.o
6
7vfat-y := namei.o
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9e561a9cefca..a11a8390bf6c 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1566 int nmap, error, w, count, c, got, i, mapi; 1566 int nmap, error, w, count, c, got, i, mapi;
1567 xfs_trans_t *tp; 1567 xfs_trans_t *tp;
1568 xfs_mount_t *mp; 1568 xfs_mount_t *mp;
1569 xfs_drfsbno_t nblks;
1569 1570
1570 dp = args->dp; 1571 dp = args->dp;
1571 mp = dp->i_mount; 1572 mp = dp->i_mount;
1572 w = args->whichfork; 1573 w = args->whichfork;
1573 tp = args->trans; 1574 tp = args->trans;
1575 nblks = dp->i_d.di_nblocks;
1576
1574 /* 1577 /*
1575 * For new directories adjust the file offset and block count. 1578 * For new directories adjust the file offset and block count.
1576 */ 1579 */
@@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1647 } 1650 }
1648 if (mapp != &map) 1651 if (mapp != &map)
1649 kmem_free(mapp); 1652 kmem_free(mapp);
1653 /* account for newly allocated blocks in reserved blocks total */
1654 args->total -= dp->i_d.di_nblocks - nblks;
1650 *new_blkno = (xfs_dablk_t)bno; 1655 *new_blkno = (xfs_dablk_t)bno;
1651 return 0; 1656 return 0;
1652} 1657}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 80e0dc51361c..1afb12278b8d 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -525,11 +525,13 @@ xfs_dir2_grow_inode(
525 xfs_mount_t *mp; 525 xfs_mount_t *mp;
526 int nmap; /* number of bmap entries */ 526 int nmap; /* number of bmap entries */
527 xfs_trans_t *tp; 527 xfs_trans_t *tp;
528 xfs_drfsbno_t nblks;
528 529
529 xfs_dir2_trace_args_s("grow_inode", args, space); 530 xfs_dir2_trace_args_s("grow_inode", args, space);
530 dp = args->dp; 531 dp = args->dp;
531 tp = args->trans; 532 tp = args->trans;
532 mp = dp->i_mount; 533 mp = dp->i_mount;
534 nblks = dp->i_d.di_nblocks;
533 /* 535 /*
534 * Set lowest possible block in the space requested. 536 * Set lowest possible block in the space requested.
535 */ 537 */
@@ -622,7 +624,11 @@ xfs_dir2_grow_inode(
622 */ 624 */
623 if (mapp != &map) 625 if (mapp != &map)
624 kmem_free(mapp); 626 kmem_free(mapp);
627
628 /* account for newly allocated blocks in reserved blocks total */
629 args->total -= dp->i_d.di_nblocks - nblks;
625 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 630 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
631
626 /* 632 /*
627 * Update file's size if this is the data space and it grew. 633 * Update file's size if this is the data space and it grew.
628 */ 634 */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dbd9cef852ec..a391b955df01 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1414,7 +1414,7 @@ xfs_itruncate_start(
1414 mp = ip->i_mount; 1414 mp = ip->i_mount;
1415 1415
1416 /* wait for the completion of any pending DIOs */ 1416 /* wait for the completion of any pending DIOs */
1417 if (new_size < ip->i_size) 1417 if (new_size == 0 || new_size < ip->i_size)
1418 vn_iowait(ip); 1418 vn_iowait(ip);
1419 1419
1420 /* 1420 /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0b02c6443551..3608a0f0a5f6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -563,6 +563,11 @@ xfs_log_mount(
563 } 563 }
564 564
565 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); 565 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
566 if (!mp->m_log) {
567 cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!");
568 error = ENOMEM;
569 goto out;
570 }
566 571
567 /* 572 /*
568 * Initialize the AIL now we have a log. 573 * Initialize the AIL now we have a log.
@@ -601,6 +606,7 @@ xfs_log_mount(
601 return 0; 606 return 0;
602error: 607error:
603 xfs_log_unmount_dealloc(mp); 608 xfs_log_unmount_dealloc(mp);
609out:
604 return error; 610 return error;
605} /* xfs_log_mount */ 611} /* xfs_log_mount */
606 612
@@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1217 int i; 1223 int i;
1218 int iclogsize; 1224 int iclogsize;
1219 1225
1220 log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); 1226 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1227 if (!log)
1228 return NULL;
1221 1229
1222 log->l_mp = mp; 1230 log->l_mp = mp;
1223 log->l_targ = log_target; 1231 log->l_targ = log_target;
@@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1249 xlog_get_iclog_buffer_size(mp, log); 1257 xlog_get_iclog_buffer_size(mp, log);
1250 1258
1251 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); 1259 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1260 if (!bp)
1261 goto out_free_log;
1252 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1262 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1253 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); 1263 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1254 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1264 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
@@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp,
1275 iclogsize = log->l_iclog_size; 1285 iclogsize = log->l_iclog_size;
1276 ASSERT(log->l_iclog_size >= 4096); 1286 ASSERT(log->l_iclog_size >= 4096);
1277 for (i=0; i < log->l_iclog_bufs; i++) { 1287 for (i=0; i < log->l_iclog_bufs; i++) {
1278 *iclogp = (xlog_in_core_t *) 1288 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
1279 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); 1289 if (!*iclogp)
1290 goto out_free_iclog;
1291
1280 iclog = *iclogp; 1292 iclog = *iclogp;
1281 iclog->ic_prev = prev_iclog; 1293 iclog->ic_prev = prev_iclog;
1282 prev_iclog = iclog; 1294 prev_iclog = iclog;
1283 1295
1284 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); 1296 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
1297 if (!bp)
1298 goto out_free_iclog;
1285 if (!XFS_BUF_CPSEMA(bp)) 1299 if (!XFS_BUF_CPSEMA(bp))
1286 ASSERT(0); 1300 ASSERT(0);
1287 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1301 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
@@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp,
1323 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ 1337 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
1324 1338
1325 return log; 1339 return log;
1340
1341out_free_iclog:
1342 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
1343 prev_iclog = iclog->ic_next;
1344 if (iclog->ic_bp) {
1345 sv_destroy(&iclog->ic_force_wait);
1346 sv_destroy(&iclog->ic_write_wait);
1347 xfs_buf_free(iclog->ic_bp);
1348 xlog_trace_iclog_dealloc(iclog);
1349 }
1350 kmem_free(iclog);
1351 }
1352 spinlock_destroy(&log->l_icloglock);
1353 spinlock_destroy(&log->l_grant_lock);
1354 xlog_trace_loggrant_dealloc(log);
1355 xfs_buf_free(log->l_xbuf);
1356out_free_log:
1357 kmem_free(log);
1358 return NULL;
1326} /* xlog_alloc_log */ 1359} /* xlog_alloc_log */
1327 1360
1328 1361
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 82d46ce69d5f..70e3ba32e6be 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1419,7 +1419,13 @@ xlog_recover_add_to_trans(
1419 return 0; 1419 return 0;
1420 item = trans->r_itemq; 1420 item = trans->r_itemq;
1421 if (item == NULL) { 1421 if (item == NULL) {
1422 ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); 1422 /* we need to catch log corruptions here */
1423 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1424 xlog_warn("XFS: xlog_recover_add_to_trans: "
1425 "bad header magic number");
1426 ASSERT(0);
1427 return XFS_ERROR(EIO);
1428 }
1423 if (len == sizeof(xfs_trans_header_t)) 1429 if (len == sizeof(xfs_trans_header_t))
1424 xlog_recover_add_item(&trans->r_itemq); 1430 xlog_recover_add_item(&trans->r_itemq);
1425 memcpy(&trans->r_theader, dp, len); /* d, s, l */ 1431 memcpy(&trans->r_theader, dp, len); /* d, s, l */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a4503f5e9497..15f5dd22fbb2 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1245,6 +1245,9 @@ xfs_unmountfs(
1245 1245
1246 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1246 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1247 1247
1248 if (mp->m_quotainfo)
1249 XFS_QM_DONE(mp);
1250
1248 /* 1251 /*
1249 * Flush out the log synchronously so that we know for sure 1252 * Flush out the log synchronously so that we know for sure
1250 * that nothing is pinned. This is important because bflush() 1253 * that nothing is pinned. This is important because bflush()
@@ -1297,8 +1300,6 @@ xfs_unmountfs(
1297 xfs_errortag_clearall(mp, 0); 1300 xfs_errortag_clearall(mp, 0);
1298#endif 1301#endif
1299 xfs_free_perag(mp); 1302 xfs_free_perag(mp);
1300 if (mp->m_quotainfo)
1301 XFS_QM_DONE(mp);
1302} 1303}
1303 1304
1304STATIC void 1305STATIC void