diff options
Diffstat (limited to 'fs')
310 files changed, 14956 insertions, 8192 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 047c791427aa..c061c3f18e7c 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -55,7 +55,7 @@ enum { | |||
55 | Opt_err | 55 | Opt_err |
56 | }; | 56 | }; |
57 | 57 | ||
58 | static match_table_t tokens = { | 58 | static const match_table_t tokens = { |
59 | {Opt_debug, "debug=%x"}, | 59 | {Opt_debug, "debug=%x"}, |
60 | {Opt_dfltuid, "dfltuid=%u"}, | 60 | {Opt_dfltuid, "dfltuid=%u"}, |
61 | {Opt_dfltgid, "dfltgid=%u"}, | 61 | {Opt_dfltgid, "dfltgid=%u"}, |
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
119 | 119 | ||
120 | const struct file_operations v9fs_dir_operations = { | 120 | const struct file_operations v9fs_dir_operations = { |
121 | .read = generic_read_dir, | 121 | .read = generic_read_dir, |
122 | .llseek = generic_file_llseek, | ||
122 | .readdir = v9fs_dir_readdir, | 123 | .readdir = v9fs_dir_readdir, |
123 | .open = v9fs_file_open, | 124 | .open = v9fs_file_open, |
124 | .release = v9fs_dir_release, | 125 | .release = v9fs_dir_release, |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
626 | return NULL; | 626 | return NULL; |
627 | 627 | ||
628 | error: | 628 | error: |
629 | if (fid) | 629 | p9_client_clunk(fid); |
630 | p9_client_clunk(fid); | ||
631 | 630 | ||
632 | return ERR_PTR(result); | 631 | return ERR_PTR(result); |
633 | } | 632 | } |
diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..9e9d70c02a07 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY | |||
136 | If you are not using a security module that requires using | 136 | If you are not using a security module that requires using |
137 | extended attributes for file security labels, say N. | 137 | extended attributes for file security labels, say N. |
138 | 138 | ||
139 | config EXT4DEV_FS | 139 | config EXT4_FS |
140 | tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" | 140 | tristate "The Extended 4 (ext4) filesystem" |
141 | depends on EXPERIMENTAL | ||
142 | select JBD2 | 141 | select JBD2 |
143 | select CRC16 | 142 | select CRC16 |
144 | help | 143 | help |
145 | Ext4dev is a predecessor filesystem of the next generation | 144 | This is the next generation of the ext3 filesystem. |
146 | extended fs ext4, based on ext3 filesystem code. It will be | ||
147 | renamed ext4 fs later, once ext4dev is mature and stabilized. | ||
148 | 145 | ||
149 | Unlike the change from ext2 filesystem to ext3 filesystem, | 146 | Unlike the change from ext2 filesystem to ext3 filesystem, |
150 | the on-disk format of ext4dev is not the same as ext3 any more: | 147 | the on-disk format of ext4 is not forwards compatible with |
151 | it is based on extent maps and it supports 48-bit physical block | 148 | ext3; it is based on extent maps and it supports 48-bit |
152 | numbers. These combined on-disk format changes will allow | 149 | physical block numbers. The ext4 filesystem also supports delayed |
153 | ext4dev/ext4 to handle more than 16 TB filesystem volumes -- | 150 | allocation, persistent preallocation, high resolution time stamps, |
154 | a hard limit that ext3 cannot overcome without changing the | 151 | and a number of other features to improve performance and speed |
155 | on-disk format. | 152 | up fsck time. For more information, please see the web pages at |
156 | 153 | http://ext4.wiki.kernel.org. | |
157 | Other than extent maps and 48-bit block numbers, ext4dev also is | 154 | |
158 | likely to have other new features such as persistent preallocation, | 155 | The ext4 filesystem will support mounting an ext3 |
159 | high resolution time stamps, and larger file support etc. These | 156 | filesystem; while there will be some performance gains from |
160 | features will be added to ext4dev gradually. | 157 | the delayed allocation and inode table readahead, the best |
158 | performance gains will require enabling ext4 features in the | ||
159 | filesystem, or formatting a new filesystem as an ext4 | ||
160 | filesystem initially. | ||
161 | 161 | ||
162 | To compile this file system support as a module, choose M here. The | 162 | To compile this file system support as a module, choose M here. The |
163 | module will be called ext4dev. | 163 | module will be called ext4dev. |
164 | 164 | ||
165 | If unsure, say N. | 165 | If unsure, say N. |
166 | 166 | ||
167 | config EXT4DEV_FS_XATTR | 167 | config EXT4DEV_COMPAT |
168 | bool "Ext4dev extended attributes" | 168 | bool "Enable ext4dev compatibility" |
169 | depends on EXT4DEV_FS | 169 | depends on EXT4_FS |
170 | help | ||
171 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
172 | renamed from ext4dev to ext4. Unfortunately there are some | ||
173 | legacy userspace programs (such as klibc's fstype) that have | ||
174 | "ext4dev" hardcoded. | ||
175 | |||
176 | To enable backwards compatibility so that systems that are | ||
177 | still expecting to mount ext4 filesystems using ext4dev, | ||
178 | choose Y here. This feature will go away by 2.6.31, so | ||
179 | please arrange to get your userspace programs fixed! | ||
180 | |||
181 | config EXT4_FS_XATTR | ||
182 | bool "Ext4 extended attributes" | ||
183 | depends on EXT4_FS | ||
170 | default y | 184 | default y |
171 | help | 185 | help |
172 | Extended attributes are name:value pairs associated with inodes by | 186 | Extended attributes are name:value pairs associated with inodes by |
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR | |||
175 | 189 | ||
176 | If unsure, say N. | 190 | If unsure, say N. |
177 | 191 | ||
178 | You need this for POSIX ACL support on ext4dev/ext4. | 192 | You need this for POSIX ACL support on ext4. |
179 | 193 | ||
180 | config EXT4DEV_FS_POSIX_ACL | 194 | config EXT4_FS_POSIX_ACL |
181 | bool "Ext4dev POSIX Access Control Lists" | 195 | bool "Ext4 POSIX Access Control Lists" |
182 | depends on EXT4DEV_FS_XATTR | 196 | depends on EXT4_FS_XATTR |
183 | select FS_POSIX_ACL | 197 | select FS_POSIX_ACL |
184 | help | 198 | help |
185 | POSIX Access Control Lists (ACLs) support permissions for users and | 199 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL | |||
190 | 204 | ||
191 | If you don't know what Access Control Lists are, say N | 205 | If you don't know what Access Control Lists are, say N |
192 | 206 | ||
193 | config EXT4DEV_FS_SECURITY | 207 | config EXT4_FS_SECURITY |
194 | bool "Ext4dev Security Labels" | 208 | bool "Ext4 Security Labels" |
195 | depends on EXT4DEV_FS_XATTR | 209 | depends on EXT4_FS_XATTR |
196 | help | 210 | help |
197 | Security labels support alternative access control models | 211 | Security labels support alternative access control models |
198 | implemented by security modules like SELinux. This option | 212 | implemented by security modules like SELinux. This option |
199 | enables an extended attribute handler for file security | 213 | enables an extended attribute handler for file security |
200 | labels in the ext4dev/ext4 filesystem. | 214 | labels in the ext4 filesystem. |
201 | 215 | ||
202 | If you are not using a security module that requires using | 216 | If you are not using a security module that requires using |
203 | extended attributes for file security labels, say N. | 217 | extended attributes for file security labels, say N. |
@@ -206,17 +220,16 @@ config JBD | |||
206 | tristate | 220 | tristate |
207 | help | 221 | help |
208 | This is a generic journalling layer for block devices. It is | 222 | This is a generic journalling layer for block devices. It is |
209 | currently used by the ext3 and OCFS2 file systems, but it could | 223 | currently used by the ext3 file system, but it could also be |
210 | also be used to add journal support to other file systems or block | 224 | used to add journal support to other file systems or block |
211 | devices such as RAID or LVM. | 225 | devices such as RAID or LVM. |
212 | 226 | ||
213 | If you are using the ext3 or OCFS2 file systems, you need to | 227 | If you are using the ext3 file system, you need to say Y here. |
214 | say Y here. If you are not using ext3 OCFS2 then you will probably | 228 | If you are not using ext3 then you will probably want to say N. |
215 | want to say N. | ||
216 | 229 | ||
217 | To compile this device as a module, choose M here: the module will be | 230 | To compile this device as a module, choose M here: the module will be |
218 | called jbd. If you are compiling ext3 or OCFS2 into the kernel, | 231 | called jbd. If you are compiling ext3 into the kernel, you |
219 | you cannot compile this code as a module. | 232 | cannot compile this code as a module. |
220 | 233 | ||
221 | config JBD_DEBUG | 234 | config JBD_DEBUG |
222 | bool "JBD (ext3) debugging support" | 235 | bool "JBD (ext3) debugging support" |
@@ -240,22 +253,23 @@ config JBD2 | |||
240 | help | 253 | help |
241 | This is a generic journaling layer for block devices that support | 254 | This is a generic journaling layer for block devices that support |
242 | both 32-bit and 64-bit block numbers. It is currently used by | 255 | both 32-bit and 64-bit block numbers. It is currently used by |
243 | the ext4dev/ext4 filesystem, but it could also be used to add | 256 | the ext4 and OCFS2 filesystems, but it could also be used to add |
244 | journal support to other file systems or block devices such | 257 | journal support to other file systems or block devices such |
245 | as RAID or LVM. | 258 | as RAID or LVM. |
246 | 259 | ||
247 | If you are using ext4dev/ext4, you need to say Y here. If you are not | 260 | If you are using ext4 or OCFS2, you need to say Y here. |
248 | using ext4dev/ext4 then you will probably want to say N. | 261 | If you are not using ext4 or OCFS2 then you will |
262 | probably want to say N. | ||
249 | 263 | ||
250 | To compile this device as a module, choose M here. The module will be | 264 | To compile this device as a module, choose M here. The module will be |
251 | called jbd2. If you are compiling ext4dev/ext4 into the kernel, | 265 | called jbd2. If you are compiling ext4 or OCFS2 into the kernel, |
252 | you cannot compile this code as a module. | 266 | you cannot compile this code as a module. |
253 | 267 | ||
254 | config JBD2_DEBUG | 268 | config JBD2_DEBUG |
255 | bool "JBD2 (ext4dev/ext4) debugging support" | 269 | bool "JBD2 (ext4) debugging support" |
256 | depends on JBD2 && DEBUG_FS | 270 | depends on JBD2 && DEBUG_FS |
257 | help | 271 | help |
258 | If you are using the ext4dev/ext4 journaled file system (or | 272 | If you are using the ext4 journaled file system (or |
259 | potentially any other filesystem/device using JBD2), this option | 273 | potentially any other filesystem/device using JBD2), this option |
260 | allows you to enable debugging output while the system is running, | 274 | allows you to enable debugging output while the system is running, |
261 | in order to help track down any problems you are having. | 275 | in order to help track down any problems you are having. |
@@ -270,9 +284,9 @@ config JBD2_DEBUG | |||
270 | config FS_MBCACHE | 284 | config FS_MBCACHE |
271 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) | 285 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) |
272 | tristate | 286 | tristate |
273 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR | 287 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR |
274 | default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y | 288 | default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y |
275 | default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m | 289 | default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m |
276 | 290 | ||
277 | config REISERFS_FS | 291 | config REISERFS_FS |
278 | tristate "Reiserfs support" | 292 | tristate "Reiserfs support" |
@@ -419,6 +433,14 @@ config FS_POSIX_ACL | |||
419 | bool | 433 | bool |
420 | default n | 434 | default n |
421 | 435 | ||
436 | config FILE_LOCKING | ||
437 | bool "Enable POSIX file locking API" if EMBEDDED | ||
438 | default y | ||
439 | help | ||
440 | This option enables standard file locking support, required | ||
441 | for filesystems like NFS and for the flock() system | ||
442 | call. Disabling this option saves about 11k. | ||
443 | |||
422 | source "fs/xfs/Kconfig" | 444 | source "fs/xfs/Kconfig" |
423 | source "fs/gfs2/Kconfig" | 445 | source "fs/gfs2/Kconfig" |
424 | 446 | ||
@@ -426,7 +448,7 @@ config OCFS2_FS | |||
426 | tristate "OCFS2 file system support" | 448 | tristate "OCFS2 file system support" |
427 | depends on NET && SYSFS | 449 | depends on NET && SYSFS |
428 | select CONFIGFS_FS | 450 | select CONFIGFS_FS |
429 | select JBD | 451 | select JBD2 |
430 | select CRC32 | 452 | select CRC32 |
431 | help | 453 | help |
432 | OCFS2 is a general purpose extent based shared disk cluster file | 454 | OCFS2 is a general purpose extent based shared disk cluster file |
@@ -497,6 +519,16 @@ config OCFS2_DEBUG_FS | |||
497 | this option for debugging only as it is likely to decrease | 519 | this option for debugging only as it is likely to decrease |
498 | performance of the filesystem. | 520 | performance of the filesystem. |
499 | 521 | ||
522 | config OCFS2_COMPAT_JBD | ||
523 | bool "Use JBD for compatibility" | ||
524 | depends on OCFS2_FS | ||
525 | default n | ||
526 | select JBD | ||
527 | help | ||
528 | The ocfs2 filesystem now uses JBD2 for its journalling. JBD2 | ||
529 | is backwards compatible with JBD. It is safe to say N here. | ||
530 | However, if you really want to use the original JBD, say Y here. | ||
531 | |||
500 | endif # BLOCK | 532 | endif # BLOCK |
501 | 533 | ||
502 | config DNOTIFY | 534 | config DNOTIFY |
@@ -1765,6 +1797,28 @@ config SUNRPC_XPRT_RDMA | |||
1765 | 1797 | ||
1766 | If unsure, say N. | 1798 | If unsure, say N. |
1767 | 1799 | ||
1800 | config SUNRPC_REGISTER_V4 | ||
1801 | bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" | ||
1802 | depends on SUNRPC && EXPERIMENTAL | ||
1803 | default n | ||
1804 | help | ||
1805 | Sun added support for registering RPC services at an IPv6 | ||
1806 | address by creating two new versions of the rpcbind protocol | ||
1807 | (RFC 1833). | ||
1808 | |||
1809 | This option enables support in the kernel RPC server for | ||
1810 | registering kernel RPC services via version 4 of the rpcbind | ||
1811 | protocol. If you enable this option, you must run a portmapper | ||
1812 | daemon that supports rpcbind protocol version 4. | ||
1813 | |||
1814 | Serving NFS over IPv6 from knfsd (the kernel's NFS server) | ||
1815 | requires that you enable this option and use a portmapper that | ||
1816 | supports rpcbind version 4. | ||
1817 | |||
1818 | If unsure, say N to get traditional behavior (register kernel | ||
1819 | RPC services using only rpcbind version 2). Distributions | ||
1820 | using the legacy Linux portmapper daemon must say N here. | ||
1821 | |||
1768 | config RPCSEC_GSS_KRB5 | 1822 | config RPCSEC_GSS_KRB5 |
1769 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" | 1823 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" |
1770 | depends on SUNRPC && EXPERIMENTAL | 1824 | depends on SUNRPC && EXPERIMENTAL |
@@ -1930,6 +1984,16 @@ config CIFS_WEAK_PW_HASH | |||
1930 | 1984 | ||
1931 | If unsure, say N. | 1985 | If unsure, say N. |
1932 | 1986 | ||
1987 | config CIFS_UPCALL | ||
1988 | bool "Kerberos/SPNEGO advanced session setup" | ||
1989 | depends on CIFS && KEYS | ||
1990 | help | ||
1991 | Enables an upcall mechanism for CIFS which accesses | ||
1992 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1993 | Kerberos tickets which are needed to mount to certain secure servers | ||
1994 | (for which more secure Kerberos authentication is required). If | ||
1995 | unsure, say N. | ||
1996 | |||
1933 | config CIFS_XATTR | 1997 | config CIFS_XATTR |
1934 | bool "CIFS extended attributes" | 1998 | bool "CIFS extended attributes" |
1935 | depends on CIFS | 1999 | depends on CIFS |
@@ -1982,17 +2046,6 @@ config CIFS_EXPERIMENTAL | |||
1982 | (which is disabled by default). See the file fs/cifs/README | 2046 | (which is disabled by default). See the file fs/cifs/README |
1983 | for more details. If unsure, say N. | 2047 | for more details. If unsure, say N. |
1984 | 2048 | ||
1985 | config CIFS_UPCALL | ||
1986 | bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" | ||
1987 | depends on CIFS_EXPERIMENTAL | ||
1988 | depends on KEYS | ||
1989 | help | ||
1990 | Enables an upcall mechanism for CIFS which accesses | ||
1991 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1992 | Kerberos tickets which are needed to mount to certain secure servers | ||
1993 | (for which more secure Kerberos authentication is required). If | ||
1994 | unsure, say N. | ||
1995 | |||
1996 | config CIFS_DFS_UPCALL | 2049 | config CIFS_DFS_UPCALL |
1997 | bool "DFS feature support (EXPERIMENTAL)" | 2050 | bool "DFS feature support (EXPERIMENTAL)" |
1998 | depends on CIFS_EXPERIMENTAL | 2051 | depends on CIFS_EXPERIMENTAL |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4a551af6f3fc..17c9c5ec14c5 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -59,10 +59,12 @@ config BINFMT_SHARED_FLAT | |||
59 | help | 59 | help |
60 | Support FLAT shared libraries | 60 | Support FLAT shared libraries |
61 | 61 | ||
62 | config HAVE_AOUT | ||
63 | def_bool n | ||
64 | |||
62 | config BINFMT_AOUT | 65 | config BINFMT_AOUT |
63 | tristate "Kernel support for a.out and ECOFF binaries" | 66 | tristate "Kernel support for a.out and ECOFF binaries" |
64 | depends on ARCH_SUPPORTS_AOUT && \ | 67 | depends on HAVE_AOUT |
65 | (X86_32 || ALPHA || ARM || M68K) | ||
66 | ---help--- | 68 | ---help--- |
67 | A.out (Assembler.OUTput) is a set of formats for libraries and | 69 | A.out (Assembler.OUTput) is a set of formats for libraries and |
68 | executables used in the earliest versions of UNIX. Linux used | 70 | executables used in the earliest versions of UNIX. Linux used |
diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..b6f27dc26b72 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | obj-y := open.o read_write.o file_table.o super.o \ | 8 | obj-y := open.o read_write.o file_table.o super.o \ |
9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ |
10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
@@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o | |||
27 | obj-$(CONFIG_SIGNALFD) += signalfd.o | 27 | obj-$(CONFIG_SIGNALFD) += signalfd.o |
28 | obj-$(CONFIG_TIMERFD) += timerfd.o | 28 | obj-$(CONFIG_TIMERFD) += timerfd.o |
29 | obj-$(CONFIG_EVENTFD) += eventfd.o | 29 | obj-$(CONFIG_EVENTFD) += eventfd.o |
30 | obj-$(CONFIG_FILE_LOCKING) += locks.o | ||
30 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 31 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
31 | 32 | ||
32 | nfsd-$(CONFIG_NFSD) := nfsctl.o | 33 | nfsd-$(CONFIG_NFSD) := nfsctl.o |
@@ -69,7 +70,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
69 | # Do not add any filesystems before this line | 70 | # Do not add any filesystems before this line |
70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 71 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 72 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
72 | obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 73 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev |
73 | obj-$(CONFIG_JBD) += jbd/ | 74 | obj-$(CONFIG_JBD) += jbd/ |
74 | obj-$(CONFIG_JBD2) += jbd2/ | 75 | obj-$(CONFIG_JBD2) += jbd2/ |
75 | obj-$(CONFIG_EXT2_FS) += ext2/ | 76 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
@@ -197,6 +197,7 @@ out: | |||
197 | 197 | ||
198 | const struct file_operations adfs_dir_operations = { | 198 | const struct file_operations adfs_dir_operations = { |
199 | .read = generic_read_dir, | 199 | .read = generic_read_dir, |
200 | .llseek = generic_file_llseek, | ||
200 | .readdir = adfs_readdir, | 201 | .readdir = adfs_readdir, |
201 | .fsync = file_fsync, | 202 | .fsync = file_fsync, |
202 | }; | 203 | }; |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 26f3b43726bb..7f83a46f2b7e 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
157 | 157 | ||
158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; | 158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; |
159 | 159 | ||
160 | static match_table_t tokens = { | 160 | static const match_table_t tokens = { |
161 | {Opt_uid, "uid=%u"}, | 161 | {Opt_uid, "uid=%u"}, |
162 | {Opt_gid, "gid=%u"}, | 162 | {Opt_gid, "gid=%u"}, |
163 | {Opt_ownmask, "ownmask=%o"}, | 163 | {Opt_ownmask, "ownmask=%o"}, |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); | |||
19 | 19 | ||
20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
22 | .llseek = generic_file_llseek, | ||
22 | .readdir = affs_readdir, | 23 | .readdir = affs_readdir, |
23 | .fsync = file_fsync, | 24 | .fsync = file_fsync, |
24 | }; | 25 | }; |
diff --git a/fs/affs/super.c b/fs/affs/super.c index 3a89094f93d0..8989c93193ed 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -135,7 +135,7 @@ enum { | |||
135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, | 135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, |
136 | }; | 136 | }; |
137 | 137 | ||
138 | static match_table_t tokens = { | 138 | static const match_table_t tokens = { |
139 | {Opt_bs, "bs=%u"}, | 139 | {Opt_bs, "bs=%u"}, |
140 | {Opt_mode, "mode=%o"}, | 140 | {Opt_mode, "mode=%o"}, |
141 | {Opt_mufs, "mufs"}, | 141 | {Opt_mufs, "mufs"}, |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 250d8c4d66e4..aee239a048cb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -64,7 +64,7 @@ enum { | |||
64 | afs_opt_vol, | 64 | afs_opt_vol, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | static match_table_t afs_options_list = { | 67 | static const match_table_t afs_options_list = { |
68 | { afs_opt_cell, "cell=%s" }, | 68 | { afs_opt_cell, "cell=%s" }, |
69 | { afs_opt_rwpath, "rwpath" }, | 69 | { afs_opt_rwpath, "rwpath" }, |
70 | { afs_opt_vol, "vol=%s" }, | 70 | { afs_opt_vol, "vol=%s" }, |
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index dda510d31f84..b70eea1e8c59 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c | |||
@@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = { | |||
59 | 59 | ||
60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; | 60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; |
61 | 61 | ||
62 | static match_table_t autofs_tokens = { | 62 | static const match_table_t autofs_tokens = { |
63 | {Opt_fd, "fd=%u"}, | 63 | {Opt_fd, "fd=%u"}, |
64 | {Opt_uid, "uid=%u"}, | 64 | {Opt_uid, "uid=%u"}, |
65 | {Opt_gid, "gid=%u"}, | 65 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7bb3e5ba0537..45d55819203d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -213,7 +213,7 @@ static const struct super_operations autofs4_sops = { | |||
213 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, | 213 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, |
214 | Opt_indirect, Opt_direct, Opt_offset}; | 214 | Opt_indirect, Opt_direct, Opt_offset}; |
215 | 215 | ||
216 | static match_table_t tokens = { | 216 | static const match_table_t tokens = { |
217 | {Opt_fd, "fd=%u"}, | 217 | {Opt_fd, "fd=%u"}, |
218 | {Opt_uid, "uid=%u"}, | 218 | {Opt_uid, "uid=%u"}, |
219 | {Opt_gid, "gid=%u"}, | 219 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { | |||
36 | .release = dcache_dir_close, | 36 | .release = dcache_dir_close, |
37 | .read = generic_read_dir, | 37 | .read = generic_read_dir, |
38 | .readdir = dcache_readdir, | 38 | .readdir = dcache_readdir, |
39 | .llseek = dcache_dir_lseek, | ||
39 | .ioctl = autofs4_root_ioctl, | 40 | .ioctl = autofs4_root_ioctl, |
40 | }; | 41 | }; |
41 | 42 | ||
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { | |||
44 | .release = dcache_dir_close, | 45 | .release = dcache_dir_close, |
45 | .read = generic_read_dir, | 46 | .read = generic_read_dir, |
46 | .readdir = dcache_readdir, | 47 | .readdir = dcache_readdir, |
48 | .llseek = dcache_dir_lseek, | ||
47 | }; | 49 | }; |
48 | 50 | ||
49 | const struct inode_operations autofs4_indirect_root_inode_operations = { | 51 | const struct inode_operations autofs4_indirect_root_inode_operations = { |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..9286b2af893a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
66 | static const struct file_operations befs_dir_operations = { | 66 | static const struct file_operations befs_dir_operations = { |
67 | .read = generic_read_dir, | 67 | .read = generic_read_dir, |
68 | .readdir = befs_readdir, | 68 | .readdir = befs_readdir, |
69 | .llseek = generic_file_llseek, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static const struct inode_operations befs_dir_inode_operations = { | 72 | static const struct inode_operations befs_dir_inode_operations = { |
@@ -649,7 +650,7 @@ enum { | |||
649 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, | 650 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, |
650 | }; | 651 | }; |
651 | 652 | ||
652 | static match_table_t befs_tokens = { | 653 | static const match_table_t befs_tokens = { |
653 | {Opt_uid, "uid=%d"}, | 654 | {Opt_uid, "uid=%d"}, |
654 | {Opt_gid, "gid=%d"}, | 655 | {Opt_gid, "gid=%d"}, |
655 | {Opt_charset, "iocharset=%s"}, | 656 | {Opt_charset, "iocharset=%s"}, |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
125 | inode->i_ino); | 125 | inode->i_ino); |
126 | if (err) { | 126 | if (err) { |
127 | inode_dec_link_count(inode); | 127 | inode_dec_link_count(inode); |
128 | iput(inode); | ||
129 | mutex_unlock(&info->bfs_lock); | 128 | mutex_unlock(&info->bfs_lock); |
129 | iput(inode); | ||
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | mutex_unlock(&info->bfs_lock); | 132 | mutex_unlock(&info->bfs_lock); |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 56372ecf1690..dfc0197905ca 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
914 | /* Stash our initial stack pointer into the mm structure */ | 914 | /* Stash our initial stack pointer into the mm structure */ |
915 | current->mm->start_stack = (unsigned long )sp; | 915 | current->mm->start_stack = (unsigned long )sp; |
916 | 916 | ||
917 | 917 | #ifdef FLAT_PLAT_INIT | |
918 | FLAT_PLAT_INIT(regs); | ||
919 | #endif | ||
918 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", | 920 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", |
919 | (int)regs, (int)start_addr, (int)current->mm->start_stack); | 921 | (int)regs, (int)start_addr, (int)current->mm->start_stack); |
920 | 922 | ||
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 756205314c24..8d7e88e02e0f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
120 | if (bprm->misc_bang) | 120 | if (bprm->misc_bang) |
121 | goto _ret; | 121 | goto _ret; |
122 | 122 | ||
123 | bprm->misc_bang = 1; | ||
124 | |||
125 | /* to keep locking time low, we copy the interpreter string */ | 123 | /* to keep locking time low, we copy the interpreter string */ |
126 | read_lock(&entries_lock); | 124 | read_lock(&entries_lock); |
127 | fmt = check_file(bprm); | 125 | fmt = check_file(bprm); |
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
199 | if (retval < 0) | 197 | if (retval < 0) |
200 | goto _error; | 198 | goto _error; |
201 | 199 | ||
200 | bprm->misc_bang = 1; | ||
201 | |||
202 | retval = search_binary_handler (bprm, regs); | 202 | retval = search_binary_handler (bprm, regs); |
203 | if (retval < 0) | 203 | if (retval < 0) |
204 | goto _error; | 204 | goto _error; |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b35fe6..19caf7c962ac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |||
107 | BUG_ON(bip == NULL); | 107 | BUG_ON(bip == NULL); |
108 | 108 | ||
109 | /* A cloned bio doesn't own the integrity metadata */ | 109 | /* A cloned bio doesn't own the integrity metadata */ |
110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | 110 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) |
111 | && bip->bip_buf != NULL) | ||
111 | kfree(bip->bip_buf); | 112 | kfree(bip->bip_buf); |
112 | 113 | ||
113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | 114 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); |
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
150 | } | 151 | } |
151 | EXPORT_SYMBOL(bio_integrity_add_page); | 152 | EXPORT_SYMBOL(bio_integrity_add_page); |
152 | 153 | ||
154 | static int bdev_integrity_enabled(struct block_device *bdev, int rw) | ||
155 | { | ||
156 | struct blk_integrity *bi = bdev_get_integrity(bdev); | ||
157 | |||
158 | if (bi == NULL) | ||
159 | return 0; | ||
160 | |||
161 | if (rw == READ && bi->verify_fn != NULL && | ||
162 | (bi->flags & INTEGRITY_FLAG_READ)) | ||
163 | return 1; | ||
164 | |||
165 | if (rw == WRITE && bi->generate_fn != NULL && | ||
166 | (bi->flags & INTEGRITY_FLAG_WRITE)) | ||
167 | return 1; | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
153 | /** | 172 | /** |
154 | * bio_integrity_enabled - Check whether integrity can be passed | 173 | * bio_integrity_enabled - Check whether integrity can be passed |
155 | * @bio: bio to check | 174 | * @bio: bio to check |
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) | |||
313 | } | 332 | } |
314 | } | 333 | } |
315 | 334 | ||
335 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | ||
336 | { | ||
337 | if (bi) | ||
338 | return bi->tuple_size; | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
316 | /** | 343 | /** |
317 | * bio_integrity_prep - Prepare bio for integrity I/O | 344 | * bio_integrity_prep - Prepare bio for integrity I/O |
318 | * @bio: bio to prepare | 345 | * @bio: bio to prepare |
@@ -30,7 +30,7 @@ | |||
30 | 30 | ||
31 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
32 | 32 | ||
33 | mempool_t *bio_split_pool __read_mostly; | 33 | static mempool_t *bio_split_pool __read_mostly; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct | |||
60 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * see comment near bvec_array define! | 63 | * If 'bs' is given, lookup the pool and do the mempool alloc. |
64 | * If not, this is a bio_kmalloc() allocation and just do a | ||
65 | * kzalloc() for the exact number of vecs right away. | ||
64 | */ | 66 | */ |
65 | switch (nr) { | 67 | if (bs) { |
66 | case 1 : *idx = 0; break; | 68 | /* |
67 | case 2 ... 4: *idx = 1; break; | 69 | * see comment near bvec_array define! |
68 | case 5 ... 16: *idx = 2; break; | 70 | */ |
69 | case 17 ... 64: *idx = 3; break; | 71 | switch (nr) { |
70 | case 65 ... 128: *idx = 4; break; | 72 | case 1: |
71 | case 129 ... BIO_MAX_PAGES: *idx = 5; break; | 73 | *idx = 0; |
74 | break; | ||
75 | case 2 ... 4: | ||
76 | *idx = 1; | ||
77 | break; | ||
78 | case 5 ... 16: | ||
79 | *idx = 2; | ||
80 | break; | ||
81 | case 17 ... 64: | ||
82 | *idx = 3; | ||
83 | break; | ||
84 | case 65 ... 128: | ||
85 | *idx = 4; | ||
86 | break; | ||
87 | case 129 ... BIO_MAX_PAGES: | ||
88 | *idx = 5; | ||
89 | break; | ||
72 | default: | 90 | default: |
73 | return NULL; | 91 | return NULL; |
74 | } | 92 | } |
75 | /* | ||
76 | * idx now points to the pool we want to allocate from | ||
77 | */ | ||
78 | 93 | ||
79 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | 94 | /* |
80 | if (bvl) | 95 | * idx now points to the pool we want to allocate from |
81 | memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | 96 | */ |
97 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | ||
98 | if (bvl) | ||
99 | memset(bvl, 0, | ||
100 | bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | ||
101 | } else | ||
102 | bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
82 | 103 | ||
83 | return bvl; | 104 | return bvl; |
84 | } | 105 | } |
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) | |||
107 | bio_free(bio, fs_bio_set); | 128 | bio_free(bio, fs_bio_set); |
108 | } | 129 | } |
109 | 130 | ||
131 | static void bio_kmalloc_destructor(struct bio *bio) | ||
132 | { | ||
133 | kfree(bio->bi_io_vec); | ||
134 | kfree(bio); | ||
135 | } | ||
136 | |||
110 | void bio_init(struct bio *bio) | 137 | void bio_init(struct bio *bio) |
111 | { | 138 | { |
112 | memset(bio, 0, sizeof(*bio)); | 139 | memset(bio, 0, sizeof(*bio)); |
113 | bio->bi_flags = 1 << BIO_UPTODATE; | 140 | bio->bi_flags = 1 << BIO_UPTODATE; |
141 | bio->bi_comp_cpu = -1; | ||
114 | atomic_set(&bio->bi_cnt, 1); | 142 | atomic_set(&bio->bi_cnt, 1); |
115 | } | 143 | } |
116 | 144 | ||
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio) | |||
118 | * bio_alloc_bioset - allocate a bio for I/O | 146 | * bio_alloc_bioset - allocate a bio for I/O |
119 | * @gfp_mask: the GFP_ mask given to the slab allocator | 147 | * @gfp_mask: the GFP_ mask given to the slab allocator |
120 | * @nr_iovecs: number of iovecs to pre-allocate | 148 | * @nr_iovecs: number of iovecs to pre-allocate |
121 | * @bs: the bio_set to allocate from | 149 | * @bs: the bio_set to allocate from. If %NULL, just use kmalloc |
122 | * | 150 | * |
123 | * Description: | 151 | * Description: |
124 | * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. | 152 | * bio_alloc_bioset will first try its own mempool to satisfy the allocation. |
125 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 153 | * If %__GFP_WAIT is set then we will block on the internal pool waiting |
126 | * for a &struct bio to become free. | 154 | * for a &struct bio to become free. If a %NULL @bs is passed in, we will |
155 | * fall back to just using @kmalloc to allocate the required memory. | ||
127 | * | 156 | * |
128 | * allocate bio and iovecs from the memory pools specified by the | 157 | * allocate bio and iovecs from the memory pools specified by the |
129 | * bio_set structure. | 158 | * bio_set structure, or @kmalloc if none given. |
130 | **/ | 159 | **/ |
131 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 160 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
132 | { | 161 | { |
133 | struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); | 162 | struct bio *bio; |
163 | |||
164 | if (bs) | ||
165 | bio = mempool_alloc(bs->bio_pool, gfp_mask); | ||
166 | else | ||
167 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
134 | 168 | ||
135 | if (likely(bio)) { | 169 | if (likely(bio)) { |
136 | struct bio_vec *bvl = NULL; | 170 | struct bio_vec *bvl = NULL; |
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
141 | 175 | ||
142 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 176 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
143 | if (unlikely(!bvl)) { | 177 | if (unlikely(!bvl)) { |
144 | mempool_free(bio, bs->bio_pool); | 178 | if (bs) |
179 | mempool_free(bio, bs->bio_pool); | ||
180 | else | ||
181 | kfree(bio); | ||
145 | bio = NULL; | 182 | bio = NULL; |
146 | goto out; | 183 | goto out; |
147 | } | 184 | } |
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
164 | return bio; | 201 | return bio; |
165 | } | 202 | } |
166 | 203 | ||
204 | /* | ||
205 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
206 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
207 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
208 | * for more permanent bio allocations (like allocating some bio's for | ||
209 | * initalization or setup purposes). | ||
210 | */ | ||
211 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | ||
212 | { | ||
213 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
214 | |||
215 | if (bio) | ||
216 | bio->bi_destructor = bio_kmalloc_destructor; | ||
217 | |||
218 | return bio; | ||
219 | } | ||
220 | |||
167 | void zero_fill_bio(struct bio *bio) | 221 | void zero_fill_bio(struct bio *bio) |
168 | { | 222 | { |
169 | unsigned long flags; | 223 | unsigned long flags; |
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) | |||
208 | return bio->bi_phys_segments; | 262 | return bio->bi_phys_segments; |
209 | } | 263 | } |
210 | 264 | ||
211 | inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | ||
212 | { | ||
213 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | ||
214 | blk_recount_segments(q, bio); | ||
215 | |||
216 | return bio->bi_hw_segments; | ||
217 | } | ||
218 | |||
219 | /** | 265 | /** |
220 | * __bio_clone - clone a bio | 266 | * __bio_clone - clone a bio |
221 | * @bio: destination bio | 267 | * @bio: destination bio |
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
350 | */ | 396 | */ |
351 | 397 | ||
352 | while (bio->bi_phys_segments >= q->max_phys_segments | 398 | while (bio->bi_phys_segments >= q->max_phys_segments |
353 | || bio->bi_hw_segments >= q->max_hw_segments | 399 | || bio->bi_phys_segments >= q->max_hw_segments) { |
354 | || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | ||
355 | 400 | ||
356 | if (retried_segments) | 401 | if (retried_segments) |
357 | return 0; | 402 | return 0; |
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
395 | } | 440 | } |
396 | 441 | ||
397 | /* If we may be able to merge these biovecs, force a recount */ | 442 | /* If we may be able to merge these biovecs, force a recount */ |
398 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | 443 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
399 | BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | ||
400 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 444 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
401 | 445 | ||
402 | bio->bi_vcnt++; | 446 | bio->bi_vcnt++; |
403 | bio->bi_phys_segments++; | 447 | bio->bi_phys_segments++; |
404 | bio->bi_hw_segments++; | ||
405 | done: | 448 | done: |
406 | bio->bi_size += len; | 449 | bio->bi_size += len; |
407 | return len; | 450 | return len; |
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
449 | 492 | ||
450 | struct bio_map_data { | 493 | struct bio_map_data { |
451 | struct bio_vec *iovecs; | 494 | struct bio_vec *iovecs; |
452 | int nr_sgvecs; | ||
453 | struct sg_iovec *sgvecs; | 495 | struct sg_iovec *sgvecs; |
496 | int nr_sgvecs; | ||
497 | int is_our_pages; | ||
454 | }; | 498 | }; |
455 | 499 | ||
456 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | 500 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
457 | struct sg_iovec *iov, int iov_count) | 501 | struct sg_iovec *iov, int iov_count, |
502 | int is_our_pages) | ||
458 | { | 503 | { |
459 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 504 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
460 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | 505 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); |
461 | bmd->nr_sgvecs = iov_count; | 506 | bmd->nr_sgvecs = iov_count; |
507 | bmd->is_our_pages = is_our_pages; | ||
462 | bio->bi_private = bmd; | 508 | bio->bi_private = bmd; |
463 | } | 509 | } |
464 | 510 | ||
@@ -469,20 +515,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) | |||
469 | kfree(bmd); | 515 | kfree(bmd); |
470 | } | 516 | } |
471 | 517 | ||
472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | 518 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, |
519 | gfp_t gfp_mask) | ||
473 | { | 520 | { |
474 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); | 521 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); |
475 | 522 | ||
476 | if (!bmd) | 523 | if (!bmd) |
477 | return NULL; | 524 | return NULL; |
478 | 525 | ||
479 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); | 526 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); |
480 | if (!bmd->iovecs) { | 527 | if (!bmd->iovecs) { |
481 | kfree(bmd); | 528 | kfree(bmd); |
482 | return NULL; | 529 | return NULL; |
483 | } | 530 | } |
484 | 531 | ||
485 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); | 532 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); |
486 | if (bmd->sgvecs) | 533 | if (bmd->sgvecs) |
487 | return bmd; | 534 | return bmd; |
488 | 535 | ||
@@ -491,8 +538,9 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | |||
491 | return NULL; | 538 | return NULL; |
492 | } | 539 | } |
493 | 540 | ||
494 | static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | 541 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
495 | int uncopy) | 542 | struct sg_iovec *iov, int iov_count, int uncopy, |
543 | int do_free_page) | ||
496 | { | 544 | { |
497 | int ret = 0, i; | 545 | int ret = 0, i; |
498 | struct bio_vec *bvec; | 546 | struct bio_vec *bvec; |
@@ -502,7 +550,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
502 | 550 | ||
503 | __bio_for_each_segment(bvec, bio, i, 0) { | 551 | __bio_for_each_segment(bvec, bio, i, 0) { |
504 | char *bv_addr = page_address(bvec->bv_page); | 552 | char *bv_addr = page_address(bvec->bv_page); |
505 | unsigned int bv_len = bvec->bv_len; | 553 | unsigned int bv_len = iovecs[i].bv_len; |
506 | 554 | ||
507 | while (bv_len && iov_idx < iov_count) { | 555 | while (bv_len && iov_idx < iov_count) { |
508 | unsigned int bytes; | 556 | unsigned int bytes; |
@@ -535,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
535 | } | 583 | } |
536 | } | 584 | } |
537 | 585 | ||
538 | if (uncopy) | 586 | if (do_free_page) |
539 | __free_page(bvec->bv_page); | 587 | __free_page(bvec->bv_page); |
540 | } | 588 | } |
541 | 589 | ||
@@ -552,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
552 | int bio_uncopy_user(struct bio *bio) | 600 | int bio_uncopy_user(struct bio *bio) |
553 | { | 601 | { |
554 | struct bio_map_data *bmd = bio->bi_private; | 602 | struct bio_map_data *bmd = bio->bi_private; |
555 | int ret; | 603 | int ret = 0; |
556 | |||
557 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); | ||
558 | 604 | ||
605 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) | ||
606 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, | ||
607 | bmd->nr_sgvecs, 1, bmd->is_our_pages); | ||
559 | bio_free_map_data(bmd); | 608 | bio_free_map_data(bmd); |
560 | bio_put(bio); | 609 | bio_put(bio); |
561 | return ret; | 610 | return ret; |
@@ -564,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) | |||
564 | /** | 613 | /** |
565 | * bio_copy_user_iov - copy user data to bio | 614 | * bio_copy_user_iov - copy user data to bio |
566 | * @q: destination block queue | 615 | * @q: destination block queue |
616 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
567 | * @iov: the iovec. | 617 | * @iov: the iovec. |
568 | * @iov_count: number of elements in the iovec | 618 | * @iov_count: number of elements in the iovec |
569 | * @write_to_vm: bool indicating writing to pages or not | 619 | * @write_to_vm: bool indicating writing to pages or not |
620 | * @gfp_mask: memory allocation flags | ||
570 | * | 621 | * |
571 | * Prepares and returns a bio for indirect user io, bouncing data | 622 | * Prepares and returns a bio for indirect user io, bouncing data |
572 | * to/from kernel pages as necessary. Must be paired with | 623 | * to/from kernel pages as necessary. Must be paired with |
573 | * call bio_uncopy_user() on io completion. | 624 | * call bio_uncopy_user() on io completion. |
574 | */ | 625 | */ |
575 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | 626 | struct bio *bio_copy_user_iov(struct request_queue *q, |
576 | int iov_count, int write_to_vm) | 627 | struct rq_map_data *map_data, |
628 | struct sg_iovec *iov, int iov_count, | ||
629 | int write_to_vm, gfp_t gfp_mask) | ||
577 | { | 630 | { |
578 | struct bio_map_data *bmd; | 631 | struct bio_map_data *bmd; |
579 | struct bio_vec *bvec; | 632 | struct bio_vec *bvec; |
@@ -596,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
596 | len += iov[i].iov_len; | 649 | len += iov[i].iov_len; |
597 | } | 650 | } |
598 | 651 | ||
599 | bmd = bio_alloc_map_data(nr_pages, iov_count); | 652 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
600 | if (!bmd) | 653 | if (!bmd) |
601 | return ERR_PTR(-ENOMEM); | 654 | return ERR_PTR(-ENOMEM); |
602 | 655 | ||
603 | ret = -ENOMEM; | 656 | ret = -ENOMEM; |
604 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 657 | bio = bio_alloc(gfp_mask, nr_pages); |
605 | if (!bio) | 658 | if (!bio) |
606 | goto out_bmd; | 659 | goto out_bmd; |
607 | 660 | ||
608 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 661 | bio->bi_rw |= (!write_to_vm << BIO_RW); |
609 | 662 | ||
610 | ret = 0; | 663 | ret = 0; |
664 | i = 0; | ||
611 | while (len) { | 665 | while (len) { |
612 | unsigned int bytes = PAGE_SIZE; | 666 | unsigned int bytes; |
667 | |||
668 | if (map_data) | ||
669 | bytes = 1U << (PAGE_SHIFT + map_data->page_order); | ||
670 | else | ||
671 | bytes = PAGE_SIZE; | ||
613 | 672 | ||
614 | if (bytes > len) | 673 | if (bytes > len) |
615 | bytes = len; | 674 | bytes = len; |
616 | 675 | ||
617 | page = alloc_page(q->bounce_gfp | GFP_KERNEL); | 676 | if (map_data) { |
677 | if (i == map_data->nr_entries) { | ||
678 | ret = -ENOMEM; | ||
679 | break; | ||
680 | } | ||
681 | page = map_data->pages[i++]; | ||
682 | } else | ||
683 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
618 | if (!page) { | 684 | if (!page) { |
619 | ret = -ENOMEM; | 685 | ret = -ENOMEM; |
620 | break; | 686 | break; |
@@ -633,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
633 | * success | 699 | * success |
634 | */ | 700 | */ |
635 | if (!write_to_vm) { | 701 | if (!write_to_vm) { |
636 | ret = __bio_copy_iov(bio, iov, iov_count, 0); | 702 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); |
637 | if (ret) | 703 | if (ret) |
638 | goto cleanup; | 704 | goto cleanup; |
639 | } | 705 | } |
640 | 706 | ||
641 | bio_set_map_data(bmd, bio, iov, iov_count); | 707 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); |
642 | return bio; | 708 | return bio; |
643 | cleanup: | 709 | cleanup: |
644 | bio_for_each_segment(bvec, bio, i) | 710 | if (!map_data) |
645 | __free_page(bvec->bv_page); | 711 | bio_for_each_segment(bvec, bio, i) |
712 | __free_page(bvec->bv_page); | ||
646 | 713 | ||
647 | bio_put(bio); | 714 | bio_put(bio); |
648 | out_bmd: | 715 | out_bmd: |
@@ -653,29 +720,32 @@ out_bmd: | |||
653 | /** | 720 | /** |
654 | * bio_copy_user - copy user data to bio | 721 | * bio_copy_user - copy user data to bio |
655 | * @q: destination block queue | 722 | * @q: destination block queue |
723 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
656 | * @uaddr: start of user address | 724 | * @uaddr: start of user address |
657 | * @len: length in bytes | 725 | * @len: length in bytes |
658 | * @write_to_vm: bool indicating writing to pages or not | 726 | * @write_to_vm: bool indicating writing to pages or not |
727 | * @gfp_mask: memory allocation flags | ||
659 | * | 728 | * |
660 | * Prepares and returns a bio for indirect user io, bouncing data | 729 | * Prepares and returns a bio for indirect user io, bouncing data |
661 | * to/from kernel pages as necessary. Must be paired with | 730 | * to/from kernel pages as necessary. Must be paired with |
662 | * call bio_uncopy_user() on io completion. | 731 | * call bio_uncopy_user() on io completion. |
663 | */ | 732 | */ |
664 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 733 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, |
665 | unsigned int len, int write_to_vm) | 734 | unsigned long uaddr, unsigned int len, |
735 | int write_to_vm, gfp_t gfp_mask) | ||
666 | { | 736 | { |
667 | struct sg_iovec iov; | 737 | struct sg_iovec iov; |
668 | 738 | ||
669 | iov.iov_base = (void __user *)uaddr; | 739 | iov.iov_base = (void __user *)uaddr; |
670 | iov.iov_len = len; | 740 | iov.iov_len = len; |
671 | 741 | ||
672 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | 742 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); |
673 | } | 743 | } |
674 | 744 | ||
675 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 745 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
676 | struct block_device *bdev, | 746 | struct block_device *bdev, |
677 | struct sg_iovec *iov, int iov_count, | 747 | struct sg_iovec *iov, int iov_count, |
678 | int write_to_vm) | 748 | int write_to_vm, gfp_t gfp_mask) |
679 | { | 749 | { |
680 | int i, j; | 750 | int i, j; |
681 | int nr_pages = 0; | 751 | int nr_pages = 0; |
@@ -701,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
701 | if (!nr_pages) | 771 | if (!nr_pages) |
702 | return ERR_PTR(-EINVAL); | 772 | return ERR_PTR(-EINVAL); |
703 | 773 | ||
704 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 774 | bio = bio_alloc(gfp_mask, nr_pages); |
705 | if (!bio) | 775 | if (!bio) |
706 | return ERR_PTR(-ENOMEM); | 776 | return ERR_PTR(-ENOMEM); |
707 | 777 | ||
708 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
709 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 779 | pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); |
710 | if (!pages) | 780 | if (!pages) |
711 | goto out; | 781 | goto out; |
712 | 782 | ||
@@ -785,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
785 | * @uaddr: start of user address | 855 | * @uaddr: start of user address |
786 | * @len: length in bytes | 856 | * @len: length in bytes |
787 | * @write_to_vm: bool indicating writing to pages or not | 857 | * @write_to_vm: bool indicating writing to pages or not |
858 | * @gfp_mask: memory allocation flags | ||
788 | * | 859 | * |
789 | * Map the user space address into a bio suitable for io to a block | 860 | * Map the user space address into a bio suitable for io to a block |
790 | * device. Returns an error pointer in case of error. | 861 | * device. Returns an error pointer in case of error. |
791 | */ | 862 | */ |
792 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | 863 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, |
793 | unsigned long uaddr, unsigned int len, int write_to_vm) | 864 | unsigned long uaddr, unsigned int len, int write_to_vm, |
865 | gfp_t gfp_mask) | ||
794 | { | 866 | { |
795 | struct sg_iovec iov; | 867 | struct sg_iovec iov; |
796 | 868 | ||
797 | iov.iov_base = (void __user *)uaddr; | 869 | iov.iov_base = (void __user *)uaddr; |
798 | iov.iov_len = len; | 870 | iov.iov_len = len; |
799 | 871 | ||
800 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | 872 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); |
801 | } | 873 | } |
802 | 874 | ||
803 | /** | 875 | /** |
@@ -807,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | |||
807 | * @iov: the iovec. | 879 | * @iov: the iovec. |
808 | * @iov_count: number of elements in the iovec | 880 | * @iov_count: number of elements in the iovec |
809 | * @write_to_vm: bool indicating writing to pages or not | 881 | * @write_to_vm: bool indicating writing to pages or not |
882 | * @gfp_mask: memory allocation flags | ||
810 | * | 883 | * |
811 | * Map the user space address into a bio suitable for io to a block | 884 | * Map the user space address into a bio suitable for io to a block |
812 | * device. Returns an error pointer in case of error. | 885 | * device. Returns an error pointer in case of error. |
813 | */ | 886 | */ |
814 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | 887 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, |
815 | struct sg_iovec *iov, int iov_count, | 888 | struct sg_iovec *iov, int iov_count, |
816 | int write_to_vm) | 889 | int write_to_vm, gfp_t gfp_mask) |
817 | { | 890 | { |
818 | struct bio *bio; | 891 | struct bio *bio; |
819 | 892 | ||
820 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); | 893 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, |
821 | 894 | gfp_mask); | |
822 | if (IS_ERR(bio)) | 895 | if (IS_ERR(bio)) |
823 | return bio; | 896 | return bio; |
824 | 897 | ||
@@ -942,19 +1015,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
942 | { | 1015 | { |
943 | struct bio_vec *bvec; | 1016 | struct bio_vec *bvec; |
944 | const int read = bio_data_dir(bio) == READ; | 1017 | const int read = bio_data_dir(bio) == READ; |
945 | char *p = bio->bi_private; | 1018 | struct bio_map_data *bmd = bio->bi_private; |
946 | int i; | 1019 | int i; |
1020 | char *p = bmd->sgvecs[0].iov_base; | ||
947 | 1021 | ||
948 | __bio_for_each_segment(bvec, bio, i, 0) { | 1022 | __bio_for_each_segment(bvec, bio, i, 0) { |
949 | char *addr = page_address(bvec->bv_page); | 1023 | char *addr = page_address(bvec->bv_page); |
1024 | int len = bmd->iovecs[i].bv_len; | ||
950 | 1025 | ||
951 | if (read && !err) | 1026 | if (read && !err) |
952 | memcpy(p, addr, bvec->bv_len); | 1027 | memcpy(p, addr, len); |
953 | 1028 | ||
954 | __free_page(bvec->bv_page); | 1029 | __free_page(bvec->bv_page); |
955 | p += bvec->bv_len; | 1030 | p += len; |
956 | } | 1031 | } |
957 | 1032 | ||
1033 | bio_free_map_data(bmd); | ||
958 | bio_put(bio); | 1034 | bio_put(bio); |
959 | } | 1035 | } |
960 | 1036 | ||
@@ -972,38 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
972 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1048 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
973 | gfp_t gfp_mask, int reading) | 1049 | gfp_t gfp_mask, int reading) |
974 | { | 1050 | { |
975 | unsigned long kaddr = (unsigned long)data; | ||
976 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
977 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
978 | const int nr_pages = end - start; | ||
979 | struct bio *bio; | 1051 | struct bio *bio; |
980 | struct bio_vec *bvec; | 1052 | struct bio_vec *bvec; |
981 | int i, ret; | 1053 | int i; |
982 | |||
983 | bio = bio_alloc(gfp_mask, nr_pages); | ||
984 | if (!bio) | ||
985 | return ERR_PTR(-ENOMEM); | ||
986 | |||
987 | while (len) { | ||
988 | struct page *page; | ||
989 | unsigned int bytes = PAGE_SIZE; | ||
990 | |||
991 | if (bytes > len) | ||
992 | bytes = len; | ||
993 | |||
994 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
995 | if (!page) { | ||
996 | ret = -ENOMEM; | ||
997 | goto cleanup; | ||
998 | } | ||
999 | |||
1000 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { | ||
1001 | ret = -EINVAL; | ||
1002 | goto cleanup; | ||
1003 | } | ||
1004 | 1054 | ||
1005 | len -= bytes; | 1055 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); |
1006 | } | 1056 | if (IS_ERR(bio)) |
1057 | return bio; | ||
1007 | 1058 | ||
1008 | if (!reading) { | 1059 | if (!reading) { |
1009 | void *p = data; | 1060 | void *p = data; |
@@ -1016,16 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1016 | } | 1067 | } |
1017 | } | 1068 | } |
1018 | 1069 | ||
1019 | bio->bi_private = data; | ||
1020 | bio->bi_end_io = bio_copy_kern_endio; | 1070 | bio->bi_end_io = bio_copy_kern_endio; |
1021 | return bio; | ||
1022 | cleanup: | ||
1023 | bio_for_each_segment(bvec, bio, i) | ||
1024 | __free_page(bvec->bv_page); | ||
1025 | |||
1026 | bio_put(bio); | ||
1027 | 1071 | ||
1028 | return ERR_PTR(ret); | 1072 | return bio; |
1029 | } | 1073 | } |
1030 | 1074 | ||
1031 | /* | 1075 | /* |
@@ -1212,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
1212 | * split a bio - only worry about a bio with a single page | 1256 | * split a bio - only worry about a bio with a single page |
1213 | * in it's iovec | 1257 | * in it's iovec |
1214 | */ | 1258 | */ |
1215 | struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | 1259 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
1216 | { | 1260 | { |
1217 | struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); | 1261 | struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); |
1218 | 1262 | ||
1219 | if (!bp) | 1263 | if (!bp) |
1220 | return bp; | 1264 | return bp; |
@@ -1248,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1248 | bp->bio2.bi_end_io = bio_pair_end_2; | 1292 | bp->bio2.bi_end_io = bio_pair_end_2; |
1249 | 1293 | ||
1250 | bp->bio1.bi_private = bi; | 1294 | bp->bio1.bi_private = bi; |
1251 | bp->bio2.bi_private = pool; | 1295 | bp->bio2.bi_private = bio_split_pool; |
1252 | 1296 | ||
1253 | if (bio_integrity(bi)) | 1297 | if (bio_integrity(bi)) |
1254 | bio_integrity_split(bi, bp, first_sectors); | 1298 | bio_integrity_split(bi, bp, first_sectors); |
@@ -1256,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1256 | return bp; | 1300 | return bp; |
1257 | } | 1301 | } |
1258 | 1302 | ||
1303 | /** | ||
1304 | * bio_sector_offset - Find hardware sector offset in bio | ||
1305 | * @bio: bio to inspect | ||
1306 | * @index: bio_vec index | ||
1307 | * @offset: offset in bv_page | ||
1308 | * | ||
1309 | * Return the number of hardware sectors between beginning of bio | ||
1310 | * and an end point indicated by a bio_vec index and an offset | ||
1311 | * within that vector's page. | ||
1312 | */ | ||
1313 | sector_t bio_sector_offset(struct bio *bio, unsigned short index, | ||
1314 | unsigned int offset) | ||
1315 | { | ||
1316 | unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); | ||
1317 | struct bio_vec *bv; | ||
1318 | sector_t sectors; | ||
1319 | int i; | ||
1320 | |||
1321 | sectors = 0; | ||
1322 | |||
1323 | if (index >= bio->bi_idx) | ||
1324 | index = bio->bi_vcnt - 1; | ||
1325 | |||
1326 | __bio_for_each_segment(bv, bio, i, 0) { | ||
1327 | if (i == index) { | ||
1328 | if (offset > bv->bv_offset) | ||
1329 | sectors += (offset - bv->bv_offset) / sector_sz; | ||
1330 | break; | ||
1331 | } | ||
1332 | |||
1333 | sectors += bv->bv_len / sector_sz; | ||
1334 | } | ||
1335 | |||
1336 | return sectors; | ||
1337 | } | ||
1338 | EXPORT_SYMBOL(bio_sector_offset); | ||
1259 | 1339 | ||
1260 | /* | 1340 | /* |
1261 | * create memory pools for biovec's in a bio_set. | 1341 | * create memory pools for biovec's in a bio_set. |
@@ -1358,6 +1438,7 @@ static int __init init_bio(void) | |||
1358 | subsys_initcall(init_bio); | 1438 | subsys_initcall(init_bio); |
1359 | 1439 | ||
1360 | EXPORT_SYMBOL(bio_alloc); | 1440 | EXPORT_SYMBOL(bio_alloc); |
1441 | EXPORT_SYMBOL(bio_kmalloc); | ||
1361 | EXPORT_SYMBOL(bio_put); | 1442 | EXPORT_SYMBOL(bio_put); |
1362 | EXPORT_SYMBOL(bio_free); | 1443 | EXPORT_SYMBOL(bio_free); |
1363 | EXPORT_SYMBOL(bio_endio); | 1444 | EXPORT_SYMBOL(bio_endio); |
@@ -1365,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); | |||
1365 | EXPORT_SYMBOL(__bio_clone); | 1446 | EXPORT_SYMBOL(__bio_clone); |
1366 | EXPORT_SYMBOL(bio_clone); | 1447 | EXPORT_SYMBOL(bio_clone); |
1367 | EXPORT_SYMBOL(bio_phys_segments); | 1448 | EXPORT_SYMBOL(bio_phys_segments); |
1368 | EXPORT_SYMBOL(bio_hw_segments); | ||
1369 | EXPORT_SYMBOL(bio_add_page); | 1449 | EXPORT_SYMBOL(bio_add_page); |
1370 | EXPORT_SYMBOL(bio_add_pc_page); | 1450 | EXPORT_SYMBOL(bio_add_pc_page); |
1371 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1451 | EXPORT_SYMBOL(bio_get_nr_vecs); |
@@ -1375,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); | |||
1375 | EXPORT_SYMBOL(bio_copy_kern); | 1455 | EXPORT_SYMBOL(bio_copy_kern); |
1376 | EXPORT_SYMBOL(bio_pair_release); | 1456 | EXPORT_SYMBOL(bio_pair_release); |
1377 | EXPORT_SYMBOL(bio_split); | 1457 | EXPORT_SYMBOL(bio_split); |
1378 | EXPORT_SYMBOL(bio_split_pool); | ||
1379 | EXPORT_SYMBOL(bio_copy_user); | 1458 | EXPORT_SYMBOL(bio_copy_user); |
1380 | EXPORT_SYMBOL(bio_uncopy_user); | 1459 | EXPORT_SYMBOL(bio_uncopy_user); |
1381 | EXPORT_SYMBOL(bioset_create); | 1460 | EXPORT_SYMBOL(bioset_create); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e049..d84f0469a016 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); | |||
540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
541 | */ | 541 | */ |
542 | 542 | ||
543 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | ||
544 | { | ||
545 | if (bdev->bd_contains != bdev) | ||
546 | return kobject_get(&bdev->bd_part->dev.kobj); | ||
547 | else | ||
548 | return kobject_get(&bdev->bd_disk->dev.kobj); | ||
549 | } | ||
550 | |||
551 | static struct kobject *bdev_get_holder(struct block_device *bdev) | ||
552 | { | ||
553 | if (bdev->bd_contains != bdev) | ||
554 | return kobject_get(bdev->bd_part->holder_dir); | ||
555 | else | ||
556 | return kobject_get(bdev->bd_disk->holder_dir); | ||
557 | } | ||
558 | |||
559 | static int add_symlink(struct kobject *from, struct kobject *to) | 543 | static int add_symlink(struct kobject *from, struct kobject *to) |
560 | { | 544 | { |
561 | if (!from || !to) | 545 | if (!from || !to) |
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, | |||
604 | if (!bo->hdev) | 588 | if (!bo->hdev) |
605 | goto fail_put_sdir; | 589 | goto fail_put_sdir; |
606 | 590 | ||
607 | bo->sdev = bdev_get_kobj(bdev); | 591 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
608 | if (!bo->sdev) | 592 | if (!bo->sdev) |
609 | goto fail_put_hdev; | 593 | goto fail_put_hdev; |
610 | 594 | ||
611 | bo->hdir = bdev_get_holder(bdev); | 595 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
612 | if (!bo->hdir) | 596 | if (!bo->hdir) |
613 | goto fail_put_sdev; | 597 | goto fail_put_sdev; |
614 | 598 | ||
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
868 | 852 | ||
869 | EXPORT_SYMBOL(open_by_devnum); | 853 | EXPORT_SYMBOL(open_by_devnum); |
870 | 854 | ||
855 | /** | ||
856 | * flush_disk - invalidates all buffer-cache entries on a disk | ||
857 | * | ||
858 | * @bdev: struct block device to be flushed | ||
859 | * | ||
860 | * Invalidates all buffer-cache entries on a disk. It should be called | ||
861 | * when a disk has been changed -- either by a media change or online | ||
862 | * resize. | ||
863 | */ | ||
864 | static void flush_disk(struct block_device *bdev) | ||
865 | { | ||
866 | if (__invalidate_device(bdev)) { | ||
867 | char name[BDEVNAME_SIZE] = ""; | ||
868 | |||
869 | if (bdev->bd_disk) | ||
870 | disk_name(bdev->bd_disk, 0, name); | ||
871 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | ||
872 | "resized disk %s\n", name); | ||
873 | } | ||
874 | |||
875 | if (!bdev->bd_disk) | ||
876 | return; | ||
877 | if (disk_partitionable(bdev->bd_disk)) | ||
878 | bdev->bd_invalidated = 1; | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | ||
883 | * @disk: struct gendisk to check | ||
884 | * @bdev: struct bdev to adjust. | ||
885 | * | ||
886 | * This routine checks to see if the bdev size does not match the disk size | ||
887 | * and adjusts it if it differs. | ||
888 | */ | ||
889 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | ||
890 | { | ||
891 | loff_t disk_size, bdev_size; | ||
892 | |||
893 | disk_size = (loff_t)get_capacity(disk) << 9; | ||
894 | bdev_size = i_size_read(bdev->bd_inode); | ||
895 | if (disk_size != bdev_size) { | ||
896 | char name[BDEVNAME_SIZE]; | ||
897 | |||
898 | disk_name(disk, 0, name); | ||
899 | printk(KERN_INFO | ||
900 | "%s: detected capacity change from %lld to %lld\n", | ||
901 | name, bdev_size, disk_size); | ||
902 | i_size_write(bdev->bd_inode, disk_size); | ||
903 | flush_disk(bdev); | ||
904 | } | ||
905 | } | ||
906 | EXPORT_SYMBOL(check_disk_size_change); | ||
907 | |||
908 | /** | ||
909 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | ||
910 | * @disk: struct gendisk to be revalidated | ||
911 | * | ||
912 | * This routine is a wrapper for lower-level driver's revalidate_disk | ||
913 | * call-backs. It is used to do common pre and post operations needed | ||
914 | * for all revalidate_disk operations. | ||
915 | */ | ||
916 | int revalidate_disk(struct gendisk *disk) | ||
917 | { | ||
918 | struct block_device *bdev; | ||
919 | int ret = 0; | ||
920 | |||
921 | if (disk->fops->revalidate_disk) | ||
922 | ret = disk->fops->revalidate_disk(disk); | ||
923 | |||
924 | bdev = bdget_disk(disk, 0); | ||
925 | if (!bdev) | ||
926 | return ret; | ||
927 | |||
928 | mutex_lock(&bdev->bd_mutex); | ||
929 | check_disk_size_change(disk, bdev); | ||
930 | mutex_unlock(&bdev->bd_mutex); | ||
931 | bdput(bdev); | ||
932 | return ret; | ||
933 | } | ||
934 | EXPORT_SYMBOL(revalidate_disk); | ||
935 | |||
871 | /* | 936 | /* |
872 | * This routine checks whether a removable media has been changed, | 937 | * This routine checks whether a removable media has been changed, |
873 | * and invalidates all buffer-cache-entries in that case. This | 938 | * and invalidates all buffer-cache-entries in that case. This |
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) | |||
887 | if (!bdops->media_changed(bdev->bd_disk)) | 952 | if (!bdops->media_changed(bdev->bd_disk)) |
888 | return 0; | 953 | return 0; |
889 | 954 | ||
890 | if (__invalidate_device(bdev)) | 955 | flush_disk(bdev); |
891 | printk("VFS: busy inodes on changed media.\n"); | ||
892 | |||
893 | if (bdops->revalidate_disk) | 956 | if (bdops->revalidate_disk) |
894 | bdops->revalidate_disk(bdev->bd_disk); | 957 | bdops->revalidate_disk(bdev->bd_disk); |
895 | if (bdev->bd_disk->minors > 1) | ||
896 | bdev->bd_invalidated = 1; | ||
897 | return 1; | 958 | return 1; |
898 | } | 959 | } |
899 | 960 | ||
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
927 | 988 | ||
928 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) |
929 | { | 990 | { |
930 | struct module *owner = NULL; | ||
931 | struct gendisk *disk; | 991 | struct gendisk *disk; |
992 | struct hd_struct *part = NULL; | ||
932 | int ret; | 993 | int ret; |
933 | int part; | 994 | int partno; |
934 | int perm = 0; | 995 | int perm = 0; |
935 | 996 | ||
936 | if (file->f_mode & FMODE_READ) | 997 | if (file->f_mode & FMODE_READ) |
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
948 | 1009 | ||
949 | ret = -ENXIO; | 1010 | ret = -ENXIO; |
950 | file->f_mapping = bdev->bd_inode->i_mapping; | 1011 | file->f_mapping = bdev->bd_inode->i_mapping; |
1012 | |||
951 | lock_kernel(); | 1013 | lock_kernel(); |
952 | disk = get_gendisk(bdev->bd_dev, &part); | 1014 | |
953 | if (!disk) { | 1015 | disk = get_gendisk(bdev->bd_dev, &partno); |
954 | unlock_kernel(); | 1016 | if (!disk) |
955 | bdput(bdev); | 1017 | goto out_unlock_kernel; |
956 | return ret; | 1018 | part = disk_get_part(disk, partno); |
957 | } | 1019 | if (!part) |
958 | owner = disk->fops->owner; | 1020 | goto out_unlock_kernel; |
959 | 1021 | ||
960 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1022 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
961 | if (!bdev->bd_openers) { | 1023 | if (!bdev->bd_openers) { |
962 | bdev->bd_disk = disk; | 1024 | bdev->bd_disk = disk; |
1025 | bdev->bd_part = part; | ||
963 | bdev->bd_contains = bdev; | 1026 | bdev->bd_contains = bdev; |
964 | if (!part) { | 1027 | if (!partno) { |
965 | struct backing_dev_info *bdi; | 1028 | struct backing_dev_info *bdi; |
966 | if (disk->fops->open) { | 1029 | if (disk->fops->open) { |
967 | ret = disk->fops->open(bdev->bd_inode, file); | 1030 | ret = disk->fops->open(bdev->bd_inode, file); |
968 | if (ret) | 1031 | if (ret) |
969 | goto out_first; | 1032 | goto out_clear; |
970 | } | 1033 | } |
971 | if (!bdev->bd_openers) { | 1034 | if (!bdev->bd_openers) { |
972 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1035 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
978 | if (bdev->bd_invalidated) | 1041 | if (bdev->bd_invalidated) |
979 | rescan_partitions(disk, bdev); | 1042 | rescan_partitions(disk, bdev); |
980 | } else { | 1043 | } else { |
981 | struct hd_struct *p; | ||
982 | struct block_device *whole; | 1044 | struct block_device *whole; |
983 | whole = bdget_disk(disk, 0); | 1045 | whole = bdget_disk(disk, 0); |
984 | ret = -ENOMEM; | 1046 | ret = -ENOMEM; |
985 | if (!whole) | 1047 | if (!whole) |
986 | goto out_first; | 1048 | goto out_clear; |
987 | BUG_ON(for_part); | 1049 | BUG_ON(for_part); |
988 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); |
989 | if (ret) | 1051 | if (ret) |
990 | goto out_first; | 1052 | goto out_clear; |
991 | bdev->bd_contains = whole; | 1053 | bdev->bd_contains = whole; |
992 | p = disk->part[part - 1]; | ||
993 | bdev->bd_inode->i_data.backing_dev_info = | 1054 | bdev->bd_inode->i_data.backing_dev_info = |
994 | whole->bd_inode->i_data.backing_dev_info; | 1055 | whole->bd_inode->i_data.backing_dev_info; |
995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 1056 | if (!(disk->flags & GENHD_FL_UP) || |
1057 | !part || !part->nr_sects) { | ||
996 | ret = -ENXIO; | 1058 | ret = -ENXIO; |
997 | goto out_first; | 1059 | goto out_clear; |
998 | } | 1060 | } |
999 | kobject_get(&p->dev.kobj); | 1061 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
1000 | bdev->bd_part = p; | ||
1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
1002 | } | 1062 | } |
1003 | } else { | 1063 | } else { |
1064 | disk_put_part(part); | ||
1004 | put_disk(disk); | 1065 | put_disk(disk); |
1005 | module_put(owner); | 1066 | module_put(disk->fops->owner); |
1067 | part = NULL; | ||
1068 | disk = NULL; | ||
1006 | if (bdev->bd_contains == bdev) { | 1069 | if (bdev->bd_contains == bdev) { |
1007 | if (bdev->bd_disk->fops->open) { | 1070 | if (bdev->bd_disk->fops->open) { |
1008 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); |
1009 | if (ret) | 1072 | if (ret) |
1010 | goto out; | 1073 | goto out_unlock_bdev; |
1011 | } | 1074 | } |
1012 | if (bdev->bd_invalidated) | 1075 | if (bdev->bd_invalidated) |
1013 | rescan_partitions(bdev->bd_disk, bdev); | 1076 | rescan_partitions(bdev->bd_disk, bdev); |
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1020 | unlock_kernel(); | 1083 | unlock_kernel(); |
1021 | return 0; | 1084 | return 0; |
1022 | 1085 | ||
1023 | out_first: | 1086 | out_clear: |
1024 | bdev->bd_disk = NULL; | 1087 | bdev->bd_disk = NULL; |
1088 | bdev->bd_part = NULL; | ||
1025 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1026 | if (bdev != bdev->bd_contains) | 1090 | if (bdev != bdev->bd_contains) |
1027 | __blkdev_put(bdev->bd_contains, 1); | 1091 | __blkdev_put(bdev->bd_contains, 1); |
1028 | bdev->bd_contains = NULL; | 1092 | bdev->bd_contains = NULL; |
1029 | put_disk(disk); | 1093 | out_unlock_bdev: |
1030 | module_put(owner); | ||
1031 | out: | ||
1032 | mutex_unlock(&bdev->bd_mutex); | 1094 | mutex_unlock(&bdev->bd_mutex); |
1095 | out_unlock_kernel: | ||
1033 | unlock_kernel(); | 1096 | unlock_kernel(); |
1034 | if (ret) | 1097 | |
1035 | bdput(bdev); | 1098 | disk_put_part(part); |
1099 | if (disk) | ||
1100 | module_put(disk->fops->owner); | ||
1101 | put_disk(disk); | ||
1102 | bdput(bdev); | ||
1103 | |||
1036 | return ret; | 1104 | return ret; |
1037 | } | 1105 | } |
1038 | 1106 | ||
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1117 | 1185 | ||
1118 | put_disk(disk); | 1186 | put_disk(disk); |
1119 | module_put(owner); | 1187 | module_put(owner); |
1120 | 1188 | disk_put_part(bdev->bd_part); | |
1121 | if (bdev->bd_contains != bdev) { | 1189 | bdev->bd_part = NULL; |
1122 | kobject_put(&bdev->bd_part->dev.kobj); | ||
1123 | bdev->bd_part = NULL; | ||
1124 | } | ||
1125 | bdev->bd_disk = NULL; | 1190 | bdev->bd_disk = NULL; |
1126 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1191 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1127 | if (bdev != bdev->bd_contains) | 1192 | if (bdev != bdev->bd_contains) |
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
1197 | 1262 | ||
1198 | /** | 1263 | /** |
1199 | * lookup_bdev - lookup a struct block_device by name | 1264 | * lookup_bdev - lookup a struct block_device by name |
1265 | * @pathname: special file representing the block device | ||
1200 | * | 1266 | * |
1201 | * @path: special file representing the block device | 1267 | * Get a reference to the blockdevice at @pathname in the current |
1202 | * | ||
1203 | * Get a reference to the blockdevice at @path in the current | ||
1204 | * namespace if possible and return it. Return ERR_PTR(error) | 1268 | * namespace if possible and return it. Return ERR_PTR(error) |
1205 | * otherwise. | 1269 | * otherwise. |
1206 | */ | 1270 | */ |
diff --git a/fs/buffer.c b/fs/buffer.c index 38653e36e225..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2926 | BUG_ON(!buffer_mapped(bh)); | 2926 | BUG_ON(!buffer_mapped(bh)); |
2927 | BUG_ON(!bh->b_end_io); | 2927 | BUG_ON(!bh->b_end_io); |
2928 | 2928 | ||
2929 | if (buffer_ordered(bh) && (rw == WRITE)) | 2929 | /* |
2930 | rw = WRITE_BARRIER; | 2930 | * Mask in barrier bit for a write (could be either a WRITE or a |
2931 | * WRITE_SYNC | ||
2932 | */ | ||
2933 | if (buffer_ordered(bh) && (rw & WRITE)) | ||
2934 | rw |= WRITE_BARRIER; | ||
2931 | 2935 | ||
2932 | /* | 2936 | /* |
2933 | * Only clear out a write error when rewriting, should this | 2937 | * Only clear out a write error when rewriting |
2934 | * include WRITE_SYNC as well? | ||
2935 | */ | 2938 | */ |
2936 | if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) | 2939 | if (test_set_buffer_req(bh) && (rw & WRITE)) |
2937 | clear_buffer_write_io_error(bh); | 2940 | clear_buffer_write_io_error(bh); |
2938 | 2941 | ||
2939 | /* | 2942 | /* |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f5d0083e09fa..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -4,7 +4,15 @@ Fix premature write failure on congested networks (we would give up | |||
4 | on EAGAIN from the socket too quickly on large writes). | 4 | on EAGAIN from the socket too quickly on large writes). |
5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. | 5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. |
6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian | 6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian |
7 | architectures. | 7 | architectures. Fix problems with preserving timestamps on copying open |
8 | files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit | ||
9 | on parent directory when server supports Unix Extensions but not POSIX | ||
10 | create. Update cifs.upcall version to handle new Kerberos sec flags | ||
11 | (this requires update of cifs.upcall program from Samba). Fix memory leak | ||
12 | on dns_upcall (resolving DFS referrals). Fix plain text password | |
13 | authentication (requires setting SecurityFlags to 0x30030 to enable | ||
14 | lanman and plain text though). Fix writes to be at correct offset when | ||
15 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. | |
8 | 16 | ||
9 | Version 1.53 | 17 | Version 1.53 |
10 | ------------ | 18 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and | |||
542 | hashing mechanisms (as "must use") on the other hand | 542 | hashing mechanisms (as "must use") on the other hand |
543 | does not make much sense. Default flags are | 543 | does not make much sense. Default flags are |
544 | 0x07007 | 544 | 0x07007 |
545 | (NTLM, NTLMv2 and packet signing allowed). Maximum | 545 | (NTLM, NTLMv2 and packet signing allowed). The maximum |
546 | allowable flags if you want to allow mounts to servers | 546 | allowable flags if you want to allow mounts to servers |
547 | using weaker password hashes is 0x37037 (lanman, | 547 | using weaker password hashes is 0x37037 (lanman, |
548 | plaintext, ntlm, ntlmv2, signing allowed): | 548 | plaintext, ntlm, ntlmv2, signing allowed). Some |
549 | SecurityFlags require the corresponding menuconfig | ||
550 | options to be enabled (lanman and plaintext require | ||
551 | CONFIG_CIFS_WEAK_PW_HASH for example). Enabling | ||
552 | plaintext authentication currently requires also | ||
553 | enabling lanman authentication in the security flags | ||
554 | because the cifs module only supports sending | ||
555 | plaintext passwords using the older lanman dialect | |
556 | form of the session setup SMB. (e.g. for authentication | ||
557 | using plain text passwords, set the SecurityFlags | ||
558 | to 0x30030): | ||
549 | 559 | ||
550 | may use packet signing 0x00001 | 560 | may use packet signing 0x00001 |
551 | must use packet signing 0x01001 | 561 | must use packet signing 0x01001 |
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in | |||
642 | that they represent all for that share, not just those for which the server | 652 | that they represent all for that share, not just those for which the server |
643 | returned success. | 653 | returned success. |
644 | 654 | ||
645 | Also note that "cat /proc/fs/cifs/DebugData" will display information about | 655 | Also note that "cat /proc/fs/cifs/DebugData" will display information about |
646 | the active sessions and the shares that are mounted. | 656 | the active sessions and the shares that are mounted. |
647 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is | 657 | |
648 | on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and | 658 | Enabling Kerberos (extended security) works but requires version 1.2 or later |
649 | LANMAN support do not require this helper. | 659 | of the helper program cifs.upcall to be present and to be configured in the |
660 | /etc/request-key.conf file. The cifs.upcall helper program is from the Samba | ||
661 | project (http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not | |
662 | require this helper. Note that NTLMv2 security (which does not require the | ||
663 | cifs.upcall helper program), instead of using Kerberos, is sufficient for | ||
664 | some use cases. | ||
665 | |||
666 | Enabling DFS support (used to access shares transparently in an MS-DFS | ||
667 | global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In | ||
668 | addition, DFS support for target shares which are specified as UNC | ||
669 | names which begin with host names (rather than IP addresses) requires | ||
670 | a user space helper (such as cifs.upcall) to be present in order to | ||
671 | translate host names to ip address, and the user space helper must also | ||
672 | be configured in the file /etc/request-key.conf | ||
673 | |||
674 | To use cifs Kerberos and DFS support, the Linux keyutils package should be | ||
675 | installed and something like the following lines should be added to the | ||
676 | /etc/request-key.conf file: | ||
677 | |||
678 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k | ||
679 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k | ||
680 | |||
681 | |||
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 5fabd2caf93c..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
476 | unsigned int cls, con, tag, oidlen, rc; | 476 | unsigned int cls, con, tag, oidlen, rc; |
477 | bool use_ntlmssp = false; | 477 | bool use_ntlmssp = false; |
478 | bool use_kerberos = false; | 478 | bool use_kerberos = false; |
479 | bool use_mskerberos = false; | ||
479 | 480 | ||
480 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ | 481 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ |
481 | 482 | ||
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
574 | *(oid + 1), *(oid + 2), *(oid + 3))); | 575 | *(oid + 1), *(oid + 2), *(oid + 3))); |
575 | 576 | ||
576 | if (compare_oid(oid, oidlen, MSKRB5_OID, | 577 | if (compare_oid(oid, oidlen, MSKRB5_OID, |
577 | MSKRB5_OID_LEN)) | 578 | MSKRB5_OID_LEN) && |
578 | use_kerberos = true; | 579 | !use_kerberos) |
580 | use_mskerberos = true; | ||
579 | else if (compare_oid(oid, oidlen, KRB5_OID, | 581 | else if (compare_oid(oid, oidlen, KRB5_OID, |
580 | KRB5_OID_LEN)) | 582 | KRB5_OID_LEN) && |
583 | !use_mskerberos) | ||
581 | use_kerberos = true; | 584 | use_kerberos = true; |
582 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, | 585 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, |
583 | NTLMSSP_OID_LEN)) | 586 | NTLMSSP_OID_LEN)) |
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
630 | 633 | ||
631 | if (use_kerberos) | 634 | if (use_kerberos) |
632 | *secType = Kerberos; | 635 | *secType = Kerberos; |
636 | else if (use_mskerberos) | ||
637 | *secType = MSKerberos; | ||
633 | else if (use_ntlmssp) | 638 | else if (use_ntlmssp) |
634 | *secType = NTLMSSP; | 639 | *secType = NTLMSSP; |
635 | 640 | ||
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2434ab0e8791..fcee9298b620 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -66,11 +66,28 @@ struct key_type cifs_spnego_key_type = { | |||
66 | .describe = user_describe, | 66 | .describe = user_describe, |
67 | }; | 67 | }; |
68 | 68 | ||
69 | #define MAX_VER_STR_LEN 8 /* length of longest version string e.g. | 69 | /* length of longest version string e.g. strlen("ver=0xFF") */ |
70 | strlen("ver=0xFF") */ | 70 | #define MAX_VER_STR_LEN 8 |
71 | #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg | 71 | |
72 | in future could have strlen(";sec=ntlmsspi") */ | 72 | /* length of longest security mechanism name, eg in future could have |
73 | #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | 73 | * strlen(";sec=ntlmsspi") */ |
74 | #define MAX_MECH_STR_LEN 13 | ||
75 | |||
76 | /* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | ||
77 | #define MAX_IPV6_ADDR_LEN 42 | ||
78 | |||
79 | /* strlen of "host=" */ | ||
80 | #define HOST_KEY_LEN 5 | ||
81 | |||
82 | /* strlen of ";ip4=" or ";ip6=" */ | ||
83 | #define IP_KEY_LEN 5 | ||
84 | |||
85 | /* strlen of ";uid=0x" */ | ||
86 | #define UID_KEY_LEN 7 | ||
87 | |||
88 | /* strlen of ";user=" */ | ||
89 | #define USER_KEY_LEN 6 | ||
90 | |||
74 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ | 91 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ |
75 | struct key * | 92 | struct key * |
76 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | 93 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) |
@@ -84,11 +101,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
84 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress | 101 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress |
85 | host=hostname sec=mechanism uid=0xFF user=username */ | 102 | host=hostname sec=mechanism uid=0xFF user=username */ |
86 | desc_len = MAX_VER_STR_LEN + | 103 | desc_len = MAX_VER_STR_LEN + |
87 | 6 /* len of "host=" */ + strlen(hostname) + | 104 | HOST_KEY_LEN + strlen(hostname) + |
88 | 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + | 105 | IP_KEY_LEN + MAX_IPV6_ADDR_LEN + |
89 | MAX_MECH_STR_LEN + | 106 | MAX_MECH_STR_LEN + |
90 | 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + | 107 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
91 | 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; | 108 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; |
92 | 109 | ||
93 | spnego_key = ERR_PTR(-ENOMEM); | 110 | spnego_key = ERR_PTR(-ENOMEM); |
94 | description = kzalloc(desc_len, GFP_KERNEL); | 111 | description = kzalloc(desc_len, GFP_KERNEL); |
@@ -114,9 +131,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
114 | 131 | ||
115 | dp = description + strlen(description); | 132 | dp = description + strlen(description); |
116 | 133 | ||
117 | /* for now, only sec=krb5 is valid */ | 134 | /* for now, only sec=krb5 and sec=mskrb5 are valid */ |
118 | if (server->secType == Kerberos) | 135 | if (server->secType == Kerberos) |
119 | sprintf(dp, ";sec=krb5"); | 136 | sprintf(dp, ";sec=krb5"); |
137 | else if (server->secType == MSKerberos) | ||
138 | sprintf(dp, ";sec=mskrb5"); | ||
120 | else | 139 | else |
121 | goto out; | 140 | goto out; |
122 | 141 | ||
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h | |||
@@ -23,7 +23,7 @@ | |||
23 | #ifndef _CIFS_SPNEGO_H | 23 | #ifndef _CIFS_SPNEGO_H |
24 | #define _CIFS_SPNEGO_H | 24 | #define _CIFS_SPNEGO_H |
25 | 25 | ||
26 | #define CIFS_SPNEGO_UPCALL_VERSION 1 | 26 | #define CIFS_SPNEGO_UPCALL_VERSION 2 |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. | 29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) | |||
294 | 294 | ||
295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { | 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { |
297 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
297 | memcpy(lnm_session_key, password_with_pad, | 298 | memcpy(lnm_session_key, password_with_pad, |
298 | CIFS_ENCPWD_SIZE); | 299 | CIFS_ENCPWD_SIZE); |
299 | return; | 300 | return; |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e8da4ee761b5..25ecbd5b0404 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -175,6 +175,8 @@ out_no_root: | |||
175 | if (inode) | 175 | if (inode) |
176 | iput(inode); | 176 | iput(inode); |
177 | 177 | ||
178 | cifs_umount(sb, cifs_sb); | ||
179 | |||
178 | out_mount_failed: | 180 | out_mount_failed: |
179 | if (cifs_sb) { | 181 | if (cifs_sb) { |
180 | #ifdef CONFIG_CIFS_DFS_UPCALL | 182 | #ifdef CONFIG_CIFS_DFS_UPCALL |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 135c965c4137..f7b4a5cd837b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -41,7 +41,7 @@ extern int cifs_create(struct inode *, struct dentry *, int, | |||
41 | struct nameidata *); | 41 | struct nameidata *); |
42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, | 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, |
43 | struct nameidata *); | 43 | struct nameidata *); |
44 | extern int cifs_unlink(struct inode *, struct dentry *); | 44 | extern int cifs_unlink(struct inode *dir, struct dentry *dentry); |
45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); | 45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); |
46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); | 46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); |
47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); | 47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..0d22479d99b7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -80,7 +80,8 @@ enum securityEnum { | |||
80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ | 81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ |
82 | NTLMSSP, /* NTLMSSP via SPNEGO */ | 82 | NTLMSSP, /* NTLMSSP via SPNEGO */ |
83 | Kerberos /* Kerberos via SPNEGO */ | 83 | Kerberos, /* Kerberos via SPNEGO */ |
84 | MSKerberos, /* MS Kerberos via SPNEGO */ | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | enum protocolEnum { | 87 | enum protocolEnum { |
@@ -308,6 +309,7 @@ struct cifs_search_info { | |||
308 | __u32 resume_key; | 309 | __u32 resume_key; |
309 | char *ntwrk_buf_start; | 310 | char *ntwrk_buf_start; |
310 | char *srch_entries_start; | 311 | char *srch_entries_start; |
312 | char *last_entry; | ||
311 | char *presume_name; | 313 | char *presume_name; |
312 | unsigned int resume_name_len; | 314 | unsigned int resume_name_len; |
313 | bool endOfSearch:1; | 315 | bool endOfSearch:1; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a729d083e6f4..0cff7fe986e8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -179,6 +179,8 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | |||
179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | 179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, |
180 | const FILE_BASIC_INFO *data, __u16 fid, | 180 | const FILE_BASIC_INFO *data, __u16 fid, |
181 | __u32 pid_of_opener); | 181 | __u32 pid_of_opener); |
182 | extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
183 | bool delete_file, __u16 fid, __u32 pid_of_opener); | ||
182 | #if 0 | 184 | #if 0 |
183 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, | 185 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, |
184 | char *fileName, __u16 dos_attributes, | 186 | char *fileName, __u16 dos_attributes, |
@@ -229,7 +231,7 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, | |||
229 | const struct nls_table *nls_codepage, | 231 | const struct nls_table *nls_codepage, |
230 | int remap_special_chars); | 232 | int remap_special_chars); |
231 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 233 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
232 | int netfid, char *target_name, | 234 | int netfid, const char *target_name, |
233 | const struct nls_table *nls_codepage, | 235 | const struct nls_table *nls_codepage, |
234 | int remap_special_chars); | 236 | int remap_special_chars); |
235 | extern int CIFSCreateHardLink(const int xid, | 237 | extern int CIFSCreateHardLink(const int xid, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 994de7c90474..6f4ffe15d68d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -2017,7 +2017,7 @@ renameRetry: | |||
2017 | } | 2017 | } |
2018 | 2018 | ||
2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
2020 | int netfid, char *target_name, | 2020 | int netfid, const char *target_name, |
2021 | const struct nls_table *nls_codepage, int remap) | 2021 | const struct nls_table *nls_codepage, int remap) |
2022 | { | 2022 | { |
2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
@@ -2071,7 +2071,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | |||
2071 | remap); | 2071 | remap); |
2072 | } | 2072 | } |
2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); | 2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); |
2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2; | 2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str); |
2075 | byte_count += count; | 2075 | byte_count += count; |
2076 | pSMB->DataCount = cpu_to_le16(count); | 2076 | pSMB->DataCount = cpu_to_le16(count); |
2077 | pSMB->TotalDataCount = pSMB->DataCount; | 2077 | pSMB->TotalDataCount = pSMB->DataCount; |
@@ -3614,6 +3614,8 @@ findFirstRetry: | |||
3614 | /* BB remember to free buffer if error BB */ | 3614 | /* BB remember to free buffer if error BB */ |
3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3616 | if (rc == 0) { | 3616 | if (rc == 0) { |
3617 | unsigned int lnoff; | ||
3618 | |||
3617 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3619 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3618 | psrch_inf->unicode = true; | 3620 | psrch_inf->unicode = true; |
3619 | else | 3621 | else |
@@ -3636,6 +3638,17 @@ findFirstRetry: | |||
3636 | le16_to_cpu(parms->SearchCount); | 3638 | le16_to_cpu(parms->SearchCount); |
3637 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + | 3639 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + |
3638 | psrch_inf->entries_in_buffer; | 3640 | psrch_inf->entries_in_buffer; |
3641 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3642 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3643 | lnoff) { | ||
3644 | cERROR(1, ("ignoring corrupt resume name")); | ||
3645 | psrch_inf->last_entry = NULL; | ||
3646 | return rc; | ||
3647 | } | ||
3648 | |||
3649 | psrch_inf->last_entry = psrch_inf->srch_entries_start + | ||
3650 | lnoff; | ||
3651 | |||
3639 | *pnetfid = parms->SearchHandle; | 3652 | *pnetfid = parms->SearchHandle; |
3640 | } else { | 3653 | } else { |
3641 | cifs_buf_release(pSMB); | 3654 | cifs_buf_release(pSMB); |
@@ -3725,6 +3738,8 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3725 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3738 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3726 | 3739 | ||
3727 | if (rc == 0) { | 3740 | if (rc == 0) { |
3741 | unsigned int lnoff; | ||
3742 | |||
3728 | /* BB fixme add lock for file (srch_info) struct here */ | 3743 | /* BB fixme add lock for file (srch_info) struct here */ |
3729 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3744 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3730 | psrch_inf->unicode = true; | 3745 | psrch_inf->unicode = true; |
@@ -3751,6 +3766,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3751 | le16_to_cpu(parms->SearchCount); | 3766 | le16_to_cpu(parms->SearchCount); |
3752 | psrch_inf->index_of_last_entry += | 3767 | psrch_inf->index_of_last_entry += |
3753 | psrch_inf->entries_in_buffer; | 3768 | psrch_inf->entries_in_buffer; |
3769 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3770 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3771 | lnoff) { | ||
3772 | cERROR(1, ("ignoring corrupt resume name")); | ||
3773 | psrch_inf->last_entry = NULL; | ||
3774 | return rc; | ||
3775 | } else | ||
3776 | psrch_inf->last_entry = | ||
3777 | psrch_inf->srch_entries_start + lnoff; | ||
3778 | |||
3754 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", | 3779 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", |
3755 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ | 3780 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ |
3756 | 3781 | ||
@@ -4876,6 +4901,61 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | |||
4876 | return rc; | 4901 | return rc; |
4877 | } | 4902 | } |
4878 | 4903 | ||
4904 | int | ||
4905 | CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
4906 | bool delete_file, __u16 fid, __u32 pid_of_opener) | ||
4907 | { | ||
4908 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | ||
4909 | char *data_offset; | ||
4910 | int rc = 0; | ||
4911 | __u16 params, param_offset, offset, byte_count, count; | ||
4912 | |||
4913 | cFYI(1, ("Set File Disposition (via SetFileInfo)")); | ||
4914 | rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); | ||
4915 | |||
4916 | if (rc) | ||
4917 | return rc; | ||
4918 | |||
4919 | pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); | ||
4920 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); | ||
4921 | |||
4922 | params = 6; | ||
4923 | pSMB->MaxSetupCount = 0; | ||
4924 | pSMB->Reserved = 0; | ||
4925 | pSMB->Flags = 0; | ||
4926 | pSMB->Timeout = 0; | ||
4927 | pSMB->Reserved2 = 0; | ||
4928 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | ||
4929 | offset = param_offset + params; | ||
4930 | |||
4931 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | ||
4932 | |||
4933 | count = 1; | ||
4934 | pSMB->MaxParameterCount = cpu_to_le16(2); | ||
4935 | /* BB find max SMB PDU from sess */ | ||
4936 | pSMB->MaxDataCount = cpu_to_le16(1000); | ||
4937 | pSMB->SetupCount = 1; | ||
4938 | pSMB->Reserved3 = 0; | ||
4939 | pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); | ||
4940 | byte_count = 3 /* pad */ + params + count; | ||
4941 | pSMB->DataCount = cpu_to_le16(count); | ||
4942 | pSMB->ParameterCount = cpu_to_le16(params); | ||
4943 | pSMB->TotalDataCount = pSMB->DataCount; | ||
4944 | pSMB->TotalParameterCount = pSMB->ParameterCount; | ||
4945 | pSMB->ParameterOffset = cpu_to_le16(param_offset); | ||
4946 | pSMB->DataOffset = cpu_to_le16(offset); | ||
4947 | pSMB->Fid = fid; | ||
4948 | pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); | ||
4949 | pSMB->Reserved4 = 0; | ||
4950 | pSMB->hdr.smb_buf_length += byte_count; | ||
4951 | pSMB->ByteCount = cpu_to_le16(byte_count); | ||
4952 | *data_offset = delete_file ? 1 : 0; | ||
4953 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); | ||
4954 | if (rc) | ||
4955 | cFYI(1, ("Send error in SetFileDisposition = %d", rc)); | ||
4956 | |||
4957 | return rc; | ||
4958 | } | ||
4879 | 4959 | ||
4880 | int | 4960 | int |
4881 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | 4961 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0711db65afe8..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; |
3599 | bool ntlmv2_flag = false; | 3599 | bool ntlmv2_flag = false; |
3600 | int first_time = 0; | 3600 | int first_time = 0; |
3601 | struct TCP_Server_Info *server = pSesInfo->server; | ||
3601 | 3602 | ||
3602 | /* what if server changes its buffer size after dropping the session? */ | 3603 | /* what if server changes its buffer size after dropping the session? */ |
3603 | if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { | 3604 | if (server->maxBuf == 0) /* no need to send on reconnect */ { |
3604 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3605 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3605 | if (rc == -EAGAIN) /* retry only once on 1st time connection */ { | 3606 | if (rc == -EAGAIN) { |
3607 | /* retry only once on 1st time connection */ | ||
3606 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3608 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3607 | if (rc == -EAGAIN) | 3609 | if (rc == -EAGAIN) |
3608 | rc = -EHOSTDOWN; | 3610 | rc = -EHOSTDOWN; |
3609 | } | 3611 | } |
3610 | if (rc == 0) { | 3612 | if (rc == 0) { |
3611 | spin_lock(&GlobalMid_Lock); | 3613 | spin_lock(&GlobalMid_Lock); |
3612 | if (pSesInfo->server->tcpStatus != CifsExiting) | 3614 | if (server->tcpStatus != CifsExiting) |
3613 | pSesInfo->server->tcpStatus = CifsGood; | 3615 | server->tcpStatus = CifsGood; |
3614 | else | 3616 | else |
3615 | rc = -EHOSTDOWN; | 3617 | rc = -EHOSTDOWN; |
3616 | spin_unlock(&GlobalMid_Lock); | 3618 | spin_unlock(&GlobalMid_Lock); |
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3623 | goto ss_err_exit; | 3625 | goto ss_err_exit; |
3624 | 3626 | ||
3625 | pSesInfo->flags = 0; | 3627 | pSesInfo->flags = 0; |
3626 | pSesInfo->capabilities = pSesInfo->server->capabilities; | 3628 | pSesInfo->capabilities = server->capabilities; |
3627 | if (linuxExtEnabled == 0) | 3629 | if (linuxExtEnabled == 0) |
3628 | pSesInfo->capabilities &= (~CAP_UNIX); | 3630 | pSesInfo->capabilities &= (~CAP_UNIX); |
3629 | /* pSesInfo->sequence_number = 0;*/ | 3631 | /* pSesInfo->sequence_number = 0;*/ |
3630 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", | 3632 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", |
3631 | pSesInfo->server->secMode, | 3633 | server->secMode, server->capabilities, server->timeAdj)); |
3632 | pSesInfo->server->capabilities, | 3634 | |
3633 | pSesInfo->server->timeAdj)); | ||
3634 | if (experimEnabled < 2) | 3635 | if (experimEnabled < 2) |
3635 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); | 3636 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); |
3636 | else if (extended_security | 3637 | else if (extended_security |
3637 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3638 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3638 | && (pSesInfo->server->secType == NTLMSSP)) { | 3639 | && (server->secType == NTLMSSP)) { |
3639 | rc = -EOPNOTSUPP; | 3640 | rc = -EOPNOTSUPP; |
3640 | } else if (extended_security | 3641 | } else if (extended_security |
3641 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3642 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3642 | && (pSesInfo->server->secType == RawNTLMSSP)) { | 3643 | && (server->secType == RawNTLMSSP)) { |
3643 | cFYI(1, ("NTLMSSP sesssetup")); | 3644 | cFYI(1, ("NTLMSSP sesssetup")); |
3644 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, | 3645 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, |
3645 | nls_info); | 3646 | nls_info); |
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3668 | 3669 | ||
3669 | } else { | 3670 | } else { |
3670 | SMBNTencrypt(pSesInfo->password, | 3671 | SMBNTencrypt(pSesInfo->password, |
3671 | pSesInfo->server->cryptKey, | 3672 | server->cryptKey, |
3672 | ntlm_session_key); | 3673 | ntlm_session_key); |
3673 | 3674 | ||
3674 | if (first_time) | 3675 | if (first_time) |
3675 | cifs_calculate_mac_key( | 3676 | cifs_calculate_mac_key( |
3676 | &pSesInfo->server->mac_signing_key, | 3677 | &server->mac_signing_key, |
3677 | ntlm_session_key, | 3678 | ntlm_session_key, |
3678 | pSesInfo->password); | 3679 | pSesInfo->password); |
3679 | } | 3680 | } |
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3686 | nls_info); | 3687 | nls_info); |
3687 | } | 3688 | } |
3688 | } else { /* old style NTLM 0.12 session setup */ | 3689 | } else { /* old style NTLM 0.12 session setup */ |
3689 | SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, | 3690 | SMBNTencrypt(pSesInfo->password, server->cryptKey, |
3690 | ntlm_session_key); | 3691 | ntlm_session_key); |
3691 | 3692 | ||
3692 | if (first_time) | 3693 | if (first_time) |
3693 | cifs_calculate_mac_key( | 3694 | cifs_calculate_mac_key(&server->mac_signing_key, |
3694 | &pSesInfo->server->mac_signing_key, | 3695 | ntlm_session_key, |
3695 | ntlm_session_key, pSesInfo->password); | 3696 | pSesInfo->password); |
3696 | 3697 | ||
3697 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); | 3698 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); |
3698 | } | 3699 | } |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..1e0c1bd8f2e4 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -29,38 +29,13 @@ | |||
29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
30 | #include "cifs_debug.h" | 30 | #include "cifs_debug.h" |
31 | 31 | ||
32 | static int dns_resolver_instantiate(struct key *key, const void *data, | ||
33 | size_t datalen) | ||
34 | { | ||
35 | int rc = 0; | ||
36 | char *ip; | ||
37 | |||
38 | ip = kmalloc(datalen+1, GFP_KERNEL); | ||
39 | if (!ip) | ||
40 | return -ENOMEM; | ||
41 | |||
42 | memcpy(ip, data, datalen); | ||
43 | ip[datalen] = '\0'; | ||
44 | |||
45 | rcu_assign_pointer(key->payload.data, ip); | ||
46 | |||
47 | return rc; | ||
48 | } | ||
49 | |||
50 | struct key_type key_type_dns_resolver = { | ||
51 | .name = "dns_resolver", | ||
52 | .def_datalen = sizeof(struct in_addr), | ||
53 | .describe = user_describe, | ||
54 | .instantiate = dns_resolver_instantiate, | ||
55 | .match = user_match, | ||
56 | }; | ||
57 | |||
58 | /* Checks if supplied name is IP address | 32 | /* Checks if supplied name is IP address |
59 | * returns: | 33 | * returns: |
60 | * 1 - name is IP | 34 | * 1 - name is IP |
61 | * 0 - name is not IP | 35 | * 0 - name is not IP |
62 | */ | 36 | */ |
63 | static int is_ip(const char *name) | 37 | static int |
38 | is_ip(const char *name) | ||
64 | { | 39 | { |
65 | int rc; | 40 | int rc; |
66 | struct sockaddr_in sin_server; | 41 | struct sockaddr_in sin_server; |
@@ -82,6 +57,47 @@ static int is_ip(const char *name) | |||
82 | return 0; | 57 | return 0; |
83 | } | 58 | } |
84 | 59 | ||
60 | static int | ||
61 | dns_resolver_instantiate(struct key *key, const void *data, | ||
62 | size_t datalen) | ||
63 | { | ||
64 | int rc = 0; | ||
65 | char *ip; | ||
66 | |||
67 | ip = kmalloc(datalen + 1, GFP_KERNEL); | ||
68 | if (!ip) | ||
69 | return -ENOMEM; | ||
70 | |||
71 | memcpy(ip, data, datalen); | ||
72 | ip[datalen] = '\0'; | ||
73 | |||
74 | /* make sure this looks like an address */ | ||
75 | if (!is_ip((const char *) ip)) { | ||
76 | kfree(ip); | ||
77 | return -EINVAL; | ||
78 | } | ||
79 | |||
80 | key->type_data.x[0] = datalen; | ||
81 | rcu_assign_pointer(key->payload.data, ip); | ||
82 | |||
83 | return rc; | ||
84 | } | ||
85 | |||
86 | static void | ||
87 | dns_resolver_destroy(struct key *key) | ||
88 | { | ||
89 | kfree(key->payload.data); | ||
90 | } | ||
91 | |||
92 | struct key_type key_type_dns_resolver = { | ||
93 | .name = "dns_resolver", | ||
94 | .def_datalen = sizeof(struct in_addr), | ||
95 | .describe = user_describe, | ||
96 | .instantiate = dns_resolver_instantiate, | ||
97 | .destroy = dns_resolver_destroy, | ||
98 | .match = user_match, | ||
99 | }; | ||
100 | |||
85 | /* Resolves server name to ip address. | 101 | /* Resolves server name to ip address. |
86 | * input: | 102 | * input: |
87 | * unc - server UNC | 103 | * unc - server UNC |
@@ -133,6 +149,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
133 | 149 | ||
134 | rkey = request_key(&key_type_dns_resolver, name, ""); | 150 | rkey = request_key(&key_type_dns_resolver, name, ""); |
135 | if (!IS_ERR(rkey)) { | 151 | if (!IS_ERR(rkey)) { |
152 | len = rkey->type_data.x[0]; | ||
136 | data = rkey->payload.data; | 153 | data = rkey->payload.data; |
137 | } else { | 154 | } else { |
138 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); | 155 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); |
@@ -141,11 +158,9 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
141 | 158 | ||
142 | skip_upcall: | 159 | skip_upcall: |
143 | if (data) { | 160 | if (data) { |
144 | len = strlen(data); | 161 | *ip_addr = kmalloc(len + 1, GFP_KERNEL); |
145 | *ip_addr = kmalloc(len+1, GFP_KERNEL); | ||
146 | if (*ip_addr) { | 162 | if (*ip_addr) { |
147 | memcpy(*ip_addr, data, len); | 163 | memcpy(*ip_addr, data, len + 1); |
148 | (*ip_addr)[len] = '\0'; | ||
149 | if (!IS_ERR(rkey)) | 164 | if (!IS_ERR(rkey)) |
150 | cFYI(1, ("%s: resolved: %s to %s", __func__, | 165 | cFYI(1, ("%s: resolved: %s to %s", __func__, |
151 | name, | 166 | name, |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..c4a8a0605125 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -107,7 +107,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, | |||
107 | 107 | ||
108 | /* want handles we can use to read with first | 108 | /* want handles we can use to read with first |
109 | in the list so we do not have to walk the | 109 | in the list so we do not have to walk the |
110 | list to search for one in prepare_write */ | 110 | list to search for one in write_begin */ |
111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { | 111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { |
112 | list_add_tail(&pCifsFile->flist, | 112 | list_add_tail(&pCifsFile->flist, |
113 | &pCifsInode->openFileList); | 113 | &pCifsInode->openFileList); |
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
833 | return -EBADF; | 833 | return -EBADF; |
834 | open_file = (struct cifsFileInfo *) file->private_data; | 834 | open_file = (struct cifsFileInfo *) file->private_data; |
835 | 835 | ||
836 | rc = generic_write_checks(file, poffset, &write_size, 0); | ||
837 | if (rc) | ||
838 | return rc; | ||
839 | |||
836 | xid = GetXid(); | 840 | xid = GetXid(); |
837 | 841 | ||
838 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 842 | if (*poffset > file->f_path.dentry->d_inode->i_size) |
@@ -911,7 +915,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
911 | } | 915 | } |
912 | 916 | ||
913 | static ssize_t cifs_write(struct file *file, const char *write_data, | 917 | static ssize_t cifs_write(struct file *file, const char *write_data, |
914 | size_t write_size, loff_t *poffset) | 918 | size_t write_size, loff_t *poffset) |
915 | { | 919 | { |
916 | int rc = 0; | 920 | int rc = 0; |
917 | unsigned int bytes_written = 0; | 921 | unsigned int bytes_written = 0; |
@@ -1061,6 +1065,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
1061 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | 1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) |
1062 | { | 1066 | { |
1063 | struct cifsFileInfo *open_file; | 1067 | struct cifsFileInfo *open_file; |
1068 | bool any_available = false; | ||
1064 | int rc; | 1069 | int rc; |
1065 | 1070 | ||
1066 | /* Having a null inode here (because mapping->host was set to zero by | 1071 | /* Having a null inode here (because mapping->host was set to zero by |
@@ -1076,8 +1081,10 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | |||
1076 | read_lock(&GlobalSMBSeslock); | 1081 | read_lock(&GlobalSMBSeslock); |
1077 | refind_writable: | 1082 | refind_writable: |
1078 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1083 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
1079 | if (open_file->closePend) | 1084 | if (open_file->closePend || |
1085 | (!any_available && open_file->pid != current->tgid)) | ||
1080 | continue; | 1086 | continue; |
1087 | |||
1081 | if (open_file->pfile && | 1088 | if (open_file->pfile && |
1082 | ((open_file->pfile->f_flags & O_RDWR) || | 1089 | ((open_file->pfile->f_flags & O_RDWR) || |
1083 | (open_file->pfile->f_flags & O_WRONLY))) { | 1090 | (open_file->pfile->f_flags & O_WRONLY))) { |
@@ -1127,6 +1134,11 @@ refind_writable: | |||
1127 | of the loop here. */ | 1134 | of the loop here. */ |
1128 | } | 1135 | } |
1129 | } | 1136 | } |
1137 | /* couldn't find useable FH with same pid, try any available */ | ||
1138 | if (!any_available) { | ||
1139 | any_available = true; | ||
1140 | goto refind_writable; | ||
1141 | } | ||
1130 | read_unlock(&GlobalSMBSeslock); | 1142 | read_unlock(&GlobalSMBSeslock); |
1131 | return NULL; | 1143 | return NULL; |
1132 | } | 1144 | } |
@@ -1443,49 +1455,52 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc) | |||
1443 | return rc; | 1455 | return rc; |
1444 | } | 1456 | } |
1445 | 1457 | ||
1446 | static int cifs_commit_write(struct file *file, struct page *page, | 1458 | static int cifs_write_end(struct file *file, struct address_space *mapping, |
1447 | unsigned offset, unsigned to) | 1459 | loff_t pos, unsigned len, unsigned copied, |
1460 | struct page *page, void *fsdata) | ||
1448 | { | 1461 | { |
1449 | int xid; | 1462 | int rc; |
1450 | int rc = 0; | 1463 | struct inode *inode = mapping->host; |
1451 | struct inode *inode = page->mapping->host; | ||
1452 | loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
1453 | char *page_data; | ||
1454 | 1464 | ||
1455 | xid = GetXid(); | 1465 | cFYI(1, ("write_end for page %p from pos %lld with %d bytes", |
1456 | cFYI(1, ("commit write for page %p up to position %lld for %d", | 1466 | page, pos, copied)); |
1457 | page, position, to)); | 1467 | |
1458 | spin_lock(&inode->i_lock); | 1468 | if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) |
1459 | if (position > inode->i_size) | 1469 | SetPageUptodate(page); |
1460 | i_size_write(inode, position); | ||
1461 | 1470 | ||
1462 | spin_unlock(&inode->i_lock); | ||
1463 | if (!PageUptodate(page)) { | 1471 | if (!PageUptodate(page)) { |
1464 | position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; | 1472 | char *page_data; |
1465 | /* can not rely on (or let) writepage write this data */ | 1473 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); |
1466 | if (to < offset) { | 1474 | int xid; |
1467 | cFYI(1, ("Illegal offsets, can not copy from %d to %d", | 1475 | |
1468 | offset, to)); | 1476 | xid = GetXid(); |
1469 | FreeXid(xid); | ||
1470 | return rc; | ||
1471 | } | ||
1472 | /* this is probably better than directly calling | 1477 | /* this is probably better than directly calling |
1473 | partialpage_write since in this function the file handle is | 1478 | partialpage_write since in this function the file handle is |
1474 | known which we might as well leverage */ | 1479 | known which we might as well leverage */ |
1475 | /* BB check if anything else missing out of ppw | 1480 | /* BB check if anything else missing out of ppw |
1476 | such as updating last write time */ | 1481 | such as updating last write time */ |
1477 | page_data = kmap(page); | 1482 | page_data = kmap(page); |
1478 | rc = cifs_write(file, page_data + offset, to-offset, | 1483 | rc = cifs_write(file, page_data + offset, copied, &pos); |
1479 | &position); | 1484 | /* if (rc < 0) should we set writebehind rc? */ |
1480 | if (rc > 0) | ||
1481 | rc = 0; | ||
1482 | /* else if (rc < 0) should we set writebehind rc? */ | ||
1483 | kunmap(page); | 1485 | kunmap(page); |
1486 | |||
1487 | FreeXid(xid); | ||
1484 | } else { | 1488 | } else { |
1489 | rc = copied; | ||
1490 | pos += copied; | ||
1485 | set_page_dirty(page); | 1491 | set_page_dirty(page); |
1486 | } | 1492 | } |
1487 | 1493 | ||
1488 | FreeXid(xid); | 1494 | if (rc > 0) { |
1495 | spin_lock(&inode->i_lock); | ||
1496 | if (pos > inode->i_size) | ||
1497 | i_size_write(inode, pos); | ||
1498 | spin_unlock(&inode->i_lock); | ||
1499 | } | ||
1500 | |||
1501 | unlock_page(page); | ||
1502 | page_cache_release(page); | ||
1503 | |||
1489 | return rc; | 1504 | return rc; |
1490 | } | 1505 | } |
1491 | 1506 | ||
@@ -2031,49 +2046,44 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) | |||
2031 | return true; | 2046 | return true; |
2032 | } | 2047 | } |
2033 | 2048 | ||
2034 | static int cifs_prepare_write(struct file *file, struct page *page, | 2049 | static int cifs_write_begin(struct file *file, struct address_space *mapping, |
2035 | unsigned from, unsigned to) | 2050 | loff_t pos, unsigned len, unsigned flags, |
2051 | struct page **pagep, void **fsdata) | ||
2036 | { | 2052 | { |
2037 | int rc = 0; | 2053 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
2038 | loff_t i_size; | 2054 | loff_t offset = pos & (PAGE_CACHE_SIZE - 1); |
2039 | loff_t offset; | 2055 | |
2056 | cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); | ||
2040 | 2057 | ||
2041 | cFYI(1, ("prepare write for page %p from %d to %d", page, from, to)); | 2058 | *pagep = __grab_cache_page(mapping, index); |
2042 | if (PageUptodate(page)) | 2059 | if (!*pagep) |
2060 | return -ENOMEM; | ||
2061 | |||
2062 | if (PageUptodate(*pagep)) | ||
2043 | return 0; | 2063 | return 0; |
2044 | 2064 | ||
2045 | /* If we are writing a full page it will be up to date, | 2065 | /* If we are writing a full page it will be up to date, |
2046 | no need to read from the server */ | 2066 | no need to read from the server */ |
2047 | if ((to == PAGE_CACHE_SIZE) && (from == 0)) { | 2067 | if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) |
2048 | SetPageUptodate(page); | ||
2049 | return 0; | 2068 | return 0; |
2050 | } | ||
2051 | 2069 | ||
2052 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 2070 | if ((file->f_flags & O_ACCMODE) != O_WRONLY) { |
2053 | i_size = i_size_read(page->mapping->host); | 2071 | int rc; |
2054 | 2072 | ||
2055 | if ((offset >= i_size) || | ||
2056 | ((from == 0) && (offset + to) >= i_size)) { | ||
2057 | /* | ||
2058 | * We don't need to read data beyond the end of the file. | ||
2059 | * zero it, and set the page uptodate | ||
2060 | */ | ||
2061 | simple_prepare_write(file, page, from, to); | ||
2062 | SetPageUptodate(page); | ||
2063 | } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) { | ||
2064 | /* might as well read a page, it is fast enough */ | 2073 | /* might as well read a page, it is fast enough */ |
2065 | rc = cifs_readpage_worker(file, page, &offset); | 2074 | rc = cifs_readpage_worker(file, *pagep, &offset); |
2075 | |||
2076 | /* we do not need to pass errors back | ||
2077 | e.g. if we do not have read access to the file | ||
2078 | because cifs_write_end will attempt synchronous writes | ||
2079 | -- shaggy */ | ||
2066 | } else { | 2080 | } else { |
2067 | /* we could try using another file handle if there is one - | 2081 | /* we could try using another file handle if there is one - |
2068 | but how would we lock it to prevent close of that handle | 2082 | but how would we lock it to prevent close of that handle |
2069 | racing with this read? In any case | 2083 | racing with this read? In any case |
2070 | this will be written out by commit_write so is fine */ | 2084 | this will be written out by write_end so is fine */ |
2071 | } | 2085 | } |
2072 | 2086 | ||
2073 | /* we do not need to pass errors back | ||
2074 | e.g. if we do not have read access to the file | ||
2075 | because cifs_commit_write will do the right thing. -- shaggy */ | ||
2076 | |||
2077 | return 0; | 2087 | return 0; |
2078 | } | 2088 | } |
2079 | 2089 | ||
@@ -2082,8 +2092,8 @@ const struct address_space_operations cifs_addr_ops = { | |||
2082 | .readpages = cifs_readpages, | 2092 | .readpages = cifs_readpages, |
2083 | .writepage = cifs_writepage, | 2093 | .writepage = cifs_writepage, |
2084 | .writepages = cifs_writepages, | 2094 | .writepages = cifs_writepages, |
2085 | .prepare_write = cifs_prepare_write, | 2095 | .write_begin = cifs_write_begin, |
2086 | .commit_write = cifs_commit_write, | 2096 | .write_end = cifs_write_end, |
2087 | .set_page_dirty = __set_page_dirty_nobuffers, | 2097 | .set_page_dirty = __set_page_dirty_nobuffers, |
2088 | /* .sync_page = cifs_sync_page, */ | 2098 | /* .sync_page = cifs_sync_page, */ |
2089 | /* .direct_IO = */ | 2099 | /* .direct_IO = */ |
@@ -2098,8 +2108,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { | |||
2098 | .readpage = cifs_readpage, | 2108 | .readpage = cifs_readpage, |
2099 | .writepage = cifs_writepage, | 2109 | .writepage = cifs_writepage, |
2100 | .writepages = cifs_writepages, | 2110 | .writepages = cifs_writepages, |
2101 | .prepare_write = cifs_prepare_write, | 2111 | .write_begin = cifs_write_begin, |
2102 | .commit_write = cifs_commit_write, | 2112 | .write_end = cifs_write_end, |
2103 | .set_page_dirty = __set_page_dirty_nobuffers, | 2113 | .set_page_dirty = __set_page_dirty_nobuffers, |
2104 | /* .sync_page = cifs_sync_page, */ | 2114 | /* .sync_page = cifs_sync_page, */ |
2105 | /* .direct_IO = */ | 2115 | /* .direct_IO = */ |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 28a22092d450..a8c833345fc9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, | |||
546 | if ((inode->i_mode & S_IWUGO) == 0 && | 546 | if ((inode->i_mode & S_IWUGO) == 0 && |
547 | (attr & ATTR_READONLY) == 0) | 547 | (attr & ATTR_READONLY) == 0) |
548 | inode->i_mode |= (S_IWUGO & default_mode); | 548 | inode->i_mode |= (S_IWUGO & default_mode); |
549 | inode->i_mode &= ~S_IFMT; | 549 | |
550 | inode->i_mode &= ~S_IFMT; | ||
550 | } | 551 | } |
551 | /* clear write bits if ATTR_READONLY is set */ | 552 | /* clear write bits if ATTR_READONLY is set */ |
552 | if (attr & ATTR_READONLY) | 553 | if (attr & ATTR_READONLY) |
@@ -649,6 +650,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | |||
649 | inode->i_fop = &simple_dir_operations; | 650 | inode->i_fop = &simple_dir_operations; |
650 | inode->i_uid = cifs_sb->mnt_uid; | 651 | inode->i_uid = cifs_sb->mnt_uid; |
651 | inode->i_gid = cifs_sb->mnt_gid; | 652 | inode->i_gid = cifs_sb->mnt_gid; |
653 | } else if (rc) { | ||
652 | _FreeXid(xid); | 654 | _FreeXid(xid); |
653 | iget_failed(inode); | 655 | iget_failed(inode); |
654 | return ERR_PTR(rc); | 656 | return ERR_PTR(rc); |
@@ -663,40 +665,201 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | |||
663 | return inode; | 665 | return inode; |
664 | } | 666 | } |
665 | 667 | ||
666 | int cifs_unlink(struct inode *inode, struct dentry *direntry) | 668 | static int |
669 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
670 | char *full_path, __u32 dosattr) | ||
671 | { | ||
672 | int rc; | ||
673 | int oplock = 0; | ||
674 | __u16 netfid; | ||
675 | __u32 netpid; | ||
676 | bool set_time = false; | ||
677 | struct cifsFileInfo *open_file; | ||
678 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
679 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
680 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
681 | FILE_BASIC_INFO info_buf; | ||
682 | |||
683 | if (attrs->ia_valid & ATTR_ATIME) { | ||
684 | set_time = true; | ||
685 | info_buf.LastAccessTime = | ||
686 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
687 | } else | ||
688 | info_buf.LastAccessTime = 0; | ||
689 | |||
690 | if (attrs->ia_valid & ATTR_MTIME) { | ||
691 | set_time = true; | ||
692 | info_buf.LastWriteTime = | ||
693 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
694 | } else | ||
695 | info_buf.LastWriteTime = 0; | ||
696 | |||
697 | /* | ||
698 | * Samba throws this field away, but windows may actually use it. | ||
699 | * Do not set ctime unless other time stamps are changed explicitly | ||
700 | * (i.e. by utimes()) since we would then have a mix of client and | ||
701 | * server times. | ||
702 | */ | ||
703 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
704 | cFYI(1, ("CIFS - CTIME changed")); | ||
705 | info_buf.ChangeTime = | ||
706 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
707 | } else | ||
708 | info_buf.ChangeTime = 0; | ||
709 | |||
710 | info_buf.CreationTime = 0; /* don't change */ | ||
711 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
712 | |||
713 | /* | ||
714 | * If the file is already open for write, just use that fileid | ||
715 | */ | ||
716 | open_file = find_writable_file(cifsInode); | ||
717 | if (open_file) { | ||
718 | netfid = open_file->netfid; | ||
719 | netpid = open_file->pid; | ||
720 | goto set_via_filehandle; | ||
721 | } | ||
722 | |||
723 | /* | ||
724 | * NT4 apparently returns success on this call, but it doesn't | ||
725 | * really work. | ||
726 | */ | ||
727 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
728 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
729 | &info_buf, cifs_sb->local_nls, | ||
730 | cifs_sb->mnt_cifs_flags & | ||
731 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
732 | if (rc == 0) { | ||
733 | cifsInode->cifsAttrs = dosattr; | ||
734 | goto out; | ||
735 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
736 | goto out; | ||
737 | } | ||
738 | |||
739 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
740 | "times not supported by this server")); | ||
741 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
742 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
743 | CREATE_NOT_DIR, &netfid, &oplock, | ||
744 | NULL, cifs_sb->local_nls, | ||
745 | cifs_sb->mnt_cifs_flags & | ||
746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
747 | |||
748 | if (rc != 0) { | ||
749 | if (rc == -EIO) | ||
750 | rc = -EINVAL; | ||
751 | goto out; | ||
752 | } | ||
753 | |||
754 | netpid = current->tgid; | ||
755 | |||
756 | set_via_filehandle: | ||
757 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
758 | if (!rc) | ||
759 | cifsInode->cifsAttrs = dosattr; | ||
760 | |||
761 | if (open_file == NULL) | ||
762 | CIFSSMBClose(xid, pTcon, netfid); | ||
763 | else | ||
764 | atomic_dec(&open_file->wrtPending); | ||
765 | out: | ||
766 | return rc; | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit | ||
771 | * and rename it to a random name that hopefully won't conflict with | ||
772 | * anything else. | ||
773 | */ | ||
774 | static int | ||
775 | cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) | ||
776 | { | ||
777 | int oplock = 0; | ||
778 | int rc; | ||
779 | __u16 netfid; | ||
780 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
781 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
782 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
783 | __u32 dosattr; | ||
784 | FILE_BASIC_INFO *info_buf; | ||
785 | |||
786 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | ||
787 | DELETE|FILE_WRITE_ATTRIBUTES, | ||
788 | CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, | ||
789 | &netfid, &oplock, NULL, cifs_sb->local_nls, | ||
790 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
791 | if (rc != 0) | ||
792 | goto out; | ||
793 | |||
794 | /* set ATTR_HIDDEN and clear ATTR_READONLY */ | ||
795 | cifsInode = CIFS_I(inode); | ||
796 | dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; | ||
797 | if (dosattr == 0) | ||
798 | dosattr |= ATTR_NORMAL; | ||
799 | dosattr |= ATTR_HIDDEN; | ||
800 | |||
801 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); | ||
802 | if (info_buf == NULL) { | ||
803 | rc = -ENOMEM; | ||
804 | goto out_close; | ||
805 | } | ||
806 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
807 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); | ||
808 | kfree(info_buf); | ||
809 | if (rc != 0) | ||
810 | goto out_close; | ||
811 | cifsInode->cifsAttrs = dosattr; | ||
812 | |||
813 | /* silly-rename the file */ | ||
814 | CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, | ||
815 | cifs_sb->mnt_cifs_flags & | ||
816 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
817 | |||
818 | /* set DELETE_ON_CLOSE */ | ||
819 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); | ||
820 | |||
821 | /* | ||
822 | * some samba versions return -ENOENT when we try to set the file | ||
823 | * disposition here. Likely a samba bug, but work around it for now | ||
824 | */ | ||
825 | if (rc == -ENOENT) | ||
826 | rc = 0; | ||
827 | |||
828 | out_close: | ||
829 | CIFSSMBClose(xid, tcon, netfid); | ||
830 | out: | ||
831 | return rc; | ||
832 | } | ||
833 | |||
834 | int cifs_unlink(struct inode *dir, struct dentry *dentry) | ||
667 | { | 835 | { |
668 | int rc = 0; | 836 | int rc = 0; |
669 | int xid; | 837 | int xid; |
670 | struct cifs_sb_info *cifs_sb; | ||
671 | struct cifsTconInfo *pTcon; | ||
672 | char *full_path = NULL; | 838 | char *full_path = NULL; |
673 | struct cifsInodeInfo *cifsInode; | 839 | struct inode *inode = dentry->d_inode; |
674 | FILE_BASIC_INFO *pinfo_buf; | 840 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
841 | struct super_block *sb = dir->i_sb; | ||
842 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
843 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
844 | struct iattr *attrs = NULL; | ||
845 | __u32 dosattr = 0, origattr = 0; | ||
675 | 846 | ||
676 | cFYI(1, ("cifs_unlink, inode = 0x%p", inode)); | 847 | cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); |
677 | 848 | ||
678 | xid = GetXid(); | 849 | xid = GetXid(); |
679 | 850 | ||
680 | if (inode) | 851 | /* Unlink can be called from rename so we can not take the |
681 | cifs_sb = CIFS_SB(inode->i_sb); | 852 | * sb->s_vfs_rename_mutex here */ |
682 | else | 853 | full_path = build_path_from_dentry(dentry); |
683 | cifs_sb = CIFS_SB(direntry->d_sb); | ||
684 | pTcon = cifs_sb->tcon; | ||
685 | |||
686 | /* Unlink can be called from rename so we can not grab the sem here | ||
687 | since we deadlock otherwise */ | ||
688 | /* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
689 | full_path = build_path_from_dentry(direntry); | ||
690 | /* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
691 | if (full_path == NULL) { | 854 | if (full_path == NULL) { |
692 | FreeXid(xid); | 855 | FreeXid(xid); |
693 | return -ENOMEM; | 856 | return -ENOMEM; |
694 | } | 857 | } |
695 | 858 | ||
696 | if ((pTcon->ses->capabilities & CAP_UNIX) && | 859 | if ((tcon->ses->capabilities & CAP_UNIX) && |
697 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 860 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
698 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { | 861 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
699 | rc = CIFSPOSIXDelFile(xid, pTcon, full_path, | 862 | rc = CIFSPOSIXDelFile(xid, tcon, full_path, |
700 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, | 863 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, |
701 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 864 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
702 | cFYI(1, ("posix del rc %d", rc)); | 865 | cFYI(1, ("posix del rc %d", rc)); |
@@ -704,125 +867,60 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) | |||
704 | goto psx_del_no_retry; | 867 | goto psx_del_no_retry; |
705 | } | 868 | } |
706 | 869 | ||
707 | rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, | 870 | retry_std_delete: |
871 | rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, | ||
708 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 872 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
873 | |||
709 | psx_del_no_retry: | 874 | psx_del_no_retry: |
710 | if (!rc) { | 875 | if (!rc) { |
711 | if (direntry->d_inode) | 876 | if (inode) |
712 | drop_nlink(direntry->d_inode); | 877 | drop_nlink(inode); |
713 | } else if (rc == -ENOENT) { | 878 | } else if (rc == -ENOENT) { |
714 | d_drop(direntry); | 879 | d_drop(dentry); |
715 | } else if (rc == -ETXTBSY) { | 880 | } else if (rc == -ETXTBSY) { |
716 | int oplock = 0; | 881 | rc = cifs_rename_pending_delete(full_path, inode, xid); |
717 | __u16 netfid; | 882 | if (rc == 0) |
718 | 883 | drop_nlink(inode); | |
719 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, | 884 | } else if (rc == -EACCES && dosattr == 0) { |
720 | CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, | 885 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
721 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 886 | if (attrs == NULL) { |
722 | cifs_sb->mnt_cifs_flags & | 887 | rc = -ENOMEM; |
723 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 888 | goto out_reval; |
724 | if (rc == 0) { | ||
725 | CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, | ||
726 | cifs_sb->local_nls, | ||
727 | cifs_sb->mnt_cifs_flags & | ||
728 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
729 | CIFSSMBClose(xid, pTcon, netfid); | ||
730 | if (direntry->d_inode) | ||
731 | drop_nlink(direntry->d_inode); | ||
732 | } | 889 | } |
733 | } else if (rc == -EACCES) { | ||
734 | /* try only if r/o attribute set in local lookup data? */ | ||
735 | pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL); | ||
736 | if (pinfo_buf) { | ||
737 | /* ATTRS set to normal clears r/o bit */ | ||
738 | pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); | ||
739 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) | ||
740 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
741 | pinfo_buf, | ||
742 | cifs_sb->local_nls, | ||
743 | cifs_sb->mnt_cifs_flags & | ||
744 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
745 | else | ||
746 | rc = -EOPNOTSUPP; | ||
747 | 890 | ||
748 | if (rc == -EOPNOTSUPP) { | 891 | /* try to reset dos attributes */ |
749 | int oplock = 0; | 892 | origattr = cifsInode->cifsAttrs; |
750 | __u16 netfid; | 893 | if (origattr == 0) |
751 | /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, | 894 | origattr |= ATTR_NORMAL; |
752 | full_path, | 895 | dosattr = origattr & ~ATTR_READONLY; |
753 | (__u16)ATTR_NORMAL, | 896 | if (dosattr == 0) |
754 | cifs_sb->local_nls); | 897 | dosattr |= ATTR_NORMAL; |
755 | For some strange reason it seems that NT4 eats the | 898 | dosattr |= ATTR_HIDDEN; |
756 | old setattr call without actually setting the | 899 | |
757 | attributes so on to the third attempted workaround | 900 | rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); |
758 | */ | 901 | if (rc != 0) |
759 | 902 | goto out_reval; | |
760 | /* BB could scan to see if we already have it open | 903 | |
761 | and pass in pid of opener to function */ | 904 | goto retry_std_delete; |
762 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
763 | FILE_OPEN, SYNCHRONIZE | | ||
764 | FILE_WRITE_ATTRIBUTES, 0, | ||
765 | &netfid, &oplock, NULL, | ||
766 | cifs_sb->local_nls, | ||
767 | cifs_sb->mnt_cifs_flags & | ||
768 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
769 | if (rc == 0) { | ||
770 | rc = CIFSSMBSetFileInfo(xid, pTcon, | ||
771 | pinfo_buf, | ||
772 | netfid, | ||
773 | current->tgid); | ||
774 | CIFSSMBClose(xid, pTcon, netfid); | ||
775 | } | ||
776 | } | ||
777 | kfree(pinfo_buf); | ||
778 | } | ||
779 | if (rc == 0) { | ||
780 | rc = CIFSSMBDelFile(xid, pTcon, full_path, | ||
781 | cifs_sb->local_nls, | ||
782 | cifs_sb->mnt_cifs_flags & | ||
783 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
784 | if (!rc) { | ||
785 | if (direntry->d_inode) | ||
786 | drop_nlink(direntry->d_inode); | ||
787 | } else if (rc == -ETXTBSY) { | ||
788 | int oplock = 0; | ||
789 | __u16 netfid; | ||
790 | |||
791 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
792 | FILE_OPEN, DELETE, | ||
793 | CREATE_NOT_DIR | | ||
794 | CREATE_DELETE_ON_CLOSE, | ||
795 | &netfid, &oplock, NULL, | ||
796 | cifs_sb->local_nls, | ||
797 | cifs_sb->mnt_cifs_flags & | ||
798 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
799 | if (rc == 0) { | ||
800 | CIFSSMBRenameOpenFile(xid, pTcon, | ||
801 | netfid, NULL, | ||
802 | cifs_sb->local_nls, | ||
803 | cifs_sb->mnt_cifs_flags & | ||
804 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
805 | CIFSSMBClose(xid, pTcon, netfid); | ||
806 | if (direntry->d_inode) | ||
807 | drop_nlink(direntry->d_inode); | ||
808 | } | ||
809 | /* BB if rc = -ETXTBUSY goto the rename logic BB */ | ||
810 | } | ||
811 | } | ||
812 | } | ||
813 | if (direntry->d_inode) { | ||
814 | cifsInode = CIFS_I(direntry->d_inode); | ||
815 | cifsInode->time = 0; /* will force revalidate to get info | ||
816 | when needed */ | ||
817 | direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); | ||
818 | } | 905 | } |
906 | |||
907 | /* undo the setattr if we errored out and it's needed */ | ||
908 | if (rc != 0 && dosattr != 0) | ||
909 | cifs_set_file_info(inode, attrs, xid, full_path, origattr); | ||
910 | |||
911 | out_reval: | ||
819 | if (inode) { | 912 | if (inode) { |
820 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | ||
821 | cifsInode = CIFS_I(inode); | 913 | cifsInode = CIFS_I(inode); |
822 | cifsInode->time = 0; /* force revalidate of dir as well */ | 914 | cifsInode->time = 0; /* will force revalidate to get info |
915 | when needed */ | ||
916 | inode->i_ctime = current_fs_time(sb); | ||
823 | } | 917 | } |
918 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); | ||
919 | cifsInode = CIFS_I(dir); | ||
920 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ | ||
824 | 921 | ||
825 | kfree(full_path); | 922 | kfree(full_path); |
923 | kfree(attrs); | ||
826 | FreeXid(xid); | 924 | FreeXid(xid); |
827 | return rc; | 925 | return rc; |
828 | } | 926 | } |
@@ -867,7 +965,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode, | |||
867 | 965 | ||
868 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | 966 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) |
869 | { | 967 | { |
870 | int rc = 0; | 968 | int rc = 0, tmprc; |
871 | int xid; | 969 | int xid; |
872 | struct cifs_sb_info *cifs_sb; | 970 | struct cifs_sb_info *cifs_sb; |
873 | struct cifsTconInfo *pTcon; | 971 | struct cifsTconInfo *pTcon; |
@@ -929,6 +1027,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
929 | kfree(pInfo); | 1027 | kfree(pInfo); |
930 | goto mkdir_get_info; | 1028 | goto mkdir_get_info; |
931 | } | 1029 | } |
1030 | |||
932 | /* Is an i_ino of zero legal? */ | 1031 | /* Is an i_ino of zero legal? */ |
933 | /* Are there sanity checks we can use to ensure that | 1032 | /* Are there sanity checks we can use to ensure that |
934 | the server is really filling in that field? */ | 1033 | the server is really filling in that field? */ |
@@ -1017,12 +1116,20 @@ mkdir_get_info: | |||
1017 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | 1116 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && |
1018 | (mode & S_IWUGO) == 0) { | 1117 | (mode & S_IWUGO) == 0) { |
1019 | FILE_BASIC_INFO pInfo; | 1118 | FILE_BASIC_INFO pInfo; |
1119 | struct cifsInodeInfo *cifsInode; | ||
1120 | u32 dosattrs; | ||
1121 | |||
1020 | memset(&pInfo, 0, sizeof(pInfo)); | 1122 | memset(&pInfo, 0, sizeof(pInfo)); |
1021 | pInfo.Attributes = cpu_to_le32(ATTR_READONLY); | 1123 | cifsInode = CIFS_I(newinode); |
1022 | CIFSSMBSetPathInfo(xid, pTcon, full_path, | 1124 | dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; |
1023 | &pInfo, cifs_sb->local_nls, | 1125 | pInfo.Attributes = cpu_to_le32(dosattrs); |
1126 | tmprc = CIFSSMBSetPathInfo(xid, pTcon, | ||
1127 | full_path, &pInfo, | ||
1128 | cifs_sb->local_nls, | ||
1024 | cifs_sb->mnt_cifs_flags & | 1129 | cifs_sb->mnt_cifs_flags & |
1025 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1130 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1131 | if (tmprc == 0) | ||
1132 | cifsInode->cifsAttrs = dosattrs; | ||
1026 | } | 1133 | } |
1027 | if (direntry->d_inode) { | 1134 | if (direntry->d_inode) { |
1028 | if (cifs_sb->mnt_cifs_flags & | 1135 | if (cifs_sb->mnt_cifs_flags & |
@@ -1094,117 +1201,141 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1094 | return rc; | 1201 | return rc; |
1095 | } | 1202 | } |
1096 | 1203 | ||
1204 | static int | ||
1205 | cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | ||
1206 | struct dentry *to_dentry, const char *toPath) | ||
1207 | { | ||
1208 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | ||
1209 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1210 | __u16 srcfid; | ||
1211 | int oplock, rc; | ||
1212 | |||
1213 | /* try path-based rename first */ | ||
1214 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | ||
1215 | cifs_sb->mnt_cifs_flags & | ||
1216 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1217 | |||
1218 | /* | ||
1219 | * don't bother with rename by filehandle unless file is busy and | ||
1220 | * source Note that cross directory moves do not work with | ||
1221 | * rename by filehandle to various Windows servers. | ||
1222 | */ | ||
1223 | if (rc == 0 || rc != -ETXTBSY) | ||
1224 | return rc; | ||
1225 | |||
1226 | /* open the file to be renamed -- we need DELETE perms */ | ||
1227 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | ||
1228 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, | ||
1229 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1230 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1231 | |||
1232 | if (rc == 0) { | ||
1233 | rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, | ||
1234 | (const char *) to_dentry->d_name.name, | ||
1235 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1236 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1237 | |||
1238 | CIFSSMBClose(xid, pTcon, srcfid); | ||
1239 | } | ||
1240 | |||
1241 | return rc; | ||
1242 | } | ||
1243 | |||
1097 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | 1244 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, |
1098 | struct inode *target_inode, struct dentry *target_direntry) | 1245 | struct inode *target_inode, struct dentry *target_direntry) |
1099 | { | 1246 | { |
1100 | char *fromName; | 1247 | char *fromName = NULL; |
1101 | char *toName; | 1248 | char *toName = NULL; |
1102 | struct cifs_sb_info *cifs_sb_source; | 1249 | struct cifs_sb_info *cifs_sb_source; |
1103 | struct cifs_sb_info *cifs_sb_target; | 1250 | struct cifs_sb_info *cifs_sb_target; |
1104 | struct cifsTconInfo *pTcon; | 1251 | struct cifsTconInfo *pTcon; |
1252 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | ||
1253 | FILE_UNIX_BASIC_INFO *info_buf_target; | ||
1105 | int xid; | 1254 | int xid; |
1106 | int rc = 0; | 1255 | int rc; |
1107 | |||
1108 | xid = GetXid(); | ||
1109 | 1256 | ||
1110 | cifs_sb_target = CIFS_SB(target_inode->i_sb); | 1257 | cifs_sb_target = CIFS_SB(target_inode->i_sb); |
1111 | cifs_sb_source = CIFS_SB(source_inode->i_sb); | 1258 | cifs_sb_source = CIFS_SB(source_inode->i_sb); |
1112 | pTcon = cifs_sb_source->tcon; | 1259 | pTcon = cifs_sb_source->tcon; |
1113 | 1260 | ||
1261 | xid = GetXid(); | ||
1262 | |||
1263 | /* | ||
1264 | * BB: this might be allowed if same server, but different share. | ||
1265 | * Consider adding support for this | ||
1266 | */ | ||
1114 | if (pTcon != cifs_sb_target->tcon) { | 1267 | if (pTcon != cifs_sb_target->tcon) { |
1115 | FreeXid(xid); | 1268 | rc = -EXDEV; |
1116 | return -EXDEV; /* BB actually could be allowed if same server, | 1269 | goto cifs_rename_exit; |
1117 | but different share. | ||
1118 | Might eventually add support for this */ | ||
1119 | } | 1270 | } |
1120 | 1271 | ||
1121 | /* we already have the rename sem so we do not need to grab it again | 1272 | /* |
1122 | here to protect the path integrity */ | 1273 | * we already have the rename sem so we do not need to |
1274 | * grab it again here to protect the path integrity | ||
1275 | */ | ||
1123 | fromName = build_path_from_dentry(source_direntry); | 1276 | fromName = build_path_from_dentry(source_direntry); |
1277 | if (fromName == NULL) { | ||
1278 | rc = -ENOMEM; | ||
1279 | goto cifs_rename_exit; | ||
1280 | } | ||
1281 | |||
1124 | toName = build_path_from_dentry(target_direntry); | 1282 | toName = build_path_from_dentry(target_direntry); |
1125 | if ((fromName == NULL) || (toName == NULL)) { | 1283 | if (toName == NULL) { |
1126 | rc = -ENOMEM; | 1284 | rc = -ENOMEM; |
1127 | goto cifs_rename_exit; | 1285 | goto cifs_rename_exit; |
1128 | } | 1286 | } |
1129 | 1287 | ||
1130 | rc = CIFSSMBRename(xid, pTcon, fromName, toName, | 1288 | rc = cifs_do_rename(xid, source_direntry, fromName, |
1131 | cifs_sb_source->local_nls, | 1289 | target_direntry, toName); |
1132 | cifs_sb_source->mnt_cifs_flags & | 1290 | |
1133 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1134 | if (rc == -EEXIST) { | 1291 | if (rc == -EEXIST) { |
1135 | /* check if they are the same file because rename of hardlinked | 1292 | if (pTcon->unix_ext) { |
1136 | files is a noop */ | 1293 | /* |
1137 | FILE_UNIX_BASIC_INFO *info_buf_source; | 1294 | * Are src and dst hardlinks of same inode? We can |
1138 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1295 | * only tell with unix extensions enabled |
1139 | 1296 | */ | |
1140 | info_buf_source = | 1297 | info_buf_source = |
1141 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | 1298 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
1142 | if (info_buf_source != NULL) { | 1299 | GFP_KERNEL); |
1300 | if (info_buf_source == NULL) | ||
1301 | goto unlink_target; | ||
1302 | |||
1143 | info_buf_target = info_buf_source + 1; | 1303 | info_buf_target = info_buf_source + 1; |
1144 | if (pTcon->unix_ext) | 1304 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, |
1145 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, | 1305 | info_buf_source, |
1146 | info_buf_source, | 1306 | cifs_sb_source->local_nls, |
1147 | cifs_sb_source->local_nls, | 1307 | cifs_sb_source->mnt_cifs_flags & |
1148 | cifs_sb_source->mnt_cifs_flags & | ||
1149 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1308 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1150 | /* else rc is still EEXIST so will fall through to | 1309 | if (rc != 0) |
1151 | unlink the target and retry rename */ | 1310 | goto unlink_target; |
1152 | if (rc == 0) { | 1311 | |
1153 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName, | 1312 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, |
1154 | info_buf_target, | 1313 | toName, info_buf_target, |
1155 | cifs_sb_target->local_nls, | 1314 | cifs_sb_target->local_nls, |
1156 | /* remap based on source sb */ | 1315 | /* remap based on source sb */ |
1157 | cifs_sb_source->mnt_cifs_flags & | 1316 | cifs_sb_source->mnt_cifs_flags & |
1158 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1159 | } | ||
1160 | if ((rc == 0) && | ||
1161 | (info_buf_source->UniqueId == | ||
1162 | info_buf_target->UniqueId)) { | ||
1163 | /* do not rename since the files are hardlinked which | ||
1164 | is a noop */ | ||
1165 | } else { | ||
1166 | /* we either can not tell the files are hardlinked | ||
1167 | (as with Windows servers) or files are not | ||
1168 | hardlinked so delete the target manually before | ||
1169 | renaming to follow POSIX rather than Windows | ||
1170 | semantics */ | ||
1171 | cifs_unlink(target_inode, target_direntry); | ||
1172 | rc = CIFSSMBRename(xid, pTcon, fromName, | ||
1173 | toName, | ||
1174 | cifs_sb_source->local_nls, | ||
1175 | cifs_sb_source->mnt_cifs_flags | ||
1176 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1177 | } | ||
1178 | kfree(info_buf_source); | ||
1179 | } /* if we can not get memory just leave rc as EEXIST */ | ||
1180 | } | ||
1181 | |||
1182 | if (rc) | ||
1183 | cFYI(1, ("rename rc %d", rc)); | ||
1184 | |||
1185 | if ((rc == -EIO) || (rc == -EEXIST)) { | ||
1186 | int oplock = 0; | ||
1187 | __u16 netfid; | ||
1188 | |||
1189 | /* BB FIXME Is Generic Read correct for rename? */ | ||
1190 | /* if renaming directory - we should not say CREATE_NOT_DIR, | ||
1191 | need to test renaming open directory, also GENERIC_READ | ||
1192 | might not right be right access to request */ | ||
1193 | rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, | ||
1194 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | ||
1195 | cifs_sb_source->local_nls, | ||
1196 | cifs_sb_source->mnt_cifs_flags & | ||
1197 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1198 | if (rc == 0) { | ||
1199 | rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, | ||
1200 | cifs_sb_source->local_nls, | ||
1201 | cifs_sb_source->mnt_cifs_flags & | ||
1202 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1317 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1203 | CIFSSMBClose(xid, pTcon, netfid); | 1318 | |
1204 | } | 1319 | if (rc == 0 && (info_buf_source->UniqueId == |
1320 | info_buf_target->UniqueId)) | ||
1321 | /* same file, POSIX says that this is a noop */ | ||
1322 | goto cifs_rename_exit; | ||
1323 | } /* else ... BB we could add the same check for Windows by | ||
1324 | checking the UniqueId via FILE_INTERNAL_INFO */ | ||
1325 | unlink_target: | ||
1326 | /* | ||
1327 | * we either can not tell the files are hardlinked (as with | ||
1328 | * Windows servers) or files are not hardlinked. Delete the | ||
1329 | * target manually before renaming to follow POSIX rather than | ||
1330 | * Windows semantics | ||
1331 | */ | ||
1332 | cifs_unlink(target_inode, target_direntry); | ||
1333 | rc = cifs_do_rename(xid, source_direntry, fromName, | ||
1334 | target_direntry, toName); | ||
1205 | } | 1335 | } |
1206 | 1336 | ||
1207 | cifs_rename_exit: | 1337 | cifs_rename_exit: |
1338 | kfree(info_buf_source); | ||
1208 | kfree(fromName); | 1339 | kfree(fromName); |
1209 | kfree(toName); | 1340 | kfree(toName); |
1210 | FreeXid(xid); | 1341 | FreeXid(xid); |
@@ -1505,101 +1636,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1505 | } | 1636 | } |
1506 | 1637 | ||
1507 | static int | 1638 | static int |
1508 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
1509 | char *full_path, __u32 dosattr) | ||
1510 | { | ||
1511 | int rc; | ||
1512 | int oplock = 0; | ||
1513 | __u16 netfid; | ||
1514 | __u32 netpid; | ||
1515 | bool set_time = false; | ||
1516 | struct cifsFileInfo *open_file; | ||
1517 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
1518 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1519 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1520 | FILE_BASIC_INFO info_buf; | ||
1521 | |||
1522 | if (attrs->ia_valid & ATTR_ATIME) { | ||
1523 | set_time = true; | ||
1524 | info_buf.LastAccessTime = | ||
1525 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
1526 | } else | ||
1527 | info_buf.LastAccessTime = 0; | ||
1528 | |||
1529 | if (attrs->ia_valid & ATTR_MTIME) { | ||
1530 | set_time = true; | ||
1531 | info_buf.LastWriteTime = | ||
1532 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
1533 | } else | ||
1534 | info_buf.LastWriteTime = 0; | ||
1535 | |||
1536 | /* | ||
1537 | * Samba throws this field away, but windows may actually use it. | ||
1538 | * Do not set ctime unless other time stamps are changed explicitly | ||
1539 | * (i.e. by utimes()) since we would then have a mix of client and | ||
1540 | * server times. | ||
1541 | */ | ||
1542 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
1543 | cFYI(1, ("CIFS - CTIME changed")); | ||
1544 | info_buf.ChangeTime = | ||
1545 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
1546 | } else | ||
1547 | info_buf.ChangeTime = 0; | ||
1548 | |||
1549 | info_buf.CreationTime = 0; /* don't change */ | ||
1550 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
1551 | |||
1552 | /* | ||
1553 | * If the file is already open for write, just use that fileid | ||
1554 | */ | ||
1555 | open_file = find_writable_file(cifsInode); | ||
1556 | if (open_file) { | ||
1557 | netfid = open_file->netfid; | ||
1558 | netpid = open_file->pid; | ||
1559 | goto set_via_filehandle; | ||
1560 | } | ||
1561 | |||
1562 | /* | ||
1563 | * NT4 apparently returns success on this call, but it doesn't | ||
1564 | * really work. | ||
1565 | */ | ||
1566 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
1567 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
1568 | &info_buf, cifs_sb->local_nls, | ||
1569 | cifs_sb->mnt_cifs_flags & | ||
1570 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1571 | if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
1572 | goto out; | ||
1573 | } | ||
1574 | |||
1575 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
1576 | "times not supported by this server")); | ||
1577 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
1578 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
1579 | CREATE_NOT_DIR, &netfid, &oplock, | ||
1580 | NULL, cifs_sb->local_nls, | ||
1581 | cifs_sb->mnt_cifs_flags & | ||
1582 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1583 | |||
1584 | if (rc != 0) { | ||
1585 | if (rc == -EIO) | ||
1586 | rc = -EINVAL; | ||
1587 | goto out; | ||
1588 | } | ||
1589 | |||
1590 | netpid = current->tgid; | ||
1591 | |||
1592 | set_via_filehandle: | ||
1593 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
1594 | if (open_file == NULL) | ||
1595 | CIFSSMBClose(xid, pTcon, netfid); | ||
1596 | else | ||
1597 | atomic_dec(&open_file->wrtPending); | ||
1598 | out: | ||
1599 | return rc; | ||
1600 | } | ||
1601 | |||
1602 | static int | ||
1603 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | 1639 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) |
1604 | { | 1640 | { |
1605 | int rc; | 1641 | int rc; |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4b17f8fe3157..88786ba02d27 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -150,8 +150,7 @@ cifs_buf_get(void) | |||
150 | but it may be more efficient to always alloc same size | 150 | but it may be more efficient to always alloc same size |
151 | albeit slightly larger than necessary and maxbuffersize | 151 | albeit slightly larger than necessary and maxbuffersize |
152 | defaults to this and can not be bigger */ | 152 | defaults to this and can not be bigger */ |
153 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp, | 153 | ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS); |
154 | GFP_KERNEL | GFP_NOFS); | ||
155 | 154 | ||
156 | /* clear the first few header bytes */ | 155 | /* clear the first few header bytes */ |
157 | /* for most paths, more is cleared in header_assemble */ | 156 | /* for most paths, more is cleared in header_assemble */ |
@@ -188,8 +187,7 @@ cifs_small_buf_get(void) | |||
188 | but it may be more efficient to always alloc same size | 187 | but it may be more efficient to always alloc same size |
189 | albeit slightly larger than necessary and maxbuffersize | 188 | albeit slightly larger than necessary and maxbuffersize |
190 | defaults to this and can not be bigger */ | 189 | defaults to this and can not be bigger */ |
191 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, | 190 | ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS); |
192 | GFP_KERNEL | GFP_NOFS); | ||
193 | if (ret_buf) { | 191 | if (ret_buf) { |
194 | /* No need to clear memory here, cleared in header assemble */ | 192 | /* No need to clear memory here, cleared in header assemble */ |
195 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ | 193 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ |
@@ -313,8 +311,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
313 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; | 311 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; |
314 | buffer->Pid = cpu_to_le16((__u16)current->tgid); | 312 | buffer->Pid = cpu_to_le16((__u16)current->tgid); |
315 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); | 313 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); |
316 | spin_lock(&GlobalMid_Lock); | ||
317 | spin_unlock(&GlobalMid_Lock); | ||
318 | if (treeCon) { | 314 | if (treeCon) { |
319 | buffer->Tid = treeCon->tid; | 315 | buffer->Tid = treeCon->tid; |
320 | if (treeCon->ses) { | 316 | if (treeCon->ses) { |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5f40ed3473f5..765adf12d54f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -640,6 +640,70 @@ static int is_dir_changed(struct file *file) | |||
640 | 640 | ||
641 | } | 641 | } |
642 | 642 | ||
643 | static int cifs_save_resume_key(const char *current_entry, | ||
644 | struct cifsFileInfo *cifsFile) | ||
645 | { | ||
646 | int rc = 0; | ||
647 | unsigned int len = 0; | ||
648 | __u16 level; | ||
649 | char *filename; | ||
650 | |||
651 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
652 | return -EINVAL; | ||
653 | |||
654 | level = cifsFile->srch_inf.info_level; | ||
655 | |||
656 | if (level == SMB_FIND_FILE_UNIX) { | ||
657 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
658 | |||
659 | filename = &pFindData->FileName[0]; | ||
660 | if (cifsFile->srch_inf.unicode) { | ||
661 | len = cifs_unicode_bytelen(filename); | ||
662 | } else { | ||
663 | /* BB should we make this strnlen of PATH_MAX? */ | ||
664 | len = strnlen(filename, PATH_MAX); | ||
665 | } | ||
666 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
667 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
668 | FILE_DIRECTORY_INFO *pFindData = | ||
669 | (FILE_DIRECTORY_INFO *)current_entry; | ||
670 | filename = &pFindData->FileName[0]; | ||
671 | len = le32_to_cpu(pFindData->FileNameLength); | ||
672 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
673 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
674 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
675 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
676 | filename = &pFindData->FileName[0]; | ||
677 | len = le32_to_cpu(pFindData->FileNameLength); | ||
678 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
679 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
680 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
681 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
682 | filename = &pFindData->FileName[0]; | ||
683 | len = le32_to_cpu(pFindData->FileNameLength); | ||
684 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
685 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
686 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
687 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
688 | filename = &pFindData->FileName[0]; | ||
689 | len = le32_to_cpu(pFindData->FileNameLength); | ||
690 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
691 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
692 | FIND_FILE_STANDARD_INFO *pFindData = | ||
693 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
694 | filename = &pFindData->FileName[0]; | ||
695 | /* one byte length, no name conversion */ | ||
696 | len = (unsigned int)pFindData->FileNameLength; | ||
697 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
698 | } else { | ||
699 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
700 | return -EINVAL; | ||
701 | } | ||
702 | cifsFile->srch_inf.resume_name_len = len; | ||
703 | cifsFile->srch_inf.presume_name = filename; | ||
704 | return rc; | ||
705 | } | ||
706 | |||
643 | /* find the corresponding entry in the search */ | 707 | /* find the corresponding entry in the search */ |
644 | /* Note that the SMB server returns search entries for . and .. which | 708 | /* Note that the SMB server returns search entries for . and .. which |
645 | complicates logic here if we choose to parse for them and we do not | 709 | complicates logic here if we choose to parse for them and we do not |
@@ -703,6 +767,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
703 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 767 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && |
704 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 768 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { |
705 | cFYI(1, ("calling findnext2")); | 769 | cFYI(1, ("calling findnext2")); |
770 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
706 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, | 771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, |
707 | &cifsFile->srch_inf); | 772 | &cifsFile->srch_inf); |
708 | if (rc) | 773 | if (rc) |
@@ -919,69 +984,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, | |||
919 | return rc; | 984 | return rc; |
920 | } | 985 | } |
921 | 986 | ||
922 | static int cifs_save_resume_key(const char *current_entry, | ||
923 | struct cifsFileInfo *cifsFile) | ||
924 | { | ||
925 | int rc = 0; | ||
926 | unsigned int len = 0; | ||
927 | __u16 level; | ||
928 | char *filename; | ||
929 | |||
930 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | level = cifsFile->srch_inf.info_level; | ||
934 | |||
935 | if (level == SMB_FIND_FILE_UNIX) { | ||
936 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
937 | |||
938 | filename = &pFindData->FileName[0]; | ||
939 | if (cifsFile->srch_inf.unicode) { | ||
940 | len = cifs_unicode_bytelen(filename); | ||
941 | } else { | ||
942 | /* BB should we make this strnlen of PATH_MAX? */ | ||
943 | len = strnlen(filename, PATH_MAX); | ||
944 | } | ||
945 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
946 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
947 | FILE_DIRECTORY_INFO *pFindData = | ||
948 | (FILE_DIRECTORY_INFO *)current_entry; | ||
949 | filename = &pFindData->FileName[0]; | ||
950 | len = le32_to_cpu(pFindData->FileNameLength); | ||
951 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
952 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
953 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
954 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
955 | filename = &pFindData->FileName[0]; | ||
956 | len = le32_to_cpu(pFindData->FileNameLength); | ||
957 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
958 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
959 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
960 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
961 | filename = &pFindData->FileName[0]; | ||
962 | len = le32_to_cpu(pFindData->FileNameLength); | ||
963 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
964 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
965 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
966 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
967 | filename = &pFindData->FileName[0]; | ||
968 | len = le32_to_cpu(pFindData->FileNameLength); | ||
969 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
970 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
971 | FIND_FILE_STANDARD_INFO *pFindData = | ||
972 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
973 | filename = &pFindData->FileName[0]; | ||
974 | /* one byte length, no name conversion */ | ||
975 | len = (unsigned int)pFindData->FileNameLength; | ||
976 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
977 | } else { | ||
978 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
979 | return -EINVAL; | ||
980 | } | ||
981 | cifsFile->srch_inf.resume_name_len = len; | ||
982 | cifsFile->srch_inf.presume_name = filename; | ||
983 | return rc; | ||
984 | } | ||
985 | 987 | ||
986 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 988 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) |
987 | { | 989 | { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..2851d5da0c8c 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; | 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; |
411 | 411 | ||
412 | pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; | ||
413 | |||
412 | /* no capabilities flags in old lanman negotiation */ | 414 | /* no capabilities flags in old lanman negotiation */ |
413 | 415 | ||
414 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 416 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
505 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); | 507 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); |
506 | } else | 508 | } else |
507 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); | 509 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); |
508 | } else if (type == Kerberos) { | 510 | } else if (type == Kerberos || type == MSKerberos) { |
509 | #ifdef CONFIG_CIFS_UPCALL | 511 | #ifdef CONFIG_CIFS_UPCALL |
510 | struct cifs_spnego_msg *msg; | 512 | struct cifs_spnego_msg *msg; |
511 | spnego_key = cifs_get_spnego_key(ses); | 513 | spnego_key = cifs_get_spnego_key(ses); |
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
516 | } | 518 | } |
517 | 519 | ||
518 | msg = spnego_key->payload.data; | 520 | msg = spnego_key->payload.data; |
521 | /* check version field to make sure that cifs.upcall is | ||
522 | sending us a response in an expected form */ | ||
523 | if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { | ||
524 | cERROR(1, ("incorrect version of cifs.upcall (expected" | ||
525 | " %d but got %d)", | ||
526 | CIFS_SPNEGO_UPCALL_VERSION, msg->version)); | ||
527 | rc = -EKEYREJECTED; | ||
528 | goto ssetup_exit; | ||
529 | } | ||
519 | /* bail out if key is too long */ | 530 | /* bail out if key is too long */ |
520 | if (msg->sesskey_len > | 531 | if (msg->sesskey_len > |
521 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 532 | sizeof(ses->server->mac_signing_key.data.krb5)) { |
@@ -613,8 +624,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
613 | ses, nls_cp); | 624 | ses, nls_cp); |
614 | 625 | ||
615 | ssetup_exit: | 626 | ssetup_exit: |
616 | if (spnego_key) | 627 | if (spnego_key) { |
628 | key_revoke(spnego_key); | ||
617 | key_put(spnego_key); | 629 | key_put(spnego_key); |
630 | } | ||
618 | kfree(str_area); | 631 | kfree(str_area); |
619 | if (resp_buf_type == CIFS_SMALL_BUFFER) { | 632 | if (resp_buf_type == CIFS_SMALL_BUFFER) { |
620 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); | 633 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e286db9f5ee2..bf0e6d8e382a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -50,8 +50,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) | |||
50 | return NULL; | 50 | return NULL; |
51 | } | 51 | } |
52 | 52 | ||
53 | temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, | 53 | temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); |
54 | GFP_KERNEL | GFP_NOFS); | ||
55 | if (temp == NULL) | 54 | if (temp == NULL) |
56 | return temp; | 55 | return temp; |
57 | else { | 56 | else { |
diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..075d0509970d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, | |||
792 | if (buf->result) | 792 | if (buf->result) |
793 | return -EINVAL; | 793 | return -EINVAL; |
794 | d_ino = ino; | 794 | d_ino = ino; |
795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
796 | buf->result = -EOVERFLOW; | ||
796 | return -EOVERFLOW; | 797 | return -EOVERFLOW; |
798 | } | ||
797 | buf->result++; | 799 | buf->result++; |
798 | dirent = buf->dirent; | 800 | dirent = buf->dirent; |
799 | if (!access_ok(VERIFY_WRITE, dirent, | 801 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, | |||
862 | if (reclen > buf->count) | 864 | if (reclen > buf->count) |
863 | return -EINVAL; | 865 | return -EINVAL; |
864 | d_ino = ino; | 866 | d_ino = ino; |
865 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 867 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
868 | buf->error = -EOVERFLOW; | ||
866 | return -EOVERFLOW; | 869 | return -EOVERFLOW; |
870 | } | ||
867 | dirent = buf->previous; | 871 | dirent = buf->previous; |
868 | if (dirent) { | 872 | if (dirent) { |
869 | if (__put_user(offset, &dirent->d_off)) | 873 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1311 | * Ensure that no racing symlink() will make detach_prep() fail while | 1311 | * Ensure that no racing symlink() will make detach_prep() fail while |
1312 | * the new link is temporarily attached | 1312 | * the new link is temporarily attached |
1313 | */ | 1313 | */ |
1314 | mutex_lock(&configfs_symlink_mutex); | ||
1315 | spin_lock(&configfs_dirent_lock); | ||
1316 | do { | 1314 | do { |
1317 | struct mutex *wait_mutex; | 1315 | struct mutex *wait_mutex; |
1318 | 1316 | ||
1317 | mutex_lock(&configfs_symlink_mutex); | ||
1318 | spin_lock(&configfs_dirent_lock); | ||
1319 | ret = configfs_detach_prep(dentry, &wait_mutex); | 1319 | ret = configfs_detach_prep(dentry, &wait_mutex); |
1320 | if (ret) { | 1320 | if (ret) |
1321 | configfs_detach_rollback(dentry); | 1321 | configfs_detach_rollback(dentry); |
1322 | spin_unlock(&configfs_dirent_lock); | 1322 | spin_unlock(&configfs_dirent_lock); |
1323 | mutex_unlock(&configfs_symlink_mutex); | 1323 | mutex_unlock(&configfs_symlink_mutex); |
1324 | |||
1325 | if (ret) { | ||
1324 | if (ret != -EAGAIN) { | 1326 | if (ret != -EAGAIN) { |
1325 | config_item_put(parent_item); | 1327 | config_item_put(parent_item); |
1326 | return ret; | 1328 | return ret; |
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1329 | /* Wait until the racing operation terminates */ | 1331 | /* Wait until the racing operation terminates */ |
1330 | mutex_lock(wait_mutex); | 1332 | mutex_lock(wait_mutex); |
1331 | mutex_unlock(wait_mutex); | 1333 | mutex_unlock(wait_mutex); |
1332 | |||
1333 | mutex_lock(&configfs_symlink_mutex); | ||
1334 | spin_lock(&configfs_dirent_lock); | ||
1335 | } | 1334 | } |
1336 | } while (ret == -EAGAIN); | 1335 | } while (ret == -EAGAIN); |
1337 | spin_unlock(&configfs_dirent_lock); | ||
1338 | mutex_unlock(&configfs_symlink_mutex); | ||
1339 | 1336 | ||
1340 | /* Get a working ref for the duration of this function */ | 1337 | /* Get a working ref for the duration of this function */ |
1341 | item = configfs_get_config_item(dentry); | 1338 | item = configfs_get_config_item(dentry); |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 0c3b618c15b3..f40423eb1a14 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex); | |||
43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) | 43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) |
44 | { | 44 | { |
45 | struct cramfs_inode *cramfs_inode = opaque; | 45 | struct cramfs_inode *cramfs_inode = opaque; |
46 | 46 | return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; | |
47 | if (inode->i_ino != CRAMINO(cramfs_inode)) | ||
48 | return 0; /* does not match */ | ||
49 | |||
50 | if (inode->i_ino != 1) | ||
51 | return 1; | ||
52 | |||
53 | /* all empty directories, char, block, pipe, and sock, share inode #1 */ | ||
54 | |||
55 | if ((inode->i_mode != cramfs_inode->mode) || | ||
56 | (inode->i_gid != cramfs_inode->gid) || | ||
57 | (inode->i_uid != cramfs_inode->uid)) | ||
58 | return 0; /* does not match */ | ||
59 | |||
60 | if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && | ||
61 | (inode->i_rdev != old_decode_dev(cramfs_inode->size))) | ||
62 | return 0; /* does not match */ | ||
63 | |||
64 | return 1; /* matches */ | ||
65 | } | 47 | } |
66 | 48 | ||
67 | static int cramfs_iget5_set(struct inode *inode, void *opaque) | 49 | static int cramfs_iget5_set(struct inode *inode, void *opaque) |
68 | { | 50 | { |
69 | static struct timespec zerotime; | ||
70 | struct cramfs_inode *cramfs_inode = opaque; | 51 | struct cramfs_inode *cramfs_inode = opaque; |
71 | inode->i_mode = cramfs_inode->mode; | ||
72 | inode->i_uid = cramfs_inode->uid; | ||
73 | inode->i_size = cramfs_inode->size; | ||
74 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
75 | inode->i_gid = cramfs_inode->gid; | ||
76 | /* Struct copy intentional */ | ||
77 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
78 | inode->i_ino = CRAMINO(cramfs_inode); | 52 | inode->i_ino = CRAMINO(cramfs_inode); |
79 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
80 | but it's the best we can do without reading the directory | ||
81 | contents. 1 yields the right result in GNU find, even | ||
82 | without -noleaf option. */ | ||
83 | if (S_ISREG(inode->i_mode)) { | ||
84 | inode->i_fop = &generic_ro_fops; | ||
85 | inode->i_data.a_ops = &cramfs_aops; | ||
86 | } else if (S_ISDIR(inode->i_mode)) { | ||
87 | inode->i_op = &cramfs_dir_inode_operations; | ||
88 | inode->i_fop = &cramfs_directory_operations; | ||
89 | } else if (S_ISLNK(inode->i_mode)) { | ||
90 | inode->i_op = &page_symlink_inode_operations; | ||
91 | inode->i_data.a_ops = &cramfs_aops; | ||
92 | } else { | ||
93 | inode->i_size = 0; | ||
94 | inode->i_blocks = 0; | ||
95 | init_special_inode(inode, inode->i_mode, | ||
96 | old_decode_dev(cramfs_inode->size)); | ||
97 | } | ||
98 | return 0; | 53 | return 0; |
99 | } | 54 | } |
100 | 55 | ||
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb, | |||
104 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), | 59 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), |
105 | cramfs_iget5_test, cramfs_iget5_set, | 60 | cramfs_iget5_test, cramfs_iget5_set, |
106 | cramfs_inode); | 61 | cramfs_inode); |
62 | static struct timespec zerotime; | ||
63 | |||
107 | if (inode && (inode->i_state & I_NEW)) { | 64 | if (inode && (inode->i_state & I_NEW)) { |
65 | inode->i_mode = cramfs_inode->mode; | ||
66 | inode->i_uid = cramfs_inode->uid; | ||
67 | inode->i_size = cramfs_inode->size; | ||
68 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
69 | inode->i_gid = cramfs_inode->gid; | ||
70 | /* Struct copy intentional */ | ||
71 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
72 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
73 | but it's the best we can do without reading the directory | ||
74 | contents. 1 yields the right result in GNU find, even | ||
75 | without -noleaf option. */ | ||
76 | if (S_ISREG(inode->i_mode)) { | ||
77 | inode->i_fop = &generic_ro_fops; | ||
78 | inode->i_data.a_ops = &cramfs_aops; | ||
79 | } else if (S_ISDIR(inode->i_mode)) { | ||
80 | inode->i_op = &cramfs_dir_inode_operations; | ||
81 | inode->i_fop = &cramfs_directory_operations; | ||
82 | } else if (S_ISLNK(inode->i_mode)) { | ||
83 | inode->i_op = &page_symlink_inode_operations; | ||
84 | inode->i_data.a_ops = &cramfs_aops; | ||
85 | } else { | ||
86 | inode->i_size = 0; | ||
87 | inode->i_blocks = 0; | ||
88 | init_special_inode(inode, inode->i_mode, | ||
89 | old_decode_dev(cramfs_inode->size)); | ||
90 | } | ||
108 | unlock_new_inode(inode); | 91 | unlock_new_inode(inode); |
109 | } | 92 | } |
110 | return inode; | 93 | return inode; |
111 | } | 94 | } |
112 | 95 | ||
96 | static void cramfs_drop_inode(struct inode *inode) | ||
97 | { | ||
98 | if (inode->i_ino == 1) | ||
99 | generic_delete_inode(inode); | ||
100 | else | ||
101 | generic_drop_inode(inode); | ||
102 | } | ||
103 | |||
113 | /* | 104 | /* |
114 | * We have our own block cache: don't fill up the buffer cache | 105 | * We have our own block cache: don't fill up the buffer cache |
115 | * with the rom-image, because the way the filesystem is set | 106 | * with the rom-image, because the way the filesystem is set |
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = { | |||
534 | .put_super = cramfs_put_super, | 525 | .put_super = cramfs_put_super, |
535 | .remount_fs = cramfs_remount, | 526 | .remount_fs = cramfs_remount, |
536 | .statfs = cramfs_statfs, | 527 | .statfs = cramfs_statfs, |
528 | .drop_inode = cramfs_drop_inode, | ||
537 | }; | 529 | }; |
538 | 530 | ||
539 | static int cramfs_get_sb(struct file_system_type *fs_type, | 531 | static int cramfs_get_sb(struct file_system_type *fs_type, |
diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1236 | * If no entry exists with the exact case name, allocate new dentry with | 1236 | * If no entry exists with the exact case name, allocate new dentry with |
1237 | * the exact case, and return the spliced entry. | 1237 | * the exact case, and return the spliced entry. |
1238 | */ | 1238 | */ |
1239 | struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, | 1239 | struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, |
1240 | struct qstr *name) | 1240 | struct qstr *name) |
1241 | { | 1241 | { |
1242 | int error; | 1242 | int error; |
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1395 | if (dentry->d_parent != parent) | 1395 | if (dentry->d_parent != parent) |
1396 | goto next; | 1396 | goto next; |
1397 | 1397 | ||
1398 | /* non-existing due to RCU? */ | ||
1399 | if (d_unhashed(dentry)) | ||
1400 | goto next; | ||
1401 | |||
1398 | /* | 1402 | /* |
1399 | * It is safe to compare names since d_move() cannot | 1403 | * It is safe to compare names since d_move() cannot |
1400 | * change the qstr (protected by d_lock). | 1404 | * change the qstr (protected by d_lock). |
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1410 | goto next; | 1414 | goto next; |
1411 | } | 1415 | } |
1412 | 1416 | ||
1413 | if (!d_unhashed(dentry)) { | 1417 | atomic_inc(&dentry->d_count); |
1414 | atomic_inc(&dentry->d_count); | 1418 | found = dentry; |
1415 | found = dentry; | ||
1416 | } | ||
1417 | spin_unlock(&dentry->d_lock); | 1419 | spin_unlock(&dentry->d_lock); |
1418 | break; | 1420 | break; |
1419 | next: | 1421 | next: |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 08e28c9bb416..3dbe2169cf36 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -26,8 +26,7 @@ | |||
26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
27 | #include <linux/fsnotify.h> | 27 | #include <linux/fsnotify.h> |
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | 29 | #include <linux/magic.h> | |
30 | #define DEBUGFS_MAGIC 0x64626720 | ||
31 | 30 | ||
32 | static struct vfsmount *debugfs_mount; | 31 | static struct vfsmount *debugfs_mount; |
33 | static int debugfs_mount_count; | 32 | static int debugfs_mount_count; |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 488eb424f662..4a714f6c1bed 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 | 27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 |
28 | 28 | ||
29 | #define DEVPTS_DEFAULT_MODE 0600 | 29 | #define DEVPTS_DEFAULT_MODE 0600 |
30 | #define PTMX_MINOR 2 | ||
30 | 31 | ||
31 | extern int pty_limit; /* Config limit on Unix98 ptys */ | 32 | extern int pty_limit; /* Config limit on Unix98 ptys */ |
32 | static DEFINE_IDA(allocated_ptys); | 33 | static DEFINE_IDA(allocated_ptys); |
@@ -48,7 +49,7 @@ enum { | |||
48 | Opt_err | 49 | Opt_err |
49 | }; | 50 | }; |
50 | 51 | ||
51 | static match_table_t tokens = { | 52 | static const match_table_t tokens = { |
52 | {Opt_uid, "uid=%u"}, | 53 | {Opt_uid, "uid=%u"}, |
53 | {Opt_gid, "gid=%u"}, | 54 | {Opt_gid, "gid=%u"}, |
54 | {Opt_mode, "mode=%o"}, | 55 | {Opt_mode, "mode=%o"}, |
@@ -169,15 +170,7 @@ static struct file_system_type devpts_fs_type = { | |||
169 | * to the System V naming convention | 170 | * to the System V naming convention |
170 | */ | 171 | */ |
171 | 172 | ||
172 | static struct dentry *get_node(int num) | 173 | int devpts_new_index(struct inode *ptmx_inode) |
173 | { | ||
174 | char s[12]; | ||
175 | struct dentry *root = devpts_root; | ||
176 | mutex_lock(&root->d_inode->i_mutex); | ||
177 | return lookup_one_len(s, root, sprintf(s, "%d", num)); | ||
178 | } | ||
179 | |||
180 | int devpts_new_index(void) | ||
181 | { | 174 | { |
182 | int index; | 175 | int index; |
183 | int ida_ret; | 176 | int ida_ret; |
@@ -205,20 +198,21 @@ retry: | |||
205 | return index; | 198 | return index; |
206 | } | 199 | } |
207 | 200 | ||
208 | void devpts_kill_index(int idx) | 201 | void devpts_kill_index(struct inode *ptmx_inode, int idx) |
209 | { | 202 | { |
210 | mutex_lock(&allocated_ptys_lock); | 203 | mutex_lock(&allocated_ptys_lock); |
211 | ida_remove(&allocated_ptys, idx); | 204 | ida_remove(&allocated_ptys, idx); |
212 | mutex_unlock(&allocated_ptys_lock); | 205 | mutex_unlock(&allocated_ptys_lock); |
213 | } | 206 | } |
214 | 207 | ||
215 | int devpts_pty_new(struct tty_struct *tty) | 208 | int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) |
216 | { | 209 | { |
217 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ | 210 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ |
218 | struct tty_driver *driver = tty->driver; | 211 | struct tty_driver *driver = tty->driver; |
219 | dev_t device = MKDEV(driver->major, driver->minor_start+number); | 212 | dev_t device = MKDEV(driver->major, driver->minor_start+number); |
220 | struct dentry *dentry; | 213 | struct dentry *dentry; |
221 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); | 214 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); |
215 | char s[12]; | ||
222 | 216 | ||
223 | /* We're supposed to be given the slave end of a pty */ | 217 | /* We're supposed to be given the slave end of a pty */ |
224 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); | 218 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); |
@@ -233,10 +227,15 @@ int devpts_pty_new(struct tty_struct *tty) | |||
233 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 227 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
234 | init_special_inode(inode, S_IFCHR|config.mode, device); | 228 | init_special_inode(inode, S_IFCHR|config.mode, device); |
235 | inode->i_private = tty; | 229 | inode->i_private = tty; |
230 | tty->driver_data = inode; | ||
236 | 231 | ||
237 | dentry = get_node(number); | 232 | sprintf(s, "%d", number); |
238 | if (!IS_ERR(dentry) && !dentry->d_inode) { | 233 | |
239 | d_instantiate(dentry, inode); | 234 | mutex_lock(&devpts_root->d_inode->i_mutex); |
235 | |||
236 | dentry = d_alloc_name(devpts_root, s); | ||
237 | if (!IS_ERR(dentry)) { | ||
238 | d_add(dentry, inode); | ||
240 | fsnotify_create(devpts_root->d_inode, dentry); | 239 | fsnotify_create(devpts_root->d_inode, dentry); |
241 | } | 240 | } |
242 | 241 | ||
@@ -245,36 +244,31 @@ int devpts_pty_new(struct tty_struct *tty) | |||
245 | return 0; | 244 | return 0; |
246 | } | 245 | } |
247 | 246 | ||
248 | struct tty_struct *devpts_get_tty(int number) | 247 | struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) |
249 | { | 248 | { |
250 | struct dentry *dentry = get_node(number); | 249 | BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
251 | struct tty_struct *tty; | ||
252 | |||
253 | tty = NULL; | ||
254 | if (!IS_ERR(dentry)) { | ||
255 | if (dentry->d_inode) | ||
256 | tty = dentry->d_inode->i_private; | ||
257 | dput(dentry); | ||
258 | } | ||
259 | 250 | ||
260 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 251 | if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) |
261 | 252 | return (struct tty_struct *)pts_inode->i_private; | |
262 | return tty; | 253 | return NULL; |
263 | } | 254 | } |
264 | 255 | ||
265 | void devpts_pty_kill(int number) | 256 | void devpts_pty_kill(struct tty_struct *tty) |
266 | { | 257 | { |
267 | struct dentry *dentry = get_node(number); | 258 | struct inode *inode = tty->driver_data; |
259 | struct dentry *dentry; | ||
268 | 260 | ||
269 | if (!IS_ERR(dentry)) { | 261 | BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
270 | struct inode *inode = dentry->d_inode; | 262 | |
271 | if (inode) { | 263 | mutex_lock(&devpts_root->d_inode->i_mutex); |
272 | inode->i_nlink--; | 264 | |
273 | d_delete(dentry); | 265 | dentry = d_find_alias(inode); |
274 | dput(dentry); | 266 | if (dentry && !IS_ERR(dentry)) { |
275 | } | 267 | inode->i_nlink--; |
268 | d_delete(dentry); | ||
276 | dput(dentry); | 269 | dput(dentry); |
277 | } | 270 | } |
271 | |||
278 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 272 | mutex_unlock(&devpts_root->d_inode->i_mutex); |
279 | } | 273 | } |
280 | 274 | ||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c4e7d721bd8d..fd9859f92fad 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -14,6 +14,9 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/configfs.h> | 16 | #include <linux/configfs.h> |
17 | #include <linux/in.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <net/ipv6.h> | ||
17 | #include <net/sock.h> | 20 | #include <net/sock.h> |
18 | 21 | ||
19 | #include "config.h" | 22 | #include "config.h" |
@@ -30,16 +33,16 @@ | |||
30 | 33 | ||
31 | static struct config_group *space_list; | 34 | static struct config_group *space_list; |
32 | static struct config_group *comm_list; | 35 | static struct config_group *comm_list; |
33 | static struct comm *local_comm; | 36 | static struct dlm_comm *local_comm; |
34 | 37 | ||
35 | struct clusters; | 38 | struct dlm_clusters; |
36 | struct cluster; | 39 | struct dlm_cluster; |
37 | struct spaces; | 40 | struct dlm_spaces; |
38 | struct space; | 41 | struct dlm_space; |
39 | struct comms; | 42 | struct dlm_comms; |
40 | struct comm; | 43 | struct dlm_comm; |
41 | struct nodes; | 44 | struct dlm_nodes; |
42 | struct node; | 45 | struct dlm_node; |
43 | 46 | ||
44 | static struct config_group *make_cluster(struct config_group *, const char *); | 47 | static struct config_group *make_cluster(struct config_group *, const char *); |
45 | static void drop_cluster(struct config_group *, struct config_item *); | 48 | static void drop_cluster(struct config_group *, struct config_item *); |
@@ -68,17 +71,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | |||
68 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | 71 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, |
69 | const char *buf, size_t len); | 72 | const char *buf, size_t len); |
70 | 73 | ||
71 | static ssize_t comm_nodeid_read(struct comm *cm, char *buf); | 74 | static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf); |
72 | static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); | 75 | static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, |
73 | static ssize_t comm_local_read(struct comm *cm, char *buf); | 76 | size_t len); |
74 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); | 77 | static ssize_t comm_local_read(struct dlm_comm *cm, char *buf); |
75 | static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); | 78 | static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, |
76 | static ssize_t node_nodeid_read(struct node *nd, char *buf); | 79 | size_t len); |
77 | static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); | 80 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, |
78 | static ssize_t node_weight_read(struct node *nd, char *buf); | 81 | size_t len); |
79 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); | 82 | static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); |
80 | 83 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, | |
81 | struct cluster { | 84 | size_t len); |
85 | static ssize_t node_weight_read(struct dlm_node *nd, char *buf); | ||
86 | static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, | ||
87 | size_t len); | ||
88 | |||
89 | struct dlm_cluster { | ||
82 | struct config_group group; | 90 | struct config_group group; |
83 | unsigned int cl_tcp_port; | 91 | unsigned int cl_tcp_port; |
84 | unsigned int cl_buffer_size; | 92 | unsigned int cl_buffer_size; |
@@ -109,11 +117,11 @@ enum { | |||
109 | 117 | ||
110 | struct cluster_attribute { | 118 | struct cluster_attribute { |
111 | struct configfs_attribute attr; | 119 | struct configfs_attribute attr; |
112 | ssize_t (*show)(struct cluster *, char *); | 120 | ssize_t (*show)(struct dlm_cluster *, char *); |
113 | ssize_t (*store)(struct cluster *, const char *, size_t); | 121 | ssize_t (*store)(struct dlm_cluster *, const char *, size_t); |
114 | }; | 122 | }; |
115 | 123 | ||
116 | static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | 124 | static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, |
117 | int *info_field, int check_zero, | 125 | int *info_field, int check_zero, |
118 | const char *buf, size_t len) | 126 | const char *buf, size_t len) |
119 | { | 127 | { |
@@ -134,12 +142,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, | |||
134 | } | 142 | } |
135 | 143 | ||
136 | #define CLUSTER_ATTR(name, check_zero) \ | 144 | #define CLUSTER_ATTR(name, check_zero) \ |
137 | static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ | 145 | static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \ |
138 | { \ | 146 | { \ |
139 | return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ | 147 | return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ |
140 | check_zero, buf, len); \ | 148 | check_zero, buf, len); \ |
141 | } \ | 149 | } \ |
142 | static ssize_t name##_read(struct cluster *cl, char *buf) \ | 150 | static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \ |
143 | { \ | 151 | { \ |
144 | return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ | 152 | return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ |
145 | } \ | 153 | } \ |
@@ -181,8 +189,8 @@ enum { | |||
181 | 189 | ||
182 | struct comm_attribute { | 190 | struct comm_attribute { |
183 | struct configfs_attribute attr; | 191 | struct configfs_attribute attr; |
184 | ssize_t (*show)(struct comm *, char *); | 192 | ssize_t (*show)(struct dlm_comm *, char *); |
185 | ssize_t (*store)(struct comm *, const char *, size_t); | 193 | ssize_t (*store)(struct dlm_comm *, const char *, size_t); |
186 | }; | 194 | }; |
187 | 195 | ||
188 | static struct comm_attribute comm_attr_nodeid = { | 196 | static struct comm_attribute comm_attr_nodeid = { |
@@ -222,8 +230,8 @@ enum { | |||
222 | 230 | ||
223 | struct node_attribute { | 231 | struct node_attribute { |
224 | struct configfs_attribute attr; | 232 | struct configfs_attribute attr; |
225 | ssize_t (*show)(struct node *, char *); | 233 | ssize_t (*show)(struct dlm_node *, char *); |
226 | ssize_t (*store)(struct node *, const char *, size_t); | 234 | ssize_t (*store)(struct dlm_node *, const char *, size_t); |
227 | }; | 235 | }; |
228 | 236 | ||
229 | static struct node_attribute node_attr_nodeid = { | 237 | static struct node_attribute node_attr_nodeid = { |
@@ -248,26 +256,26 @@ static struct configfs_attribute *node_attrs[] = { | |||
248 | NULL, | 256 | NULL, |
249 | }; | 257 | }; |
250 | 258 | ||
251 | struct clusters { | 259 | struct dlm_clusters { |
252 | struct configfs_subsystem subsys; | 260 | struct configfs_subsystem subsys; |
253 | }; | 261 | }; |
254 | 262 | ||
255 | struct spaces { | 263 | struct dlm_spaces { |
256 | struct config_group ss_group; | 264 | struct config_group ss_group; |
257 | }; | 265 | }; |
258 | 266 | ||
259 | struct space { | 267 | struct dlm_space { |
260 | struct config_group group; | 268 | struct config_group group; |
261 | struct list_head members; | 269 | struct list_head members; |
262 | struct mutex members_lock; | 270 | struct mutex members_lock; |
263 | int members_count; | 271 | int members_count; |
264 | }; | 272 | }; |
265 | 273 | ||
266 | struct comms { | 274 | struct dlm_comms { |
267 | struct config_group cs_group; | 275 | struct config_group cs_group; |
268 | }; | 276 | }; |
269 | 277 | ||
270 | struct comm { | 278 | struct dlm_comm { |
271 | struct config_item item; | 279 | struct config_item item; |
272 | int nodeid; | 280 | int nodeid; |
273 | int local; | 281 | int local; |
@@ -275,11 +283,11 @@ struct comm { | |||
275 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | 283 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; |
276 | }; | 284 | }; |
277 | 285 | ||
278 | struct nodes { | 286 | struct dlm_nodes { |
279 | struct config_group ns_group; | 287 | struct config_group ns_group; |
280 | }; | 288 | }; |
281 | 289 | ||
282 | struct node { | 290 | struct dlm_node { |
283 | struct config_item item; | 291 | struct config_item item; |
284 | struct list_head list; /* space->members */ | 292 | struct list_head list; /* space->members */ |
285 | int nodeid; | 293 | int nodeid; |
@@ -372,38 +380,40 @@ static struct config_item_type node_type = { | |||
372 | .ct_owner = THIS_MODULE, | 380 | .ct_owner = THIS_MODULE, |
373 | }; | 381 | }; |
374 | 382 | ||
375 | static struct cluster *to_cluster(struct config_item *i) | 383 | static struct dlm_cluster *config_item_to_cluster(struct config_item *i) |
376 | { | 384 | { |
377 | return i ? container_of(to_config_group(i), struct cluster, group):NULL; | 385 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : |
386 | NULL; | ||
378 | } | 387 | } |
379 | 388 | ||
380 | static struct space *to_space(struct config_item *i) | 389 | static struct dlm_space *config_item_to_space(struct config_item *i) |
381 | { | 390 | { |
382 | return i ? container_of(to_config_group(i), struct space, group) : NULL; | 391 | return i ? container_of(to_config_group(i), struct dlm_space, group) : |
392 | NULL; | ||
383 | } | 393 | } |
384 | 394 | ||
385 | static struct comm *to_comm(struct config_item *i) | 395 | static struct dlm_comm *config_item_to_comm(struct config_item *i) |
386 | { | 396 | { |
387 | return i ? container_of(i, struct comm, item) : NULL; | 397 | return i ? container_of(i, struct dlm_comm, item) : NULL; |
388 | } | 398 | } |
389 | 399 | ||
390 | static struct node *to_node(struct config_item *i) | 400 | static struct dlm_node *config_item_to_node(struct config_item *i) |
391 | { | 401 | { |
392 | return i ? container_of(i, struct node, item) : NULL; | 402 | return i ? container_of(i, struct dlm_node, item) : NULL; |
393 | } | 403 | } |
394 | 404 | ||
395 | static struct config_group *make_cluster(struct config_group *g, | 405 | static struct config_group *make_cluster(struct config_group *g, |
396 | const char *name) | 406 | const char *name) |
397 | { | 407 | { |
398 | struct cluster *cl = NULL; | 408 | struct dlm_cluster *cl = NULL; |
399 | struct spaces *sps = NULL; | 409 | struct dlm_spaces *sps = NULL; |
400 | struct comms *cms = NULL; | 410 | struct dlm_comms *cms = NULL; |
401 | void *gps = NULL; | 411 | void *gps = NULL; |
402 | 412 | ||
403 | cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); | 413 | cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL); |
404 | gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); | 414 | gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); |
405 | sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); | 415 | sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL); |
406 | cms = kzalloc(sizeof(struct comms), GFP_KERNEL); | 416 | cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL); |
407 | 417 | ||
408 | if (!cl || !gps || !sps || !cms) | 418 | if (!cl || !gps || !sps || !cms) |
409 | goto fail; | 419 | goto fail; |
@@ -443,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
443 | 453 | ||
444 | static void drop_cluster(struct config_group *g, struct config_item *i) | 454 | static void drop_cluster(struct config_group *g, struct config_item *i) |
445 | { | 455 | { |
446 | struct cluster *cl = to_cluster(i); | 456 | struct dlm_cluster *cl = config_item_to_cluster(i); |
447 | struct config_item *tmp; | 457 | struct config_item *tmp; |
448 | int j; | 458 | int j; |
449 | 459 | ||
@@ -461,20 +471,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i) | |||
461 | 471 | ||
462 | static void release_cluster(struct config_item *i) | 472 | static void release_cluster(struct config_item *i) |
463 | { | 473 | { |
464 | struct cluster *cl = to_cluster(i); | 474 | struct dlm_cluster *cl = config_item_to_cluster(i); |
465 | kfree(cl->group.default_groups); | 475 | kfree(cl->group.default_groups); |
466 | kfree(cl); | 476 | kfree(cl); |
467 | } | 477 | } |
468 | 478 | ||
469 | static struct config_group *make_space(struct config_group *g, const char *name) | 479 | static struct config_group *make_space(struct config_group *g, const char *name) |
470 | { | 480 | { |
471 | struct space *sp = NULL; | 481 | struct dlm_space *sp = NULL; |
472 | struct nodes *nds = NULL; | 482 | struct dlm_nodes *nds = NULL; |
473 | void *gps = NULL; | 483 | void *gps = NULL; |
474 | 484 | ||
475 | sp = kzalloc(sizeof(struct space), GFP_KERNEL); | 485 | sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL); |
476 | gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); | 486 | gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); |
477 | nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); | 487 | nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL); |
478 | 488 | ||
479 | if (!sp || !gps || !nds) | 489 | if (!sp || !gps || !nds) |
480 | goto fail; | 490 | goto fail; |
@@ -500,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) | |||
500 | 510 | ||
501 | static void drop_space(struct config_group *g, struct config_item *i) | 511 | static void drop_space(struct config_group *g, struct config_item *i) |
502 | { | 512 | { |
503 | struct space *sp = to_space(i); | 513 | struct dlm_space *sp = config_item_to_space(i); |
504 | struct config_item *tmp; | 514 | struct config_item *tmp; |
505 | int j; | 515 | int j; |
506 | 516 | ||
@@ -517,16 +527,16 @@ static void drop_space(struct config_group *g, struct config_item *i) | |||
517 | 527 | ||
518 | static void release_space(struct config_item *i) | 528 | static void release_space(struct config_item *i) |
519 | { | 529 | { |
520 | struct space *sp = to_space(i); | 530 | struct dlm_space *sp = config_item_to_space(i); |
521 | kfree(sp->group.default_groups); | 531 | kfree(sp->group.default_groups); |
522 | kfree(sp); | 532 | kfree(sp); |
523 | } | 533 | } |
524 | 534 | ||
525 | static struct config_item *make_comm(struct config_group *g, const char *name) | 535 | static struct config_item *make_comm(struct config_group *g, const char *name) |
526 | { | 536 | { |
527 | struct comm *cm; | 537 | struct dlm_comm *cm; |
528 | 538 | ||
529 | cm = kzalloc(sizeof(struct comm), GFP_KERNEL); | 539 | cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL); |
530 | if (!cm) | 540 | if (!cm) |
531 | return ERR_PTR(-ENOMEM); | 541 | return ERR_PTR(-ENOMEM); |
532 | 542 | ||
@@ -539,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
539 | 549 | ||
540 | static void drop_comm(struct config_group *g, struct config_item *i) | 550 | static void drop_comm(struct config_group *g, struct config_item *i) |
541 | { | 551 | { |
542 | struct comm *cm = to_comm(i); | 552 | struct dlm_comm *cm = config_item_to_comm(i); |
543 | if (local_comm == cm) | 553 | if (local_comm == cm) |
544 | local_comm = NULL; | 554 | local_comm = NULL; |
545 | dlm_lowcomms_close(cm->nodeid); | 555 | dlm_lowcomms_close(cm->nodeid); |
@@ -550,16 +560,16 @@ static void drop_comm(struct config_group *g, struct config_item *i) | |||
550 | 560 | ||
551 | static void release_comm(struct config_item *i) | 561 | static void release_comm(struct config_item *i) |
552 | { | 562 | { |
553 | struct comm *cm = to_comm(i); | 563 | struct dlm_comm *cm = config_item_to_comm(i); |
554 | kfree(cm); | 564 | kfree(cm); |
555 | } | 565 | } |
556 | 566 | ||
557 | static struct config_item *make_node(struct config_group *g, const char *name) | 567 | static struct config_item *make_node(struct config_group *g, const char *name) |
558 | { | 568 | { |
559 | struct space *sp = to_space(g->cg_item.ci_parent); | 569 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
560 | struct node *nd; | 570 | struct dlm_node *nd; |
561 | 571 | ||
562 | nd = kzalloc(sizeof(struct node), GFP_KERNEL); | 572 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); |
563 | if (!nd) | 573 | if (!nd) |
564 | return ERR_PTR(-ENOMEM); | 574 | return ERR_PTR(-ENOMEM); |
565 | 575 | ||
@@ -578,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) | |||
578 | 588 | ||
579 | static void drop_node(struct config_group *g, struct config_item *i) | 589 | static void drop_node(struct config_group *g, struct config_item *i) |
580 | { | 590 | { |
581 | struct space *sp = to_space(g->cg_item.ci_parent); | 591 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
582 | struct node *nd = to_node(i); | 592 | struct dlm_node *nd = config_item_to_node(i); |
583 | 593 | ||
584 | mutex_lock(&sp->members_lock); | 594 | mutex_lock(&sp->members_lock); |
585 | list_del(&nd->list); | 595 | list_del(&nd->list); |
@@ -591,11 +601,11 @@ static void drop_node(struct config_group *g, struct config_item *i) | |||
591 | 601 | ||
592 | static void release_node(struct config_item *i) | 602 | static void release_node(struct config_item *i) |
593 | { | 603 | { |
594 | struct node *nd = to_node(i); | 604 | struct dlm_node *nd = config_item_to_node(i); |
595 | kfree(nd); | 605 | kfree(nd); |
596 | } | 606 | } |
597 | 607 | ||
598 | static struct clusters clusters_root = { | 608 | static struct dlm_clusters clusters_root = { |
599 | .subsys = { | 609 | .subsys = { |
600 | .su_group = { | 610 | .su_group = { |
601 | .cg_item = { | 611 | .cg_item = { |
@@ -625,7 +635,7 @@ void dlm_config_exit(void) | |||
625 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | 635 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, |
626 | char *buf) | 636 | char *buf) |
627 | { | 637 | { |
628 | struct cluster *cl = to_cluster(i); | 638 | struct dlm_cluster *cl = config_item_to_cluster(i); |
629 | struct cluster_attribute *cla = | 639 | struct cluster_attribute *cla = |
630 | container_of(a, struct cluster_attribute, attr); | 640 | container_of(a, struct cluster_attribute, attr); |
631 | return cla->show ? cla->show(cl, buf) : 0; | 641 | return cla->show ? cla->show(cl, buf) : 0; |
@@ -635,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
635 | struct configfs_attribute *a, | 645 | struct configfs_attribute *a, |
636 | const char *buf, size_t len) | 646 | const char *buf, size_t len) |
637 | { | 647 | { |
638 | struct cluster *cl = to_cluster(i); | 648 | struct dlm_cluster *cl = config_item_to_cluster(i); |
639 | struct cluster_attribute *cla = | 649 | struct cluster_attribute *cla = |
640 | container_of(a, struct cluster_attribute, attr); | 650 | container_of(a, struct cluster_attribute, attr); |
641 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; | 651 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; |
@@ -644,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
644 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 654 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
645 | char *buf) | 655 | char *buf) |
646 | { | 656 | { |
647 | struct comm *cm = to_comm(i); | 657 | struct dlm_comm *cm = config_item_to_comm(i); |
648 | struct comm_attribute *cma = | 658 | struct comm_attribute *cma = |
649 | container_of(a, struct comm_attribute, attr); | 659 | container_of(a, struct comm_attribute, attr); |
650 | return cma->show ? cma->show(cm, buf) : 0; | 660 | return cma->show ? cma->show(cm, buf) : 0; |
@@ -653,29 +663,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | |||
653 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | 663 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, |
654 | const char *buf, size_t len) | 664 | const char *buf, size_t len) |
655 | { | 665 | { |
656 | struct comm *cm = to_comm(i); | 666 | struct dlm_comm *cm = config_item_to_comm(i); |
657 | struct comm_attribute *cma = | 667 | struct comm_attribute *cma = |
658 | container_of(a, struct comm_attribute, attr); | 668 | container_of(a, struct comm_attribute, attr); |
659 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | 669 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; |
660 | } | 670 | } |
661 | 671 | ||
662 | static ssize_t comm_nodeid_read(struct comm *cm, char *buf) | 672 | static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) |
663 | { | 673 | { |
664 | return sprintf(buf, "%d\n", cm->nodeid); | 674 | return sprintf(buf, "%d\n", cm->nodeid); |
665 | } | 675 | } |
666 | 676 | ||
667 | static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) | 677 | static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, |
678 | size_t len) | ||
668 | { | 679 | { |
669 | cm->nodeid = simple_strtol(buf, NULL, 0); | 680 | cm->nodeid = simple_strtol(buf, NULL, 0); |
670 | return len; | 681 | return len; |
671 | } | 682 | } |
672 | 683 | ||
673 | static ssize_t comm_local_read(struct comm *cm, char *buf) | 684 | static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) |
674 | { | 685 | { |
675 | return sprintf(buf, "%d\n", cm->local); | 686 | return sprintf(buf, "%d\n", cm->local); |
676 | } | 687 | } |
677 | 688 | ||
678 | static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) | 689 | static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, |
690 | size_t len) | ||
679 | { | 691 | { |
680 | cm->local= simple_strtol(buf, NULL, 0); | 692 | cm->local= simple_strtol(buf, NULL, 0); |
681 | if (cm->local && !local_comm) | 693 | if (cm->local && !local_comm) |
@@ -683,7 +695,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) | |||
683 | return len; | 695 | return len; |
684 | } | 696 | } |
685 | 697 | ||
686 | static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) | 698 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) |
687 | { | 699 | { |
688 | struct sockaddr_storage *addr; | 700 | struct sockaddr_storage *addr; |
689 | 701 | ||
@@ -705,7 +717,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) | |||
705 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | 717 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, |
706 | char *buf) | 718 | char *buf) |
707 | { | 719 | { |
708 | struct node *nd = to_node(i); | 720 | struct dlm_node *nd = config_item_to_node(i); |
709 | struct node_attribute *nda = | 721 | struct node_attribute *nda = |
710 | container_of(a, struct node_attribute, attr); | 722 | container_of(a, struct node_attribute, attr); |
711 | return nda->show ? nda->show(nd, buf) : 0; | 723 | return nda->show ? nda->show(nd, buf) : 0; |
@@ -714,29 +726,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | |||
714 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | 726 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, |
715 | const char *buf, size_t len) | 727 | const char *buf, size_t len) |
716 | { | 728 | { |
717 | struct node *nd = to_node(i); | 729 | struct dlm_node *nd = config_item_to_node(i); |
718 | struct node_attribute *nda = | 730 | struct node_attribute *nda = |
719 | container_of(a, struct node_attribute, attr); | 731 | container_of(a, struct node_attribute, attr); |
720 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | 732 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; |
721 | } | 733 | } |
722 | 734 | ||
723 | static ssize_t node_nodeid_read(struct node *nd, char *buf) | 735 | static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) |
724 | { | 736 | { |
725 | return sprintf(buf, "%d\n", nd->nodeid); | 737 | return sprintf(buf, "%d\n", nd->nodeid); |
726 | } | 738 | } |
727 | 739 | ||
728 | static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) | 740 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, |
741 | size_t len) | ||
729 | { | 742 | { |
730 | nd->nodeid = simple_strtol(buf, NULL, 0); | 743 | nd->nodeid = simple_strtol(buf, NULL, 0); |
731 | return len; | 744 | return len; |
732 | } | 745 | } |
733 | 746 | ||
734 | static ssize_t node_weight_read(struct node *nd, char *buf) | 747 | static ssize_t node_weight_read(struct dlm_node *nd, char *buf) |
735 | { | 748 | { |
736 | return sprintf(buf, "%d\n", nd->weight); | 749 | return sprintf(buf, "%d\n", nd->weight); |
737 | } | 750 | } |
738 | 751 | ||
739 | static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) | 752 | static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, |
753 | size_t len) | ||
740 | { | 754 | { |
741 | nd->weight = simple_strtol(buf, NULL, 0); | 755 | nd->weight = simple_strtol(buf, NULL, 0); |
742 | return len; | 756 | return len; |
@@ -746,7 +760,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) | |||
746 | * Functions for the dlm to get the info that's been configured | 760 | * Functions for the dlm to get the info that's been configured |
747 | */ | 761 | */ |
748 | 762 | ||
749 | static struct space *get_space(char *name) | 763 | static struct dlm_space *get_space(char *name) |
750 | { | 764 | { |
751 | struct config_item *i; | 765 | struct config_item *i; |
752 | 766 | ||
@@ -757,18 +771,45 @@ static struct space *get_space(char *name) | |||
757 | i = config_group_find_item(space_list, name); | 771 | i = config_group_find_item(space_list, name); |
758 | mutex_unlock(&space_list->cg_subsys->su_mutex); | 772 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
759 | 773 | ||
760 | return to_space(i); | 774 | return config_item_to_space(i); |
761 | } | 775 | } |
762 | 776 | ||
763 | static void put_space(struct space *sp) | 777 | static void put_space(struct dlm_space *sp) |
764 | { | 778 | { |
765 | config_item_put(&sp->group.cg_item); | 779 | config_item_put(&sp->group.cg_item); |
766 | } | 780 | } |
767 | 781 | ||
768 | static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | 782 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) |
783 | { | ||
784 | switch (x->ss_family) { | ||
785 | case AF_INET: { | ||
786 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
787 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
788 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
789 | return 0; | ||
790 | if (sinx->sin_port != siny->sin_port) | ||
791 | return 0; | ||
792 | break; | ||
793 | } | ||
794 | case AF_INET6: { | ||
795 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
796 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
797 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
798 | return 0; | ||
799 | if (sinx->sin6_port != siny->sin6_port) | ||
800 | return 0; | ||
801 | break; | ||
802 | } | ||
803 | default: | ||
804 | return 0; | ||
805 | } | ||
806 | return 1; | ||
807 | } | ||
808 | |||
809 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | ||
769 | { | 810 | { |
770 | struct config_item *i; | 811 | struct config_item *i; |
771 | struct comm *cm = NULL; | 812 | struct dlm_comm *cm = NULL; |
772 | int found = 0; | 813 | int found = 0; |
773 | 814 | ||
774 | if (!comm_list) | 815 | if (!comm_list) |
@@ -777,7 +818,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
777 | mutex_lock(&clusters_root.subsys.su_mutex); | 818 | mutex_lock(&clusters_root.subsys.su_mutex); |
778 | 819 | ||
779 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 820 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
780 | cm = to_comm(i); | 821 | cm = config_item_to_comm(i); |
781 | 822 | ||
782 | if (nodeid) { | 823 | if (nodeid) { |
783 | if (cm->nodeid != nodeid) | 824 | if (cm->nodeid != nodeid) |
@@ -786,8 +827,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
786 | config_item_get(i); | 827 | config_item_get(i); |
787 | break; | 828 | break; |
788 | } else { | 829 | } else { |
789 | if (!cm->addr_count || | 830 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) |
790 | memcmp(cm->addr[0], addr, sizeof(*addr))) | ||
791 | continue; | 831 | continue; |
792 | found = 1; | 832 | found = 1; |
793 | config_item_get(i); | 833 | config_item_get(i); |
@@ -801,7 +841,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
801 | return cm; | 841 | return cm; |
802 | } | 842 | } |
803 | 843 | ||
804 | static void put_comm(struct comm *cm) | 844 | static void put_comm(struct dlm_comm *cm) |
805 | { | 845 | { |
806 | config_item_put(&cm->item); | 846 | config_item_put(&cm->item); |
807 | } | 847 | } |
@@ -810,8 +850,8 @@ static void put_comm(struct comm *cm) | |||
810 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, | 850 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, |
811 | int **new_out, int *new_count_out) | 851 | int **new_out, int *new_count_out) |
812 | { | 852 | { |
813 | struct space *sp; | 853 | struct dlm_space *sp; |
814 | struct node *nd; | 854 | struct dlm_node *nd; |
815 | int i = 0, rv = 0, ids_count = 0, new_count = 0; | 855 | int i = 0, rv = 0, ids_count = 0, new_count = 0; |
816 | int *ids, *new; | 856 | int *ids, *new; |
817 | 857 | ||
@@ -874,8 +914,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, | |||
874 | 914 | ||
875 | int dlm_node_weight(char *lsname, int nodeid) | 915 | int dlm_node_weight(char *lsname, int nodeid) |
876 | { | 916 | { |
877 | struct space *sp; | 917 | struct dlm_space *sp; |
878 | struct node *nd; | 918 | struct dlm_node *nd; |
879 | int w = -EEXIST; | 919 | int w = -EEXIST; |
880 | 920 | ||
881 | sp = get_space(lsname); | 921 | sp = get_space(lsname); |
@@ -897,7 +937,7 @@ int dlm_node_weight(char *lsname, int nodeid) | |||
897 | 937 | ||
898 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | 938 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) |
899 | { | 939 | { |
900 | struct comm *cm = get_comm(nodeid, NULL); | 940 | struct dlm_comm *cm = get_comm(nodeid, NULL); |
901 | if (!cm) | 941 | if (!cm) |
902 | return -EEXIST; | 942 | return -EEXIST; |
903 | if (!cm->addr_count) | 943 | if (!cm->addr_count) |
@@ -909,7 +949,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | |||
909 | 949 | ||
910 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | 950 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) |
911 | { | 951 | { |
912 | struct comm *cm = get_comm(0, addr); | 952 | struct dlm_comm *cm = get_comm(0, addr); |
913 | if (!cm) | 953 | if (!cm) |
914 | return -EEXIST; | 954 | return -EEXIST; |
915 | *nodeid = cm->nodeid; | 955 | *nodeid = cm->nodeid; |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5a7ac33b629c..868e4c9ef127 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -441,8 +441,11 @@ struct dlm_ls { | |||
441 | uint32_t ls_global_id; /* global unique lockspace ID */ | 441 | uint32_t ls_global_id; /* global unique lockspace ID */ |
442 | uint32_t ls_exflags; | 442 | uint32_t ls_exflags; |
443 | int ls_lvblen; | 443 | int ls_lvblen; |
444 | int ls_count; /* reference count */ | 444 | int ls_count; /* refcount of processes in |
445 | the dlm using this ls */ | ||
446 | int ls_create_count; /* create/release refcount */ | ||
445 | unsigned long ls_flags; /* LSFL_ */ | 447 | unsigned long ls_flags; /* LSFL_ */ |
448 | unsigned long ls_scan_time; | ||
446 | struct kobject ls_kobj; | 449 | struct kobject ls_kobj; |
447 | 450 | ||
448 | struct dlm_rsbtable *ls_rsbtbl; | 451 | struct dlm_rsbtable *ls_rsbtbl; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 499e16759e96..d910501de6d2 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -23,6 +23,7 @@ | |||
23 | #include "lock.h" | 23 | #include "lock.h" |
24 | #include "recover.h" | 24 | #include "recover.h" |
25 | #include "requestqueue.h" | 25 | #include "requestqueue.h" |
26 | #include "user.h" | ||
26 | 27 | ||
27 | static int ls_count; | 28 | static int ls_count; |
28 | static struct mutex ls_lock; | 29 | static struct mutex ls_lock; |
@@ -211,19 +212,41 @@ void dlm_lockspace_exit(void) | |||
211 | kset_unregister(dlm_kset); | 212 | kset_unregister(dlm_kset); |
212 | } | 213 | } |
213 | 214 | ||
215 | static struct dlm_ls *find_ls_to_scan(void) | ||
216 | { | ||
217 | struct dlm_ls *ls; | ||
218 | |||
219 | spin_lock(&lslist_lock); | ||
220 | list_for_each_entry(ls, &lslist, ls_list) { | ||
221 | if (time_after_eq(jiffies, ls->ls_scan_time + | ||
222 | dlm_config.ci_scan_secs * HZ)) { | ||
223 | spin_unlock(&lslist_lock); | ||
224 | return ls; | ||
225 | } | ||
226 | } | ||
227 | spin_unlock(&lslist_lock); | ||
228 | return NULL; | ||
229 | } | ||
230 | |||
214 | static int dlm_scand(void *data) | 231 | static int dlm_scand(void *data) |
215 | { | 232 | { |
216 | struct dlm_ls *ls; | 233 | struct dlm_ls *ls; |
234 | int timeout_jiffies = dlm_config.ci_scan_secs * HZ; | ||
217 | 235 | ||
218 | while (!kthread_should_stop()) { | 236 | while (!kthread_should_stop()) { |
219 | list_for_each_entry(ls, &lslist, ls_list) { | 237 | ls = find_ls_to_scan(); |
238 | if (ls) { | ||
220 | if (dlm_lock_recovery_try(ls)) { | 239 | if (dlm_lock_recovery_try(ls)) { |
240 | ls->ls_scan_time = jiffies; | ||
221 | dlm_scan_rsbs(ls); | 241 | dlm_scan_rsbs(ls); |
222 | dlm_scan_timeout(ls); | 242 | dlm_scan_timeout(ls); |
223 | dlm_unlock_recovery(ls); | 243 | dlm_unlock_recovery(ls); |
244 | } else { | ||
245 | ls->ls_scan_time += HZ; | ||
224 | } | 246 | } |
247 | } else { | ||
248 | schedule_timeout_interruptible(timeout_jiffies); | ||
225 | } | 249 | } |
226 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); | ||
227 | } | 250 | } |
228 | return 0; | 251 | return 0; |
229 | } | 252 | } |
@@ -246,23 +269,6 @@ static void dlm_scand_stop(void) | |||
246 | kthread_stop(scand_task); | 269 | kthread_stop(scand_task); |
247 | } | 270 | } |
248 | 271 | ||
249 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
250 | { | ||
251 | struct dlm_ls *ls; | ||
252 | |||
253 | spin_lock(&lslist_lock); | ||
254 | |||
255 | list_for_each_entry(ls, &lslist, ls_list) { | ||
256 | if (ls->ls_namelen == namelen && | ||
257 | memcmp(ls->ls_name, name, namelen) == 0) | ||
258 | goto out; | ||
259 | } | ||
260 | ls = NULL; | ||
261 | out: | ||
262 | spin_unlock(&lslist_lock); | ||
263 | return ls; | ||
264 | } | ||
265 | |||
266 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | 272 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) |
267 | { | 273 | { |
268 | struct dlm_ls *ls; | 274 | struct dlm_ls *ls; |
@@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls) | |||
327 | for (;;) { | 333 | for (;;) { |
328 | spin_lock(&lslist_lock); | 334 | spin_lock(&lslist_lock); |
329 | if (ls->ls_count == 0) { | 335 | if (ls->ls_count == 0) { |
336 | WARN_ON(ls->ls_create_count != 0); | ||
330 | list_del(&ls->ls_list); | 337 | list_del(&ls->ls_list); |
331 | spin_unlock(&lslist_lock); | 338 | spin_unlock(&lslist_lock); |
332 | return; | 339 | return; |
@@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
381 | uint32_t flags, int lvblen) | 388 | uint32_t flags, int lvblen) |
382 | { | 389 | { |
383 | struct dlm_ls *ls; | 390 | struct dlm_ls *ls; |
384 | int i, size, error = -ENOMEM; | 391 | int i, size, error; |
385 | int do_unreg = 0; | 392 | int do_unreg = 0; |
386 | 393 | ||
387 | if (namelen > DLM_LOCKSPACE_LEN) | 394 | if (namelen > DLM_LOCKSPACE_LEN) |
@@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
393 | if (!try_module_get(THIS_MODULE)) | 400 | if (!try_module_get(THIS_MODULE)) |
394 | return -EINVAL; | 401 | return -EINVAL; |
395 | 402 | ||
396 | ls = dlm_find_lockspace_name(name, namelen); | 403 | if (!dlm_user_daemon_available()) { |
397 | if (ls) { | 404 | module_put(THIS_MODULE); |
398 | *lockspace = ls; | 405 | return -EUNATCH; |
406 | } | ||
407 | |||
408 | error = 0; | ||
409 | |||
410 | spin_lock(&lslist_lock); | ||
411 | list_for_each_entry(ls, &lslist, ls_list) { | ||
412 | WARN_ON(ls->ls_create_count <= 0); | ||
413 | if (ls->ls_namelen != namelen) | ||
414 | continue; | ||
415 | if (memcmp(ls->ls_name, name, namelen)) | ||
416 | continue; | ||
417 | if (flags & DLM_LSFL_NEWEXCL) { | ||
418 | error = -EEXIST; | ||
419 | break; | ||
420 | } | ||
421 | ls->ls_create_count++; | ||
399 | module_put(THIS_MODULE); | 422 | module_put(THIS_MODULE); |
400 | return -EEXIST; | 423 | error = 1; /* not an error, return 0 */ |
424 | break; | ||
401 | } | 425 | } |
426 | spin_unlock(&lslist_lock); | ||
427 | |||
428 | if (error < 0) | ||
429 | goto out; | ||
430 | if (error) | ||
431 | goto ret_zero; | ||
432 | |||
433 | error = -ENOMEM; | ||
402 | 434 | ||
403 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); | 435 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
404 | if (!ls) | 436 | if (!ls) |
@@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
408 | ls->ls_lvblen = lvblen; | 440 | ls->ls_lvblen = lvblen; |
409 | ls->ls_count = 0; | 441 | ls->ls_count = 0; |
410 | ls->ls_flags = 0; | 442 | ls->ls_flags = 0; |
443 | ls->ls_scan_time = jiffies; | ||
411 | 444 | ||
412 | if (flags & DLM_LSFL_TIMEWARN) | 445 | if (flags & DLM_LSFL_TIMEWARN) |
413 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
@@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
418 | ls->ls_allocation = GFP_KERNEL; | 451 | ls->ls_allocation = GFP_KERNEL; |
419 | 452 | ||
420 | /* ls_exflags are forced to match among nodes, and we don't | 453 | /* ls_exflags are forced to match among nodes, and we don't |
421 | need to require all nodes to have TIMEWARN or FS set */ | 454 | need to require all nodes to have some flags set */ |
422 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS)); | 455 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | |
456 | DLM_LSFL_NEWEXCL)); | ||
423 | 457 | ||
424 | size = dlm_config.ci_rsbtbl_size; | 458 | size = dlm_config.ci_rsbtbl_size; |
425 | ls->ls_rsbtbl_size = size; | 459 | ls->ls_rsbtbl_size = size; |
@@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
510 | down_write(&ls->ls_in_recovery); | 544 | down_write(&ls->ls_in_recovery); |
511 | 545 | ||
512 | spin_lock(&lslist_lock); | 546 | spin_lock(&lslist_lock); |
547 | ls->ls_create_count = 1; | ||
513 | list_add(&ls->ls_list, &lslist); | 548 | list_add(&ls->ls_list, &lslist); |
514 | spin_unlock(&lslist_lock); | 549 | spin_unlock(&lslist_lock); |
515 | 550 | ||
@@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
548 | dlm_create_debug_file(ls); | 583 | dlm_create_debug_file(ls); |
549 | 584 | ||
550 | log_debug(ls, "join complete"); | 585 | log_debug(ls, "join complete"); |
551 | 586 | ret_zero: | |
552 | *lockspace = ls; | 587 | *lockspace = ls; |
553 | return 0; | 588 | return 0; |
554 | 589 | ||
@@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
635 | struct dlm_lkb *lkb; | 670 | struct dlm_lkb *lkb; |
636 | struct dlm_rsb *rsb; | 671 | struct dlm_rsb *rsb; |
637 | struct list_head *head; | 672 | struct list_head *head; |
638 | int i; | 673 | int i, busy, rv; |
639 | int busy = lockspace_busy(ls); | 674 | |
675 | busy = lockspace_busy(ls); | ||
676 | |||
677 | spin_lock(&lslist_lock); | ||
678 | if (ls->ls_create_count == 1) { | ||
679 | if (busy > force) | ||
680 | rv = -EBUSY; | ||
681 | else { | ||
682 | /* remove_lockspace takes ls off lslist */ | ||
683 | ls->ls_create_count = 0; | ||
684 | rv = 0; | ||
685 | } | ||
686 | } else if (ls->ls_create_count > 1) { | ||
687 | rv = --ls->ls_create_count; | ||
688 | } else { | ||
689 | rv = -EINVAL; | ||
690 | } | ||
691 | spin_unlock(&lslist_lock); | ||
640 | 692 | ||
641 | if (busy > force) | 693 | if (rv) { |
642 | return -EBUSY; | 694 | log_debug(ls, "release_lockspace no remove %d", rv); |
695 | return rv; | ||
696 | } | ||
697 | |||
698 | dlm_device_deregister(ls); | ||
643 | 699 | ||
644 | if (force < 3) | 700 | if (force < 3 && dlm_user_daemon_available()) |
645 | do_uevent(ls, 0); | 701 | do_uevent(ls, 0); |
646 | 702 | ||
647 | dlm_recoverd_stop(ls); | 703 | dlm_recoverd_stop(ls); |
@@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
720 | dlm_clear_members(ls); | 776 | dlm_clear_members(ls); |
721 | dlm_clear_members_gone(ls); | 777 | dlm_clear_members_gone(ls); |
722 | kfree(ls->ls_node_array); | 778 | kfree(ls->ls_node_array); |
779 | log_debug(ls, "release_lockspace final free"); | ||
723 | kobject_put(&ls->ls_kobj); | 780 | kobject_put(&ls->ls_kobj); |
724 | /* The ls structure will be freed when the kobject is done with */ | 781 | /* The ls structure will be freed when the kobject is done with */ |
725 | 782 | ||
726 | mutex_lock(&ls_lock); | ||
727 | ls_count--; | ||
728 | if (!ls_count) | ||
729 | threads_stop(); | ||
730 | mutex_unlock(&ls_lock); | ||
731 | |||
732 | module_put(THIS_MODULE); | 783 | module_put(THIS_MODULE); |
733 | return 0; | 784 | return 0; |
734 | } | 785 | } |
@@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
750 | int dlm_release_lockspace(void *lockspace, int force) | 801 | int dlm_release_lockspace(void *lockspace, int force) |
751 | { | 802 | { |
752 | struct dlm_ls *ls; | 803 | struct dlm_ls *ls; |
804 | int error; | ||
753 | 805 | ||
754 | ls = dlm_find_lockspace_local(lockspace); | 806 | ls = dlm_find_lockspace_local(lockspace); |
755 | if (!ls) | 807 | if (!ls) |
756 | return -EINVAL; | 808 | return -EINVAL; |
757 | dlm_put_lockspace(ls); | 809 | dlm_put_lockspace(ls); |
758 | return release_lockspace(ls, force); | 810 | |
811 | mutex_lock(&ls_lock); | ||
812 | error = release_lockspace(ls, force); | ||
813 | if (!error) | ||
814 | ls_count--; | ||
815 | else if (!ls_count) | ||
816 | threads_stop(); | ||
817 | mutex_unlock(&ls_lock); | ||
818 | |||
819 | return error; | ||
820 | } | ||
821 | |||
822 | void dlm_stop_lockspaces(void) | ||
823 | { | ||
824 | struct dlm_ls *ls; | ||
825 | |||
826 | restart: | ||
827 | spin_lock(&lslist_lock); | ||
828 | list_for_each_entry(ls, &lslist, ls_list) { | ||
829 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | ||
830 | continue; | ||
831 | spin_unlock(&lslist_lock); | ||
832 | log_error(ls, "no userland control daemon, stopping lockspace"); | ||
833 | dlm_ls_stop(ls); | ||
834 | goto restart; | ||
835 | } | ||
836 | spin_unlock(&lslist_lock); | ||
759 | } | 837 | } |
760 | 838 | ||
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 891eabbdd021..f879f87901f8 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h | |||
@@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id); | |||
20 | struct dlm_ls *dlm_find_lockspace_local(void *id); | 20 | struct dlm_ls *dlm_find_lockspace_local(void *id); |
21 | struct dlm_ls *dlm_find_lockspace_device(int minor); | 21 | struct dlm_ls *dlm_find_lockspace_device(int minor); |
22 | void dlm_put_lockspace(struct dlm_ls *ls); | 22 | void dlm_put_lockspace(struct dlm_ls *ls); |
23 | void dlm_stop_lockspaces(void); | ||
23 | 24 | ||
24 | #endif /* __LOCKSPACE_DOT_H__ */ | 25 | #endif /* __LOCKSPACE_DOT_H__ */ |
25 | 26 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 929e48ae7591..b3832c67194a 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/dlm.h> | 18 | #include <linux/dlm.h> |
20 | #include <linux/dlm_device.h> | 19 | #include <linux/dlm_device.h> |
21 | 20 | ||
@@ -27,6 +26,8 @@ | |||
27 | 26 | ||
28 | static const char name_prefix[] = "dlm"; | 27 | static const char name_prefix[] = "dlm"; |
29 | static const struct file_operations device_fops; | 28 | static const struct file_operations device_fops; |
29 | static atomic_t dlm_monitor_opened; | ||
30 | static int dlm_monitor_unused = 1; | ||
30 | 31 | ||
31 | #ifdef CONFIG_COMPAT | 32 | #ifdef CONFIG_COMPAT |
32 | 33 | ||
@@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc, | |||
340 | return error; | 341 | return error; |
341 | } | 342 | } |
342 | 343 | ||
343 | static int create_misc_device(struct dlm_ls *ls, char *name) | 344 | static int dlm_device_register(struct dlm_ls *ls, char *name) |
344 | { | 345 | { |
345 | int error, len; | 346 | int error, len; |
346 | 347 | ||
348 | /* The device is already registered. This happens when the | ||
349 | lockspace is created multiple times from userspace. */ | ||
350 | if (ls->ls_device.name) | ||
351 | return 0; | ||
352 | |||
347 | error = -ENOMEM; | 353 | error = -ENOMEM; |
348 | len = strlen(name) + strlen(name_prefix) + 2; | 354 | len = strlen(name) + strlen(name_prefix) + 2; |
349 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 355 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); |
@@ -363,6 +369,22 @@ fail: | |||
363 | return error; | 369 | return error; |
364 | } | 370 | } |
365 | 371 | ||
372 | int dlm_device_deregister(struct dlm_ls *ls) | ||
373 | { | ||
374 | int error; | ||
375 | |||
376 | /* The device is not registered. This happens when the lockspace | ||
377 | was never used from userspace, or when device_create_lockspace() | ||
378 | calls dlm_release_lockspace() after the register fails. */ | ||
379 | if (!ls->ls_device.name) | ||
380 | return 0; | ||
381 | |||
382 | error = misc_deregister(&ls->ls_device); | ||
383 | if (!error) | ||
384 | kfree(ls->ls_device.name); | ||
385 | return error; | ||
386 | } | ||
387 | |||
366 | static int device_user_purge(struct dlm_user_proc *proc, | 388 | static int device_user_purge(struct dlm_user_proc *proc, |
367 | struct dlm_purge_params *params) | 389 | struct dlm_purge_params *params) |
368 | { | 390 | { |
@@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
397 | if (!ls) | 419 | if (!ls) |
398 | return -ENOENT; | 420 | return -ENOENT; |
399 | 421 | ||
400 | error = create_misc_device(ls, params->name); | 422 | error = dlm_device_register(ls, params->name); |
401 | dlm_put_lockspace(ls); | 423 | dlm_put_lockspace(ls); |
402 | 424 | ||
403 | if (error) | 425 | if (error) |
@@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
421 | if (!ls) | 443 | if (!ls) |
422 | return -ENOENT; | 444 | return -ENOENT; |
423 | 445 | ||
424 | /* Deregister the misc device first, so we don't have | ||
425 | * a device that's not attached to a lockspace. If | ||
426 | * dlm_release_lockspace fails then we can recreate it | ||
427 | */ | ||
428 | error = misc_deregister(&ls->ls_device); | ||
429 | if (error) { | ||
430 | dlm_put_lockspace(ls); | ||
431 | goto out; | ||
432 | } | ||
433 | kfree(ls->ls_device.name); | ||
434 | |||
435 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) | 446 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) |
436 | force = 2; | 447 | force = 2; |
437 | 448 | ||
438 | lockspace = ls->ls_local_handle; | 449 | lockspace = ls->ls_local_handle; |
450 | dlm_put_lockspace(ls); | ||
439 | 451 | ||
440 | /* dlm_release_lockspace waits for references to go to zero, | 452 | /* The final dlm_release_lockspace waits for references to go to |
441 | so all processes will need to close their device for the ls | 453 | zero, so all processes will need to close their device for the |
442 | before the release will procede */ | 454 | ls before the release will proceed. release also calls the |
455 | device_deregister above. Converting a positive return value | ||
456 | from release to zero means that userspace won't know when its | ||
457 | release was the final one, but it shouldn't need to know. */ | ||
443 | 458 | ||
444 | dlm_put_lockspace(ls); | ||
445 | error = dlm_release_lockspace(lockspace, force); | 459 | error = dlm_release_lockspace(lockspace, force); |
446 | if (error) | 460 | if (error > 0) |
447 | create_misc_device(ls, ls->ls_name); | 461 | error = 0; |
448 | out: | ||
449 | return error; | 462 | return error; |
450 | } | 463 | } |
451 | 464 | ||
@@ -527,8 +540,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
527 | k32buf = (struct dlm_write_request32 *)kbuf; | 540 | k32buf = (struct dlm_write_request32 *)kbuf; |
528 | kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - | 541 | kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - |
529 | sizeof(struct dlm_write_request32)), GFP_KERNEL); | 542 | sizeof(struct dlm_write_request32)), GFP_KERNEL); |
530 | if (!kbuf) | 543 | if (!kbuf) { |
544 | kfree(k32buf); | ||
531 | return -ENOMEM; | 545 | return -ENOMEM; |
546 | } | ||
532 | 547 | ||
533 | if (proc) | 548 | if (proc) |
534 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); | 549 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); |
@@ -539,8 +554,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
539 | 554 | ||
540 | /* do we really need this? can a write happen after a close? */ | 555 | /* do we really need this? can a write happen after a close? */ |
541 | if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && | 556 | if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && |
542 | (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) | 557 | (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) { |
543 | return -EINVAL; | 558 | error = -EINVAL; |
559 | goto out_free; | ||
560 | } | ||
544 | 561 | ||
545 | sigfillset(&allsigs); | 562 | sigfillset(&allsigs); |
546 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | 563 | sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); |
@@ -619,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file) | |||
619 | struct dlm_user_proc *proc; | 636 | struct dlm_user_proc *proc; |
620 | struct dlm_ls *ls; | 637 | struct dlm_ls *ls; |
621 | 638 | ||
622 | lock_kernel(); | ||
623 | ls = dlm_find_lockspace_device(iminor(inode)); | 639 | ls = dlm_find_lockspace_device(iminor(inode)); |
624 | if (!ls) { | 640 | if (!ls) |
625 | unlock_kernel(); | ||
626 | return -ENOENT; | 641 | return -ENOENT; |
627 | } | ||
628 | 642 | ||
629 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 643 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
630 | if (!proc) { | 644 | if (!proc) { |
631 | dlm_put_lockspace(ls); | 645 | dlm_put_lockspace(ls); |
632 | unlock_kernel(); | ||
633 | return -ENOMEM; | 646 | return -ENOMEM; |
634 | } | 647 | } |
635 | 648 | ||
@@ -641,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file) | |||
641 | spin_lock_init(&proc->locks_spin); | 654 | spin_lock_init(&proc->locks_spin); |
642 | init_waitqueue_head(&proc->wait); | 655 | init_waitqueue_head(&proc->wait); |
643 | file->private_data = proc; | 656 | file->private_data = proc; |
644 | unlock_kernel(); | ||
645 | 657 | ||
646 | return 0; | 658 | return 0; |
647 | } | 659 | } |
@@ -874,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
874 | return 0; | 886 | return 0; |
875 | } | 887 | } |
876 | 888 | ||
889 | int dlm_user_daemon_available(void) | ||
890 | { | ||
891 | /* dlm_controld hasn't started (or, has started, but not | ||
892 | properly populated configfs) */ | ||
893 | |||
894 | if (!dlm_our_nodeid()) | ||
895 | return 0; | ||
896 | |||
897 | /* This is to deal with versions of dlm_controld that don't | ||
898 | know about the monitor device. We assume that if the | ||
899 | dlm_controld was started (above), but the monitor device | ||
900 | was never opened, that it's an old version. dlm_controld | ||
901 | should open the monitor device before populating configfs. */ | ||
902 | |||
903 | if (dlm_monitor_unused) | ||
904 | return 1; | ||
905 | |||
906 | return atomic_read(&dlm_monitor_opened) ? 1 : 0; | ||
907 | } | ||
908 | |||
877 | static int ctl_device_open(struct inode *inode, struct file *file) | 909 | static int ctl_device_open(struct inode *inode, struct file *file) |
878 | { | 910 | { |
879 | cycle_kernel_lock(); | ||
880 | file->private_data = NULL; | 911 | file->private_data = NULL; |
881 | return 0; | 912 | return 0; |
882 | } | 913 | } |
@@ -886,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file) | |||
886 | return 0; | 917 | return 0; |
887 | } | 918 | } |
888 | 919 | ||
920 | static int monitor_device_open(struct inode *inode, struct file *file) | ||
921 | { | ||
922 | atomic_inc(&dlm_monitor_opened); | ||
923 | dlm_monitor_unused = 0; | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static int monitor_device_close(struct inode *inode, struct file *file) | ||
928 | { | ||
929 | if (atomic_dec_and_test(&dlm_monitor_opened)) | ||
930 | dlm_stop_lockspaces(); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
889 | static const struct file_operations device_fops = { | 934 | static const struct file_operations device_fops = { |
890 | .open = device_open, | 935 | .open = device_open, |
891 | .release = device_close, | 936 | .release = device_close, |
@@ -909,19 +954,42 @@ static struct miscdevice ctl_device = { | |||
909 | .minor = MISC_DYNAMIC_MINOR, | 954 | .minor = MISC_DYNAMIC_MINOR, |
910 | }; | 955 | }; |
911 | 956 | ||
957 | static const struct file_operations monitor_device_fops = { | ||
958 | .open = monitor_device_open, | ||
959 | .release = monitor_device_close, | ||
960 | .owner = THIS_MODULE, | ||
961 | }; | ||
962 | |||
963 | static struct miscdevice monitor_device = { | ||
964 | .name = "dlm-monitor", | ||
965 | .fops = &monitor_device_fops, | ||
966 | .minor = MISC_DYNAMIC_MINOR, | ||
967 | }; | ||
968 | |||
912 | int __init dlm_user_init(void) | 969 | int __init dlm_user_init(void) |
913 | { | 970 | { |
914 | int error; | 971 | int error; |
915 | 972 | ||
973 | atomic_set(&dlm_monitor_opened, 0); | ||
974 | |||
916 | error = misc_register(&ctl_device); | 975 | error = misc_register(&ctl_device); |
917 | if (error) | 976 | if (error) { |
918 | log_print("misc_register failed for control device"); | 977 | log_print("misc_register failed for control device"); |
978 | goto out; | ||
979 | } | ||
919 | 980 | ||
981 | error = misc_register(&monitor_device); | ||
982 | if (error) { | ||
983 | log_print("misc_register failed for monitor device"); | ||
984 | misc_deregister(&ctl_device); | ||
985 | } | ||
986 | out: | ||
920 | return error; | 987 | return error; |
921 | } | 988 | } |
922 | 989 | ||
923 | void dlm_user_exit(void) | 990 | void dlm_user_exit(void) |
924 | { | 991 | { |
925 | misc_deregister(&ctl_device); | 992 | misc_deregister(&ctl_device); |
993 | misc_deregister(&monitor_device); | ||
926 | } | 994 | } |
927 | 995 | ||
diff --git a/fs/dlm/user.h b/fs/dlm/user.h index d38e9f3e4151..35eb6a13d616 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -12,5 +12,7 @@ | |||
12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); | 12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); |
13 | int dlm_user_init(void); | 13 | int dlm_user_init(void); |
14 | void dlm_user_exit(void); | 14 | void dlm_user_exit(void); |
15 | int dlm_device_deregister(struct dlm_ls *ls); | ||
16 | int dlm_user_daemon_available(void); | ||
15 | 17 | ||
16 | #endif | 18 | #endif |
diff --git a/fs/dquot.c b/fs/dquot.c index 8ec4d6cc7633..ad7e59003e04 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -895,10 +895,9 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
895 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) | 895 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) |
896 | return; | 896 | return; |
897 | 897 | ||
898 | mutex_lock(&tty_mutex); | ||
899 | tty = get_current_tty(); | 898 | tty = get_current_tty(); |
900 | if (!tty) | 899 | if (!tty) |
901 | goto out_lock; | 900 | return; |
902 | tty_write_message(tty, dquot->dq_sb->s_id); | 901 | tty_write_message(tty, dquot->dq_sb->s_id); |
903 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) | 902 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) |
904 | tty_write_message(tty, ": warning, "); | 903 | tty_write_message(tty, ": warning, "); |
@@ -926,8 +925,7 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
926 | break; | 925 | break; |
927 | } | 926 | } |
928 | tty_write_message(tty, msg); | 927 | tty_write_message(tty, msg); |
929 | out_lock: | 928 | tty_kref_put(tty); |
930 | mutex_unlock(&tty_mutex); | ||
931 | } | 929 | } |
932 | #endif | 930 | #endif |
933 | 931 | ||
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 448dfd597b5f..8ebe9a5d1d99 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -211,7 +211,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
211 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, | 211 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, |
212 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; | 212 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; |
213 | 213 | ||
214 | static match_table_t tokens = { | 214 | static const match_table_t tokens = { |
215 | {ecryptfs_opt_sig, "sig=%s"}, | 215 | {ecryptfs_opt_sig, "sig=%s"}, |
216 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, | 216 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, |
217 | {ecryptfs_opt_cipher, "cipher=%s"}, | 217 | {ecryptfs_opt_cipher, "cipher=%s"}, |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
74 | } | 74 | } |
75 | unlock_kernel(); | 75 | unlock_kernel(); |
76 | 76 | ||
77 | d_add(dentry, inode); | 77 | return d_splice_alias(inode, dentry); |
78 | return NULL; | ||
79 | } | 78 | } |
80 | 79 | ||
81 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 567b134fa1f1..73b19cfc91fc 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -341,8 +341,6 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { | |||
341 | sb->inode_blocks * | 341 | sb->inode_blocks * |
342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); | 342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); |
343 | buf->f_ffree = sb->inode_free; /* free inodes */ | 343 | buf->f_ffree = sb->inode_free; /* free inodes */ |
344 | buf->f_fsid.val[0] = (sb->fs_magic >> 16) & 0xffff; /* fs ID */ | ||
345 | buf->f_fsid.val[1] = sb->fs_magic & 0xffff; /* fs ID */ | ||
346 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ | 344 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ |
347 | 345 | ||
348 | return 0; | 346 | return 0; |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0c87474f7917..7cc0eb756b55 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1041,10 +1041,7 @@ retry: | |||
1041 | } | 1041 | } |
1042 | 1042 | ||
1043 | /* | 1043 | /* |
1044 | * It opens an eventpoll file descriptor. The "size" parameter is there | 1044 | * Open an eventpoll file descriptor. |
1045 | * for historical reasons, when epoll was using an hash instead of an | ||
1046 | * RB tree. With the current implementation, the "size" parameter is ignored | ||
1047 | * (besides sanity checks). | ||
1048 | */ | 1045 | */ |
1049 | asmlinkage long sys_epoll_create1(int flags) | 1046 | asmlinkage long sys_epoll_create1(int flags) |
1050 | { | 1047 | { |
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm) | |||
752 | tsk->active_mm = mm; | 752 | tsk->active_mm = mm; |
753 | activate_mm(active_mm, mm); | 753 | activate_mm(active_mm, mm); |
754 | task_unlock(tsk); | 754 | task_unlock(tsk); |
755 | mm_update_next_owner(old_mm); | ||
756 | arch_pick_mmap_layout(mm); | 755 | arch_pick_mmap_layout(mm); |
757 | if (old_mm) { | 756 | if (old_mm) { |
758 | up_read(&old_mm->mmap_sem); | 757 | up_read(&old_mm->mmap_sem); |
759 | BUG_ON(active_mm != old_mm); | 758 | BUG_ON(active_mm != old_mm); |
759 | mm_update_next_owner(old_mm); | ||
760 | mmput(old_mm); | 760 | mmput(old_mm); |
761 | return 0; | 761 | return 0; |
762 | } | 762 | } |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33b..bae998c1e44e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); | |||
133 | extern int ext2_setattr (struct dentry *, struct iattr *); | 133 | extern int ext2_setattr (struct dentry *, struct iattr *); |
134 | extern void ext2_set_inode_flags(struct inode *inode); | 134 | extern void ext2_set_inode_flags(struct inode *inode); |
135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
136 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
137 | u64 start, u64 len); | ||
136 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | 138 | int __ext2_write_begin(struct file *file, struct address_space *mapping, |
137 | loff_t pos, unsigned len, unsigned flags, | 139 | loff_t pos, unsigned len, unsigned flags, |
138 | struct page **pagep, void **fsdata); | 140 | struct page **pagep, void **fsdata); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c36293..45ed07122182 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { | |||
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .permission = ext2_permission, |
89 | .fiemap = ext2_fiemap, | ||
89 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51f..7658b33e2653 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
33 | #include <linux/mpage.h> | 33 | #include <linux/mpage.h> |
34 | #include <linux/fiemap.h> | ||
34 | #include "ext2.h" | 35 | #include "ext2.h" |
35 | #include "acl.h" | 36 | #include "acl.h" |
36 | #include "xip.h" | 37 | #include "xip.h" |
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ | |||
704 | 705 | ||
705 | } | 706 | } |
706 | 707 | ||
708 | int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
709 | u64 start, u64 len) | ||
710 | { | ||
711 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
712 | ext2_get_block); | ||
713 | } | ||
714 | |||
707 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | 715 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) |
708 | { | 716 | { |
709 | return block_write_full_page(page, ext2_get_block, wbc); | 717 | return block_write_full_page(page, ext2_get_block, wbc); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fd88c7b43e66..647cd888ac87 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -393,7 +393,7 @@ enum { | |||
393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation | 393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation |
394 | }; | 394 | }; |
395 | 395 | ||
396 | static match_table_t tokens = { | 396 | static const match_table_t tokens = { |
397 | {Opt_bsd_df, "bsddf"}, | 397 | {Opt_bsd_df, "bsddf"}, |
398 | {Opt_minix_df, "minixdf"}, | 398 | {Opt_minix_df, "minixdf"}, |
399 | {Opt_grpid, "grpid"}, | 399 | {Opt_grpid, "grpid"}, |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d3019..3be1e0689c9a 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { | |||
134 | .removexattr = generic_removexattr, | 134 | .removexattr = generic_removexattr, |
135 | #endif | 135 | #endif |
136 | .permission = ext3_permission, | 136 | .permission = ext3_permission, |
137 | .fiemap = ext3_fiemap, | ||
137 | }; | 138 | }; |
138 | 139 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b111..ebfec4d0148e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | ||
39 | #include "xattr.h" | 40 | #include "xattr.h" |
40 | #include "acl.h" | 41 | #include "acl.h" |
41 | 42 | ||
@@ -981,6 +982,13 @@ out: | |||
981 | return ret; | 982 | return ret; |
982 | } | 983 | } |
983 | 984 | ||
985 | int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
986 | u64 start, u64 len) | ||
987 | { | ||
988 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
989 | ext3_get_block); | ||
990 | } | ||
991 | |||
984 | /* | 992 | /* |
985 | * `handle' can be NULL if create is zero | 993 | * `handle' can be NULL if create is zero |
986 | */ | 994 | */ |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f38a5afc39a1..399a96a6c556 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -760,7 +760,7 @@ enum { | |||
760 | Opt_grpquota | 760 | Opt_grpquota |
761 | }; | 761 | }; |
762 | 762 | ||
763 | static match_table_t tokens = { | 763 | static const match_table_t tokens = { |
764 | {Opt_bsd_df, "bsddf"}, | 764 | {Opt_bsd_df, "bsddf"}, |
765 | {Opt_minix_df, "minixdf"}, | 765 | {Opt_minix_df, "minixdf"}, |
766 | {Opt_grpid, "grpid"}, | 766 | {Opt_grpid, "grpid"}, |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2f..a8ff003a00f7 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -2,12 +2,12 @@ | |||
2 | # Makefile for the linux ext4-filesystem routines. | 2 | # Makefile for the linux ext4-filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
10 | 10 | ||
11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
13 | ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o | 13 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d6..cb45257a246e 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl | 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl |
57 | if the ACL has not been cached */ | 57 | if the ACL has not been cached */ |
58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) | 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) |
59 | 59 | ||
60 | /* acl.c */ | 60 | /* acl.c */ |
61 | extern int ext4_permission (struct inode *, int); | 61 | extern int ext4_permission(struct inode *, int); |
62 | extern int ext4_acl_chmod (struct inode *); | 62 | extern int ext4_acl_chmod(struct inode *); |
63 | extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
64 | 64 | ||
65 | #else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
66 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
67 | #define ext4_permission NULL | 67 | #define ext4_permission NULL |
68 | 68 | ||
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
77 | { | 77 | { |
78 | return 0; | 78 | return 0; |
79 | } | 79 | } |
80 | #endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 80 | #endif /* CONFIG_EXT4_FS_POSIX_ACL */ |
81 | 81 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..bd2ece228827 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, | |||
83 | } | 83 | } |
84 | return used_blocks; | 84 | return used_blocks; |
85 | } | 85 | } |
86 | |||
86 | /* Initializes an uninitialized block bitmap if given, and returns the | 87 | /* Initializes an uninitialized block bitmap if given, and returns the |
87 | * number of blocks free in the group. */ | 88 | * number of blocks free in the group. */ |
88 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 89 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
132 | */ | 133 | */ |
133 | group_blocks = ext4_blocks_count(sbi->s_es) - | 134 | group_blocks = ext4_blocks_count(sbi->s_es) - |
134 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 135 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
135 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); | 136 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); |
136 | } else { | 137 | } else { |
137 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 138 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
138 | } | 139 | } |
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
200 | * @bh: pointer to the buffer head to store the block | 201 | * @bh: pointer to the buffer head to store the block |
201 | * group descriptor | 202 | * group descriptor |
202 | */ | 203 | */ |
203 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 204 | struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, |
204 | ext4_group_t block_group, | 205 | ext4_group_t block_group, |
205 | struct buffer_head ** bh) | 206 | struct buffer_head **bh) |
206 | { | 207 | { |
207 | unsigned long group_desc; | 208 | unsigned long group_desc; |
208 | unsigned long offset; | 209 | unsigned long offset; |
209 | struct ext4_group_desc * desc; | 210 | struct ext4_group_desc *desc; |
210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 211 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
211 | 212 | ||
212 | if (block_group >= sbi->s_groups_count) { | 213 | if (block_group >= sbi->s_groups_count) { |
213 | ext4_error (sb, "ext4_get_group_desc", | 214 | ext4_error(sb, "ext4_get_group_desc", |
214 | "block_group >= groups_count - " | 215 | "block_group >= groups_count - " |
215 | "block_group = %lu, groups_count = %lu", | 216 | "block_group = %lu, groups_count = %lu", |
216 | block_group, sbi->s_groups_count); | 217 | block_group, sbi->s_groups_count); |
217 | 218 | ||
218 | return NULL; | 219 | return NULL; |
219 | } | 220 | } |
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
222 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 223 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
223 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 224 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
224 | if (!sbi->s_group_desc[group_desc]) { | 225 | if (!sbi->s_group_desc[group_desc]) { |
225 | ext4_error (sb, "ext4_get_group_desc", | 226 | ext4_error(sb, "ext4_get_group_desc", |
226 | "Group descriptor not loaded - " | 227 | "Group descriptor not loaded - " |
227 | "block_group = %lu, group_desc = %lu, desc = %lu", | 228 | "block_group = %lu, group_desc = %lu, desc = %lu", |
228 | block_group, group_desc, offset); | 229 | block_group, group_desc, offset); |
229 | return NULL; | 230 | return NULL; |
230 | } | 231 | } |
231 | 232 | ||
@@ -302,8 +303,8 @@ err_out: | |||
302 | struct buffer_head * | 303 | struct buffer_head * |
303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 304 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
304 | { | 305 | { |
305 | struct ext4_group_desc * desc; | 306 | struct ext4_group_desc *desc; |
306 | struct buffer_head * bh = NULL; | 307 | struct buffer_head *bh = NULL; |
307 | ext4_fsblk_t bitmap_blk; | 308 | ext4_fsblk_t bitmap_blk; |
308 | 309 | ||
309 | desc = ext4_get_group_desc(sb, block_group, NULL); | 310 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
318 | block_group, bitmap_blk); | 319 | block_group, bitmap_blk); |
319 | return NULL; | 320 | return NULL; |
320 | } | 321 | } |
321 | if (bh_uptodate_or_lock(bh)) | 322 | if (buffer_uptodate(bh) && |
323 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
322 | return bh; | 324 | return bh; |
323 | 325 | ||
326 | lock_buffer(bh); | ||
324 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 327 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
325 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 328 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
326 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 329 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
345 | */ | 348 | */ |
346 | return bh; | 349 | return bh; |
347 | } | 350 | } |
348 | /* | ||
349 | * The reservation window structure operations | ||
350 | * -------------------------------------------- | ||
351 | * Operations include: | ||
352 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | ||
353 | * | ||
354 | * We use a red-black tree to represent per-filesystem reservation | ||
355 | * windows. | ||
356 | * | ||
357 | */ | ||
358 | |||
359 | /** | ||
360 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
361 | * @rb_root: root of per-filesystem reservation rb tree | ||
362 | * @verbose: verbose mode | ||
363 | * @fn: function which wishes to dump the reservation map | ||
364 | * | ||
365 | * If verbose is turned on, it will print the whole block reservation | ||
366 | * windows(start, end). Otherwise, it will only print out the "bad" windows, | ||
367 | * those windows that overlap with their immediate neighbors. | ||
368 | */ | ||
369 | #if 1 | ||
370 | static void __rsv_window_dump(struct rb_root *root, int verbose, | ||
371 | const char *fn) | ||
372 | { | ||
373 | struct rb_node *n; | ||
374 | struct ext4_reserve_window_node *rsv, *prev; | ||
375 | int bad; | ||
376 | |||
377 | restart: | ||
378 | n = rb_first(root); | ||
379 | bad = 0; | ||
380 | prev = NULL; | ||
381 | |||
382 | printk("Block Allocation Reservation Windows Map (%s):\n", fn); | ||
383 | while (n) { | ||
384 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
385 | if (verbose) | ||
386 | printk("reservation window 0x%p " | ||
387 | "start: %llu, end: %llu\n", | ||
388 | rsv, rsv->rsv_start, rsv->rsv_end); | ||
389 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | ||
390 | printk("Bad reservation %p (start >= end)\n", | ||
391 | rsv); | ||
392 | bad = 1; | ||
393 | } | ||
394 | if (prev && prev->rsv_end >= rsv->rsv_start) { | ||
395 | printk("Bad reservation %p (prev->end >= start)\n", | ||
396 | rsv); | ||
397 | bad = 1; | ||
398 | } | ||
399 | if (bad) { | ||
400 | if (!verbose) { | ||
401 | printk("Restarting reservation walk in verbose mode\n"); | ||
402 | verbose = 1; | ||
403 | goto restart; | ||
404 | } | ||
405 | } | ||
406 | n = rb_next(n); | ||
407 | prev = rsv; | ||
408 | } | ||
409 | printk("Window map complete.\n"); | ||
410 | BUG_ON(bad); | ||
411 | } | ||
412 | #define rsv_window_dump(root, verbose) \ | ||
413 | __rsv_window_dump((root), (verbose), __func__) | ||
414 | #else | ||
415 | #define rsv_window_dump(root, verbose) do {} while (0) | ||
416 | #endif | ||
417 | |||
418 | /** | ||
419 | * goal_in_my_reservation() | ||
420 | * @rsv: inode's reservation window | ||
421 | * @grp_goal: given goal block relative to the allocation block group | ||
422 | * @group: the current allocation block group | ||
423 | * @sb: filesystem super block | ||
424 | * | ||
425 | * Test if the given goal block (group relative) is within the file's | ||
426 | * own block reservation window range. | ||
427 | * | ||
428 | * If the reservation window is outside the goal allocation group, return 0; | ||
429 | * grp_goal (given goal block) could be -1, which means no specific | ||
430 | * goal block. In this case, always return 1. | ||
431 | * If the goal block is within the reservation window, return 1; | ||
432 | * otherwise, return 0; | ||
433 | */ | ||
434 | static int | ||
435 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | ||
436 | ext4_group_t group, struct super_block *sb) | ||
437 | { | ||
438 | ext4_fsblk_t group_first_block, group_last_block; | ||
439 | |||
440 | group_first_block = ext4_group_first_block_no(sb, group); | ||
441 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
442 | |||
443 | if ((rsv->_rsv_start > group_last_block) || | ||
444 | (rsv->_rsv_end < group_first_block)) | ||
445 | return 0; | ||
446 | if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) | ||
447 | || (grp_goal + group_first_block > rsv->_rsv_end))) | ||
448 | return 0; | ||
449 | return 1; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * search_reserve_window() | ||
454 | * @rb_root: root of reservation tree | ||
455 | * @goal: target allocation block | ||
456 | * | ||
457 | * Find the reserved window which includes the goal, or the previous one | ||
458 | * if the goal is not in any window. | ||
459 | * Returns NULL if there are no windows or if all windows start after the goal. | ||
460 | */ | ||
461 | static struct ext4_reserve_window_node * | ||
462 | search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) | ||
463 | { | ||
464 | struct rb_node *n = root->rb_node; | ||
465 | struct ext4_reserve_window_node *rsv; | ||
466 | |||
467 | if (!n) | ||
468 | return NULL; | ||
469 | |||
470 | do { | ||
471 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
472 | |||
473 | if (goal < rsv->rsv_start) | ||
474 | n = n->rb_left; | ||
475 | else if (goal > rsv->rsv_end) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return rsv; | ||
479 | } while (n); | ||
480 | /* | ||
481 | * We've fallen off the end of the tree: the goal wasn't inside | ||
482 | * any particular node. OK, the previous node must be to one | ||
483 | * side of the interval containing the goal. If it's the RHS, | ||
484 | * we need to back up one. | ||
485 | */ | ||
486 | if (rsv->rsv_start > goal) { | ||
487 | n = rb_prev(&rsv->rsv_node); | ||
488 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
489 | } | ||
490 | return rsv; | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
495 | * @sb: super block | ||
496 | * @rsv: reservation window to add | ||
497 | * | ||
498 | * Must be called with rsv_lock hold. | ||
499 | */ | ||
500 | void ext4_rsv_window_add(struct super_block *sb, | ||
501 | struct ext4_reserve_window_node *rsv) | ||
502 | { | ||
503 | struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; | ||
504 | struct rb_node *node = &rsv->rsv_node; | ||
505 | ext4_fsblk_t start = rsv->rsv_start; | ||
506 | |||
507 | struct rb_node ** p = &root->rb_node; | ||
508 | struct rb_node * parent = NULL; | ||
509 | struct ext4_reserve_window_node *this; | ||
510 | |||
511 | while (*p) | ||
512 | { | ||
513 | parent = *p; | ||
514 | this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); | ||
515 | |||
516 | if (start < this->rsv_start) | ||
517 | p = &(*p)->rb_left; | ||
518 | else if (start > this->rsv_end) | ||
519 | p = &(*p)->rb_right; | ||
520 | else { | ||
521 | rsv_window_dump(root, 1); | ||
522 | BUG(); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | rb_link_node(node, parent, p); | ||
527 | rb_insert_color(node, root); | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
532 | * @sb: super block | ||
533 | * @rsv: reservation window to remove | ||
534 | * | ||
535 | * Mark the block reservation window as not allocated, and unlink it | ||
536 | * from the filesystem reservation window rb tree. Must be called with | ||
537 | * rsv_lock hold. | ||
538 | */ | ||
539 | static void rsv_window_remove(struct super_block *sb, | ||
540 | struct ext4_reserve_window_node *rsv) | ||
541 | { | ||
542 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
543 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
544 | rsv->rsv_alloc_hit = 0; | ||
545 | rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); | ||
546 | } | ||
547 | |||
548 | /* | ||
549 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
550 | * @rsv: given reservation window to check | ||
551 | * | ||
552 | * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. | ||
553 | */ | ||
554 | static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | ||
555 | { | ||
556 | /* a valid reservation end block could not be 0 */ | ||
557 | return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
558 | } | ||
559 | |||
560 | /** | ||
561 | * ext4_init_block_alloc_info() | ||
562 | * @inode: file inode structure | ||
563 | * | ||
564 | * Allocate and initialize the reservation window structure, and | ||
565 | * link the window to the ext4 inode structure at last | ||
566 | * | ||
567 | * The reservation window structure is only dynamically allocated | ||
568 | * and linked to ext4 inode the first time the open file | ||
569 | * needs a new block. So, before every ext4_new_block(s) call, for | ||
570 | * regular files, we should check whether the reservation window | ||
571 | * structure exists or not. In the latter case, this function is called. | ||
572 | * Fail to do so will result in block reservation being turned off for that | ||
573 | * open file. | ||
574 | * | ||
575 | * This function is called from ext4_get_blocks_handle(), also called | ||
576 | * when setting the reservation window size through ioctl before the file | ||
577 | * is open for write (needs block allocation). | ||
578 | * | ||
579 | * Needs down_write(i_data_sem) protection prior to call this function. | ||
580 | */ | ||
581 | void ext4_init_block_alloc_info(struct inode *inode) | ||
582 | { | ||
583 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
584 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
585 | struct super_block *sb = inode->i_sb; | ||
586 | |||
587 | block_i = kmalloc(sizeof(*block_i), GFP_NOFS); | ||
588 | if (block_i) { | ||
589 | struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; | ||
590 | |||
591 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
592 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
593 | |||
594 | /* | ||
595 | * if filesystem is mounted with NORESERVATION, the goal | ||
596 | * reservation window size is set to zero to indicate | ||
597 | * block reservation is off | ||
598 | */ | ||
599 | if (!test_opt(sb, RESERVATION)) | ||
600 | rsv->rsv_goal_size = 0; | ||
601 | else | ||
602 | rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; | ||
603 | rsv->rsv_alloc_hit = 0; | ||
604 | block_i->last_alloc_logical_block = 0; | ||
605 | block_i->last_alloc_physical_block = 0; | ||
606 | } | ||
607 | ei->i_block_alloc_info = block_i; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * ext4_discard_reservation() | ||
612 | * @inode: inode | ||
613 | * | ||
614 | * Discard(free) block reservation window on last file close, or truncate | ||
615 | * or at last iput(). | ||
616 | * | ||
617 | * It is being called in three cases: | ||
618 | * ext4_release_file(): last writer close the file | ||
619 | * ext4_clear_inode(): last iput(), when nobody link to this file. | ||
620 | * ext4_truncate(): when the block indirect map is about to change. | ||
621 | * | ||
622 | */ | ||
623 | void ext4_discard_reservation(struct inode *inode) | ||
624 | { | ||
625 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
626 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
627 | struct ext4_reserve_window_node *rsv; | ||
628 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||
629 | |||
630 | ext4_mb_discard_inode_preallocations(inode); | ||
631 | |||
632 | if (!block_i) | ||
633 | return; | ||
634 | |||
635 | rsv = &block_i->rsv_window_node; | ||
636 | if (!rsv_is_empty(&rsv->rsv_window)) { | ||
637 | spin_lock(rsv_lock); | ||
638 | if (!rsv_is_empty(&rsv->rsv_window)) | ||
639 | rsv_window_remove(inode->i_sb, rsv); | ||
640 | spin_unlock(rsv_lock); | ||
641 | } | ||
642 | } | ||
643 | 351 | ||
644 | /** | 352 | /** |
645 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 353 | * ext4_free_blocks_sb() -- Free given blocks and update quota |
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) | |||
648 | * @block: start physcial block to free | 356 | * @block: start physcial block to free |
649 | * @count: number of blocks to free | 357 | * @count: number of blocks to free |
650 | * @pdquot_freed_blocks: pointer to quota | 358 | * @pdquot_freed_blocks: pointer to quota |
359 | * | ||
360 | * XXX This function is only used by the on-line resizing code, which | ||
361 | * should probably be fixed up to call the mballoc variant. There | ||
362 | * this needs to be cleaned up later; in fact, I'm not convinced this | ||
363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
365 | * with the mballoc code. | ||
651 | */ | 366 | */ |
652 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
653 | ext4_fsblk_t block, unsigned long count, | 368 | ext4_fsblk_t block, unsigned long count, |
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
659 | ext4_grpblk_t bit; | 374 | ext4_grpblk_t bit; |
660 | unsigned long i; | 375 | unsigned long i; |
661 | unsigned long overflow; | 376 | unsigned long overflow; |
662 | struct ext4_group_desc * desc; | 377 | struct ext4_group_desc *desc; |
663 | struct ext4_super_block * es; | 378 | struct ext4_super_block *es; |
664 | struct ext4_sb_info *sbi; | 379 | struct ext4_sb_info *sbi; |
665 | int err = 0, ret; | 380 | int err = 0, ret; |
666 | ext4_grpblk_t group_freed; | 381 | ext4_grpblk_t group_freed; |
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
671 | if (block < le32_to_cpu(es->s_first_data_block) || | 386 | if (block < le32_to_cpu(es->s_first_data_block) || |
672 | block + count < block || | 387 | block + count < block || |
673 | block + count > ext4_blocks_count(es)) { | 388 | block + count > ext4_blocks_count(es)) { |
674 | ext4_error (sb, "ext4_free_blocks", | 389 | ext4_error(sb, "ext4_free_blocks", |
675 | "Freeing blocks not in datazone - " | 390 | "Freeing blocks not in datazone - " |
676 | "block = %llu, count = %lu", block, count); | 391 | "block = %llu, count = %lu", block, count); |
677 | goto error_return; | 392 | goto error_return; |
678 | } | 393 | } |
679 | 394 | ||
680 | ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); | 395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); |
681 | 396 | ||
682 | do_more: | 397 | do_more: |
683 | overflow = 0; | 398 | overflow = 0; |
@@ -694,7 +409,7 @@ do_more: | |||
694 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
695 | if (!bitmap_bh) | 410 | if (!bitmap_bh) |
696 | goto error_return; | 411 | goto error_return; |
697 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 412 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
698 | if (!desc) | 413 | if (!desc) |
699 | goto error_return; | 414 | goto error_return; |
700 | 415 | ||
@@ -703,10 +418,10 @@ do_more: | |||
703 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
704 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 419 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
705 | sbi->s_itb_per_group)) { | 420 | sbi->s_itb_per_group)) { |
706 | ext4_error (sb, "ext4_free_blocks", | 421 | ext4_error(sb, "ext4_free_blocks", |
707 | "Freeing blocks in system zones - " | 422 | "Freeing blocks in system zones - " |
708 | "Block = %llu, count = %lu", | 423 | "Block = %llu, count = %lu", |
709 | block, count); | 424 | block, count); |
710 | goto error_return; | 425 | goto error_return; |
711 | } | 426 | } |
712 | 427 | ||
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
848 | ext4_fsblk_t block, unsigned long count, | 563 | ext4_fsblk_t block, unsigned long count, |
849 | int metadata) | 564 | int metadata) |
850 | { | 565 | { |
851 | struct super_block * sb; | 566 | struct super_block *sb; |
852 | unsigned long dquot_freed_blocks; | 567 | unsigned long dquot_freed_blocks; |
853 | 568 | ||
854 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
@@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
859 | 574 | ||
860 | sb = inode->i_sb; | 575 | sb = inode->i_sb; |
861 | 576 | ||
862 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | 577 | ext4_mb_free_blocks(handle, inode, block, count, |
863 | ext4_free_blocks_sb(handle, sb, block, count, | 578 | metadata, &dquot_freed_blocks); |
864 | &dquot_freed_blocks); | ||
865 | else | ||
866 | ext4_mb_free_blocks(handle, inode, block, count, | ||
867 | metadata, &dquot_freed_blocks); | ||
868 | if (dquot_freed_blocks) | 579 | if (dquot_freed_blocks) |
869 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 580 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
870 | return; | 581 | return; |
871 | } | 582 | } |
872 | 583 | ||
873 | /** | 584 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
874 | * ext4_test_allocatable() | 585 | s64 nblocks) |
875 | * @nr: given allocation block group | ||
876 | * @bh: bufferhead contains the bitmap of the given block group | ||
877 | * | ||
878 | * For ext4 allocations, we must not reuse any blocks which are | ||
879 | * allocated in the bitmap buffer's "last committed data" copy. This | ||
880 | * prevents deletes from freeing up the page for reuse until we have | ||
881 | * committed the delete transaction. | ||
882 | * | ||
883 | * If we didn't do this, then deleting something and reallocating it as | ||
884 | * data would allow the old block to be overwritten before the | ||
885 | * transaction committed (because we force data to disk before commit). | ||
886 | * This would lead to corruption if we crashed between overwriting the | ||
887 | * data and committing the delete. | ||
888 | * | ||
889 | * @@@ We may want to make this allocation behaviour conditional on | ||
890 | * data-writes at some point, and disable it for metadata allocations or | ||
891 | * sync-data inodes. | ||
892 | */ | ||
893 | static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) | ||
894 | { | ||
895 | int ret; | ||
896 | struct journal_head *jh = bh2jh(bh); | ||
897 | |||
898 | if (ext4_test_bit(nr, bh->b_data)) | ||
899 | return 0; | ||
900 | |||
901 | jbd_lock_bh_state(bh); | ||
902 | if (!jh->b_committed_data) | ||
903 | ret = 1; | ||
904 | else | ||
905 | ret = !ext4_test_bit(nr, jh->b_committed_data); | ||
906 | jbd_unlock_bh_state(bh); | ||
907 | return ret; | ||
908 | } | ||
909 | |||
910 | /** | ||
911 | * bitmap_search_next_usable_block() | ||
912 | * @start: the starting block (group relative) of the search | ||
913 | * @bh: bufferhead contains the block group bitmap | ||
914 | * @maxblocks: the ending block (group relative) of the reservation | ||
915 | * | ||
916 | * The bitmap search --- search forward alternately through the actual | ||
917 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
918 | * bit free in both bitmaps. | ||
919 | */ | ||
920 | static ext4_grpblk_t | ||
921 | bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
922 | ext4_grpblk_t maxblocks) | ||
923 | { | 586 | { |
924 | ext4_grpblk_t next; | 587 | s64 free_blocks, dirty_blocks; |
925 | struct journal_head *jh = bh2jh(bh); | 588 | s64 root_blocks = 0; |
926 | 589 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | |
927 | while (start < maxblocks) { | 590 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; |
928 | next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); | ||
929 | if (next >= maxblocks) | ||
930 | return -1; | ||
931 | if (ext4_test_allocatable(next, bh)) | ||
932 | return next; | ||
933 | jbd_lock_bh_state(bh); | ||
934 | if (jh->b_committed_data) | ||
935 | start = ext4_find_next_zero_bit(jh->b_committed_data, | ||
936 | maxblocks, next); | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | ||
939 | return -1; | ||
940 | } | ||
941 | 591 | ||
942 | /** | 592 | free_blocks = percpu_counter_read_positive(fbc); |
943 | * find_next_usable_block() | 593 | dirty_blocks = percpu_counter_read_positive(dbc); |
944 | * @start: the starting block (group relative) to find next | ||
945 | * allocatable block in bitmap. | ||
946 | * @bh: bufferhead contains the block group bitmap | ||
947 | * @maxblocks: the ending block (group relative) for the search | ||
948 | * | ||
949 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
950 | * its last-committed copy (if that exists), and perform the "most | ||
951 | * appropriate allocation" algorithm of looking for a free block near | ||
952 | * the initial goal; then for a free byte somewhere in the bitmap; then | ||
953 | * for any free bit in the bitmap. | ||
954 | */ | ||
955 | static ext4_grpblk_t | ||
956 | find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
957 | ext4_grpblk_t maxblocks) | ||
958 | { | ||
959 | ext4_grpblk_t here, next; | ||
960 | char *p, *r; | ||
961 | |||
962 | if (start > 0) { | ||
963 | /* | ||
964 | * The goal was occupied; search forward for a free | ||
965 | * block within the next XX blocks. | ||
966 | * | ||
967 | * end_goal is more or less random, but it has to be | ||
968 | * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the | ||
969 | * next 64-bit boundary is simple.. | ||
970 | */ | ||
971 | ext4_grpblk_t end_goal = (start + 63) & ~63; | ||
972 | if (end_goal > maxblocks) | ||
973 | end_goal = maxblocks; | ||
974 | here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); | ||
975 | if (here < end_goal && ext4_test_allocatable(here, bh)) | ||
976 | return here; | ||
977 | ext4_debug("Bit not found near goal\n"); | ||
978 | } | ||
979 | |||
980 | here = start; | ||
981 | if (here < 0) | ||
982 | here = 0; | ||
983 | |||
984 | p = ((char *)bh->b_data) + (here >> 3); | ||
985 | r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); | ||
986 | next = (r - ((char *)bh->b_data)) << 3; | ||
987 | |||
988 | if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) | ||
989 | return next; | ||
990 | |||
991 | /* | ||
992 | * The bitmap search --- search forward alternately through the actual | ||
993 | * bitmap and the last-committed copy until we find a bit free in | ||
994 | * both | ||
995 | */ | ||
996 | here = bitmap_search_next_usable_block(here, bh, maxblocks); | ||
997 | return here; | ||
998 | } | ||
999 | |||
1000 | /** | ||
1001 | * claim_block() | ||
1002 | * @block: the free block (group relative) to allocate | ||
1003 | * @bh: the bufferhead containts the block group bitmap | ||
1004 | * | ||
1005 | * We think we can allocate this block in this bitmap. Try to set the bit. | ||
1006 | * If that succeeds then check that nobody has allocated and then freed the | ||
1007 | * block since we saw that is was not marked in b_committed_data. If it _was_ | ||
1008 | * allocated and freed then clear the bit in the bitmap again and return | ||
1009 | * zero (failure). | ||
1010 | */ | ||
1011 | static inline int | ||
1012 | claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | ||
1013 | { | ||
1014 | struct journal_head *jh = bh2jh(bh); | ||
1015 | int ret; | ||
1016 | |||
1017 | if (ext4_set_bit_atomic(lock, block, bh->b_data)) | ||
1018 | return 0; | ||
1019 | jbd_lock_bh_state(bh); | ||
1020 | if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { | ||
1021 | ext4_clear_bit_atomic(lock, block, bh->b_data); | ||
1022 | ret = 0; | ||
1023 | } else { | ||
1024 | ret = 1; | ||
1025 | } | ||
1026 | jbd_unlock_bh_state(bh); | ||
1027 | return ret; | ||
1028 | } | ||
1029 | 594 | ||
1030 | /** | 595 | if (!capable(CAP_SYS_RESOURCE) && |
1031 | * ext4_try_to_allocate() | 596 | sbi->s_resuid != current->fsuid && |
1032 | * @sb: superblock | 597 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1033 | * @handle: handle to this transaction | 598 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1034 | * @group: given allocation block group | ||
1035 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1036 | * @grp_goal: given target block within the group | ||
1037 | * @count: target number of blocks to allocate | ||
1038 | * @my_rsv: reservation window | ||
1039 | * | ||
1040 | * Attempt to allocate blocks within a give range. Set the range of allocation | ||
1041 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
1042 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
1043 | * | ||
1044 | * To set the range of this allocation: | ||
1045 | * if there is a reservation window, only try to allocate block(s) from the | ||
1046 | * file's own reservation window; | ||
1047 | * Otherwise, the allocation range starts from the give goal block, ends at | ||
1048 | * the block group's last block. | ||
1049 | * | ||
1050 | * If we failed to allocate the desired block then we may end up crossing to a | ||
1051 | * new bitmap. In that case we must release write access to the old one via | ||
1052 | * ext4_journal_release_buffer(), else we'll run out of credits. | ||
1053 | */ | ||
1054 | static ext4_grpblk_t | ||
1055 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, | ||
1056 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1057 | ext4_grpblk_t grp_goal, unsigned long *count, | ||
1058 | struct ext4_reserve_window *my_rsv) | ||
1059 | { | ||
1060 | ext4_fsblk_t group_first_block; | ||
1061 | ext4_grpblk_t start, end; | ||
1062 | unsigned long num = 0; | ||
1063 | |||
1064 | /* we do allocation within the reservation window if we have a window */ | ||
1065 | if (my_rsv) { | ||
1066 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1067 | if (my_rsv->_rsv_start >= group_first_block) | ||
1068 | start = my_rsv->_rsv_start - group_first_block; | ||
1069 | else | ||
1070 | /* reservation window cross group boundary */ | ||
1071 | start = 0; | ||
1072 | end = my_rsv->_rsv_end - group_first_block + 1; | ||
1073 | if (end > EXT4_BLOCKS_PER_GROUP(sb)) | ||
1074 | /* reservation window crosses group boundary */ | ||
1075 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1076 | if ((start <= grp_goal) && (grp_goal < end)) | ||
1077 | start = grp_goal; | ||
1078 | else | ||
1079 | grp_goal = -1; | ||
1080 | } else { | ||
1081 | if (grp_goal > 0) | ||
1082 | start = grp_goal; | ||
1083 | else | ||
1084 | start = 0; | ||
1085 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1086 | } | ||
1087 | |||
1088 | BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); | ||
1089 | |||
1090 | repeat: | ||
1091 | if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { | ||
1092 | grp_goal = find_next_usable_block(start, bitmap_bh, end); | ||
1093 | if (grp_goal < 0) | ||
1094 | goto fail_access; | ||
1095 | if (!my_rsv) { | ||
1096 | int i; | ||
1097 | |||
1098 | for (i = 0; i < 7 && grp_goal > start && | ||
1099 | ext4_test_allocatable(grp_goal - 1, | ||
1100 | bitmap_bh); | ||
1101 | i++, grp_goal--) | ||
1102 | ; | ||
1103 | } | ||
1104 | } | ||
1105 | start = grp_goal; | ||
1106 | |||
1107 | if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1108 | grp_goal, bitmap_bh)) { | ||
1109 | /* | ||
1110 | * The block was allocated by another thread, or it was | ||
1111 | * allocated and then freed by another thread | ||
1112 | */ | ||
1113 | start++; | ||
1114 | grp_goal++; | ||
1115 | if (start >= end) | ||
1116 | goto fail_access; | ||
1117 | goto repeat; | ||
1118 | } | ||
1119 | num++; | ||
1120 | grp_goal++; | ||
1121 | while (num < *count && grp_goal < end | ||
1122 | && ext4_test_allocatable(grp_goal, bitmap_bh) | ||
1123 | && claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1124 | grp_goal, bitmap_bh)) { | ||
1125 | num++; | ||
1126 | grp_goal++; | ||
1127 | } | ||
1128 | *count = num; | ||
1129 | return grp_goal - num; | ||
1130 | fail_access: | ||
1131 | *count = num; | ||
1132 | return -1; | ||
1133 | } | ||
1134 | |||
1135 | /** | ||
1136 | * find_next_reservable_window(): | ||
1137 | * find a reservable space within the given range. | ||
1138 | * It does not allocate the reservation window for now: | ||
1139 | * alloc_new_reservation() will do the work later. | ||
1140 | * | ||
1141 | * @search_head: the head of the searching list; | ||
1142 | * This is not necessarily the list head of the whole filesystem | ||
1143 | * | ||
1144 | * We have both head and start_block to assist the search | ||
1145 | * for the reservable space. The list starts from head, | ||
1146 | * but we will shift to the place where start_block is, | ||
1147 | * then start from there, when looking for a reservable space. | ||
1148 | * | ||
1149 | * @size: the target new reservation window size | ||
1150 | * | ||
1151 | * @group_first_block: the first block we consider to start | ||
1152 | * the real search from | ||
1153 | * | ||
1154 | * @last_block: | ||
1155 | * the maximum block number that our goal reservable space | ||
1156 | * could start from. This is normally the last block in this | ||
1157 | * group. The search will end when we found the start of next | ||
1158 | * possible reservable space is out of this boundary. | ||
1159 | * This could handle the cross boundary reservation window | ||
1160 | * request. | ||
1161 | * | ||
1162 | * basically we search from the given range, rather than the whole | ||
1163 | * reservation double linked list, (start_block, last_block) | ||
1164 | * to find a free region that is of my size and has not | ||
1165 | * been reserved. | ||
1166 | * | ||
1167 | */ | ||
1168 | static int find_next_reservable_window( | ||
1169 | struct ext4_reserve_window_node *search_head, | ||
1170 | struct ext4_reserve_window_node *my_rsv, | ||
1171 | struct super_block * sb, | ||
1172 | ext4_fsblk_t start_block, | ||
1173 | ext4_fsblk_t last_block) | ||
1174 | { | ||
1175 | struct rb_node *next; | ||
1176 | struct ext4_reserve_window_node *rsv, *prev; | ||
1177 | ext4_fsblk_t cur; | ||
1178 | int size = my_rsv->rsv_goal_size; | ||
1179 | |||
1180 | /* TODO: make the start of the reservation window byte-aligned */ | ||
1181 | /* cur = *start_block & ~7;*/ | ||
1182 | cur = start_block; | ||
1183 | rsv = search_head; | ||
1184 | if (!rsv) | ||
1185 | return -1; | ||
1186 | |||
1187 | while (1) { | ||
1188 | if (cur <= rsv->rsv_end) | ||
1189 | cur = rsv->rsv_end + 1; | ||
1190 | |||
1191 | /* TODO? | ||
1192 | * in the case we could not find a reservable space | ||
1193 | * that is what is expected, during the re-search, we could | ||
1194 | * remember what's the largest reservable space we could have | ||
1195 | * and return that one. | ||
1196 | * | ||
1197 | * For now it will fail if we could not find the reservable | ||
1198 | * space with expected-size (or more)... | ||
1199 | */ | ||
1200 | if (cur > last_block) | ||
1201 | return -1; /* fail */ | ||
1202 | |||
1203 | prev = rsv; | ||
1204 | next = rb_next(&rsv->rsv_node); | ||
1205 | rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); | ||
1206 | 599 | ||
1207 | /* | 600 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1208 | * Reached the last reservation, we can just append to the | 601 | EXT4_FREEBLOCKS_WATERMARK) { |
1209 | * previous one. | 602 | free_blocks = percpu_counter_sum(fbc); |
1210 | */ | 603 | dirty_blocks = percpu_counter_sum(dbc); |
1211 | if (!next) | 604 | if (dirty_blocks < 0) { |
1212 | break; | 605 | printk(KERN_CRIT "Dirty block accounting " |
1213 | 606 | "went wrong %lld\n", | |
1214 | if (cur + size <= rsv->rsv_start) { | 607 | dirty_blocks); |
1215 | /* | ||
1216 | * Found a reserveable space big enough. We could | ||
1217 | * have a reservation across the group boundary here | ||
1218 | */ | ||
1219 | break; | ||
1220 | } | 608 | } |
1221 | } | 609 | } |
1222 | /* | 610 | /* Check whether we have space after |
1223 | * we come here either : | 611 | * accounting for current dirty blocks |
1224 | * when we reach the end of the whole list, | ||
1225 | * and there is empty reservable space after last entry in the list. | ||
1226 | * append it to the end of the list. | ||
1227 | * | ||
1228 | * or we found one reservable space in the middle of the list, | ||
1229 | * return the reservation window that we could append to. | ||
1230 | * succeed. | ||
1231 | */ | 612 | */ |
613 | if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) | ||
614 | /* we don't have free space */ | ||
615 | return -ENOSPC; | ||
1232 | 616 | ||
1233 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) | 617 | /* Add the blocks to nblocks */ |
1234 | rsv_window_remove(sb, my_rsv); | 618 | percpu_counter_add(dbc, nblocks); |
1235 | |||
1236 | /* | ||
1237 | * Let's book the whole avaliable window for now. We will check the | ||
1238 | * disk bitmap later and then, if there are free blocks then we adjust | ||
1239 | * the window size if it's larger than requested. | ||
1240 | * Otherwise, we will remove this node from the tree next time | ||
1241 | * call find_next_reservable_window. | ||
1242 | */ | ||
1243 | my_rsv->rsv_start = cur; | ||
1244 | my_rsv->rsv_end = cur + size - 1; | ||
1245 | my_rsv->rsv_alloc_hit = 0; | ||
1246 | |||
1247 | if (prev != my_rsv) | ||
1248 | ext4_rsv_window_add(sb, my_rsv); | ||
1249 | |||
1250 | return 0; | 619 | return 0; |
1251 | } | 620 | } |
1252 | 621 | ||
1253 | /** | 622 | /** |
1254 | * alloc_new_reservation()--allocate a new reservation window | ||
1255 | * | ||
1256 | * To make a new reservation, we search part of the filesystem | ||
1257 | * reservation list (the list that inside the group). We try to | ||
1258 | * allocate a new reservation window near the allocation goal, | ||
1259 | * or the beginning of the group, if there is no goal. | ||
1260 | * | ||
1261 | * We first find a reservable space after the goal, then from | ||
1262 | * there, we check the bitmap for the first free block after | ||
1263 | * it. If there is no free block until the end of group, then the | ||
1264 | * whole group is full, we failed. Otherwise, check if the free | ||
1265 | * block is inside the expected reservable space, if so, we | ||
1266 | * succeed. | ||
1267 | * If the first free block is outside the reservable space, then | ||
1268 | * start from the first free block, we search for next available | ||
1269 | * space, and go on. | ||
1270 | * | ||
1271 | * on succeed, a new reservation will be found and inserted into the list | ||
1272 | * It contains at least one free block, and it does not overlap with other | ||
1273 | * reservation windows. | ||
1274 | * | ||
1275 | * failed: we failed to find a reservation window in this group | ||
1276 | * | ||
1277 | * @rsv: the reservation | ||
1278 | * | ||
1279 | * @grp_goal: The goal (group-relative). It is where the search for a | ||
1280 | * free reservable space should start from. | ||
1281 | * if we have a grp_goal(grp_goal >0 ), then start from there, | ||
1282 | * no grp_goal(grp_goal = -1), we start from the first block | ||
1283 | * of the group. | ||
1284 | * | ||
1285 | * @sb: the super block | ||
1286 | * @group: the group we are trying to allocate in | ||
1287 | * @bitmap_bh: the block group block bitmap | ||
1288 | * | ||
1289 | */ | ||
1290 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1291 | ext4_grpblk_t grp_goal, struct super_block *sb, | ||
1292 | ext4_group_t group, struct buffer_head *bitmap_bh) | ||
1293 | { | ||
1294 | struct ext4_reserve_window_node *search_head; | ||
1295 | ext4_fsblk_t group_first_block, group_end_block, start_block; | ||
1296 | ext4_grpblk_t first_free_block; | ||
1297 | struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; | ||
1298 | unsigned long size; | ||
1299 | int ret; | ||
1300 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1301 | |||
1302 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1303 | group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1304 | |||
1305 | if (grp_goal < 0) | ||
1306 | start_block = group_first_block; | ||
1307 | else | ||
1308 | start_block = grp_goal + group_first_block; | ||
1309 | |||
1310 | size = my_rsv->rsv_goal_size; | ||
1311 | |||
1312 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | ||
1313 | /* | ||
1314 | * if the old reservation is cross group boundary | ||
1315 | * and if the goal is inside the old reservation window, | ||
1316 | * we will come here when we just failed to allocate from | ||
1317 | * the first part of the window. We still have another part | ||
1318 | * that belongs to the next group. In this case, there is no | ||
1319 | * point to discard our window and try to allocate a new one | ||
1320 | * in this group(which will fail). we should | ||
1321 | * keep the reservation window, just simply move on. | ||
1322 | * | ||
1323 | * Maybe we could shift the start block of the reservation | ||
1324 | * window to the first block of next group. | ||
1325 | */ | ||
1326 | |||
1327 | if ((my_rsv->rsv_start <= group_end_block) && | ||
1328 | (my_rsv->rsv_end > group_end_block) && | ||
1329 | (start_block >= my_rsv->rsv_start)) | ||
1330 | return -1; | ||
1331 | |||
1332 | if ((my_rsv->rsv_alloc_hit > | ||
1333 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | ||
1334 | /* | ||
1335 | * if the previously allocation hit ratio is | ||
1336 | * greater than 1/2, then we double the size of | ||
1337 | * the reservation window the next time, | ||
1338 | * otherwise we keep the same size window | ||
1339 | */ | ||
1340 | size = size * 2; | ||
1341 | if (size > EXT4_MAX_RESERVE_BLOCKS) | ||
1342 | size = EXT4_MAX_RESERVE_BLOCKS; | ||
1343 | my_rsv->rsv_goal_size= size; | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | spin_lock(rsv_lock); | ||
1348 | /* | ||
1349 | * shift the search start to the window near the goal block | ||
1350 | */ | ||
1351 | search_head = search_reserve_window(fs_rsv_root, start_block); | ||
1352 | |||
1353 | /* | ||
1354 | * find_next_reservable_window() simply finds a reservable window | ||
1355 | * inside the given range(start_block, group_end_block). | ||
1356 | * | ||
1357 | * To make sure the reservation window has a free bit inside it, we | ||
1358 | * need to check the bitmap after we found a reservable window. | ||
1359 | */ | ||
1360 | retry: | ||
1361 | ret = find_next_reservable_window(search_head, my_rsv, sb, | ||
1362 | start_block, group_end_block); | ||
1363 | |||
1364 | if (ret == -1) { | ||
1365 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1366 | rsv_window_remove(sb, my_rsv); | ||
1367 | spin_unlock(rsv_lock); | ||
1368 | return -1; | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * On success, find_next_reservable_window() returns the | ||
1373 | * reservation window where there is a reservable space after it. | ||
1374 | * Before we reserve this reservable space, we need | ||
1375 | * to make sure there is at least a free block inside this region. | ||
1376 | * | ||
1377 | * searching the first free bit on the block bitmap and copy of | ||
1378 | * last committed bitmap alternatively, until we found a allocatable | ||
1379 | * block. Search start from the start block of the reservable space | ||
1380 | * we just found. | ||
1381 | */ | ||
1382 | spin_unlock(rsv_lock); | ||
1383 | first_free_block = bitmap_search_next_usable_block( | ||
1384 | my_rsv->rsv_start - group_first_block, | ||
1385 | bitmap_bh, group_end_block - group_first_block + 1); | ||
1386 | |||
1387 | if (first_free_block < 0) { | ||
1388 | /* | ||
1389 | * no free block left on the bitmap, no point | ||
1390 | * to reserve the space. return failed. | ||
1391 | */ | ||
1392 | spin_lock(rsv_lock); | ||
1393 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1394 | rsv_window_remove(sb, my_rsv); | ||
1395 | spin_unlock(rsv_lock); | ||
1396 | return -1; /* failed */ | ||
1397 | } | ||
1398 | |||
1399 | start_block = first_free_block + group_first_block; | ||
1400 | /* | ||
1401 | * check if the first free block is within the | ||
1402 | * free space we just reserved | ||
1403 | */ | ||
1404 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | ||
1405 | return 0; /* success */ | ||
1406 | /* | ||
1407 | * if the first free bit we found is out of the reservable space | ||
1408 | * continue search for next reservable space, | ||
1409 | * start from where the free block is, | ||
1410 | * we also shift the list head to where we stopped last time | ||
1411 | */ | ||
1412 | search_head = my_rsv; | ||
1413 | spin_lock(rsv_lock); | ||
1414 | goto retry; | ||
1415 | } | ||
1416 | |||
1417 | /** | ||
1418 | * try_to_extend_reservation() | ||
1419 | * @my_rsv: given reservation window | ||
1420 | * @sb: super block | ||
1421 | * @size: the delta to extend | ||
1422 | * | ||
1423 | * Attempt to expand the reservation window large enough to have | ||
1424 | * required number of free blocks | ||
1425 | * | ||
1426 | * Since ext4_try_to_allocate() will always allocate blocks within | ||
1427 | * the reservation window range, if the window size is too small, | ||
1428 | * multiple blocks allocation has to stop at the end of the reservation | ||
1429 | * window. To make this more efficient, given the total number of | ||
1430 | * blocks needed and the current size of the window, we try to | ||
1431 | * expand the reservation window size if necessary on a best-effort | ||
1432 | * basis before ext4_new_blocks() tries to allocate blocks, | ||
1433 | */ | ||
1434 | static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1435 | struct super_block *sb, int size) | ||
1436 | { | ||
1437 | struct ext4_reserve_window_node *next_rsv; | ||
1438 | struct rb_node *next; | ||
1439 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1440 | |||
1441 | if (!spin_trylock(rsv_lock)) | ||
1442 | return; | ||
1443 | |||
1444 | next = rb_next(&my_rsv->rsv_node); | ||
1445 | |||
1446 | if (!next) | ||
1447 | my_rsv->rsv_end += size; | ||
1448 | else { | ||
1449 | next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); | ||
1450 | |||
1451 | if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) | ||
1452 | my_rsv->rsv_end += size; | ||
1453 | else | ||
1454 | my_rsv->rsv_end = next_rsv->rsv_start - 1; | ||
1455 | } | ||
1456 | spin_unlock(rsv_lock); | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * ext4_try_to_allocate_with_rsv() | ||
1461 | * @sb: superblock | ||
1462 | * @handle: handle to this transaction | ||
1463 | * @group: given allocation block group | ||
1464 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1465 | * @grp_goal: given target block within the group | ||
1466 | * @count: target number of blocks to allocate | ||
1467 | * @my_rsv: reservation window | ||
1468 | * @errp: pointer to store the error code | ||
1469 | * | ||
1470 | * This is the main function used to allocate a new block and its reservation | ||
1471 | * window. | ||
1472 | * | ||
1473 | * Each time when a new block allocation is need, first try to allocate from | ||
1474 | * its own reservation. If it does not have a reservation window, instead of | ||
1475 | * looking for a free bit on bitmap first, then look up the reservation list to | ||
1476 | * see if it is inside somebody else's reservation window, we try to allocate a | ||
1477 | * reservation window for it starting from the goal first. Then do the block | ||
1478 | * allocation within the reservation window. | ||
1479 | * | ||
1480 | * This will avoid keeping on searching the reservation list again and | ||
1481 | * again when somebody is looking for a free block (without | ||
1482 | * reservation), and there are lots of free blocks, but they are all | ||
1483 | * being reserved. | ||
1484 | * | ||
1485 | * We use a red-black tree for the per-filesystem reservation list. | ||
1486 | * | ||
1487 | */ | ||
1488 | static ext4_grpblk_t | ||
1489 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | ||
1490 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1491 | ext4_grpblk_t grp_goal, | ||
1492 | struct ext4_reserve_window_node * my_rsv, | ||
1493 | unsigned long *count, int *errp) | ||
1494 | { | ||
1495 | ext4_fsblk_t group_first_block, group_last_block; | ||
1496 | ext4_grpblk_t ret = 0; | ||
1497 | int fatal; | ||
1498 | unsigned long num = *count; | ||
1499 | |||
1500 | *errp = 0; | ||
1501 | |||
1502 | /* | ||
1503 | * Make sure we use undo access for the bitmap, because it is critical | ||
1504 | * that we do the frozen_data COW on bitmap buffers in all cases even | ||
1505 | * if the buffer is in BJ_Forget state in the committing transaction. | ||
1506 | */ | ||
1507 | BUFFER_TRACE(bitmap_bh, "get undo access for new block"); | ||
1508 | fatal = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
1509 | if (fatal) { | ||
1510 | *errp = fatal; | ||
1511 | return -1; | ||
1512 | } | ||
1513 | |||
1514 | /* | ||
1515 | * we don't deal with reservation when | ||
1516 | * filesystem is mounted without reservation | ||
1517 | * or the file is not a regular file | ||
1518 | * or last attempt to allocate a block with reservation turned on failed | ||
1519 | */ | ||
1520 | if (my_rsv == NULL ) { | ||
1521 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1522 | grp_goal, count, NULL); | ||
1523 | goto out; | ||
1524 | } | ||
1525 | /* | ||
1526 | * grp_goal is a group relative block number (if there is a goal) | ||
1527 | * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) | ||
1528 | * first block is a filesystem wide block number | ||
1529 | * first block is the block number of the first block in this group | ||
1530 | */ | ||
1531 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1532 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1533 | |||
1534 | /* | ||
1535 | * Basically we will allocate a new block from inode's reservation | ||
1536 | * window. | ||
1537 | * | ||
1538 | * We need to allocate a new reservation window, if: | ||
1539 | * a) inode does not have a reservation window; or | ||
1540 | * b) last attempt to allocate a block from existing reservation | ||
1541 | * failed; or | ||
1542 | * c) we come here with a goal and with a reservation window | ||
1543 | * | ||
1544 | * We do not need to allocate a new reservation window if we come here | ||
1545 | * at the beginning with a goal and the goal is inside the window, or | ||
1546 | * we don't have a goal but already have a reservation window. | ||
1547 | * then we could go to allocate from the reservation window directly. | ||
1548 | */ | ||
1549 | while (1) { | ||
1550 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | ||
1551 | !goal_in_my_reservation(&my_rsv->rsv_window, | ||
1552 | grp_goal, group, sb)) { | ||
1553 | if (my_rsv->rsv_goal_size < *count) | ||
1554 | my_rsv->rsv_goal_size = *count; | ||
1555 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | ||
1556 | group, bitmap_bh); | ||
1557 | if (ret < 0) | ||
1558 | break; /* failed */ | ||
1559 | |||
1560 | if (!goal_in_my_reservation(&my_rsv->rsv_window, | ||
1561 | grp_goal, group, sb)) | ||
1562 | grp_goal = -1; | ||
1563 | } else if (grp_goal >= 0) { | ||
1564 | int curr = my_rsv->rsv_end - | ||
1565 | (grp_goal + group_first_block) + 1; | ||
1566 | |||
1567 | if (curr < *count) | ||
1568 | try_to_extend_reservation(my_rsv, sb, | ||
1569 | *count - curr); | ||
1570 | } | ||
1571 | |||
1572 | if ((my_rsv->rsv_start > group_last_block) || | ||
1573 | (my_rsv->rsv_end < group_first_block)) { | ||
1574 | rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); | ||
1575 | BUG(); | ||
1576 | } | ||
1577 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1578 | grp_goal, &num, &my_rsv->rsv_window); | ||
1579 | if (ret >= 0) { | ||
1580 | my_rsv->rsv_alloc_hit += num; | ||
1581 | *count = num; | ||
1582 | break; /* succeed */ | ||
1583 | } | ||
1584 | num = *count; | ||
1585 | } | ||
1586 | out: | ||
1587 | if (ret >= 0) { | ||
1588 | BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " | ||
1589 | "bitmap block"); | ||
1590 | fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
1591 | if (fatal) { | ||
1592 | *errp = fatal; | ||
1593 | return -1; | ||
1594 | } | ||
1595 | return ret; | ||
1596 | } | ||
1597 | |||
1598 | BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); | ||
1599 | ext4_journal_release_buffer(handle, bitmap_bh); | ||
1600 | return ret; | ||
1601 | } | ||
1602 | |||
1603 | /** | ||
1604 | * ext4_has_free_blocks() | 623 | * ext4_has_free_blocks() |
1605 | * @sbi: in-core super block structure. | 624 | * @sbi: in-core super block structure. |
1606 | * @nblocks: number of neeed blocks | 625 | * @nblocks: number of neeed blocks |
@@ -1610,26 +629,34 @@ out: | |||
1610 | * On success, return nblocks | 629 | * On success, return nblocks |
1611 | */ | 630 | */ |
1612 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 631 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
1613 | ext4_fsblk_t nblocks) | 632 | s64 nblocks) |
1614 | { | 633 | { |
1615 | ext4_fsblk_t free_blocks; | 634 | s64 free_blocks, dirty_blocks; |
1616 | ext4_fsblk_t root_blocks = 0; | 635 | s64 root_blocks = 0; |
636 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | ||
637 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; | ||
1617 | 638 | ||
1618 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 639 | free_blocks = percpu_counter_read_positive(fbc); |
640 | dirty_blocks = percpu_counter_read_positive(dbc); | ||
1619 | 641 | ||
1620 | if (!capable(CAP_SYS_RESOURCE) && | 642 | if (!capable(CAP_SYS_RESOURCE) && |
1621 | sbi->s_resuid != current->fsuid && | 643 | sbi->s_resuid != current->fsuid && |
1622 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) | 644 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1623 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 645 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1624 | #ifdef CONFIG_SMP | 646 | |
1625 | if (free_blocks - root_blocks < FBC_BATCH) | 647 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1626 | free_blocks = | 648 | EXT4_FREEBLOCKS_WATERMARK) { |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 649 | free_blocks = percpu_counter_sum(fbc); |
1628 | #endif | 650 | dirty_blocks = percpu_counter_sum(dbc); |
1629 | if (free_blocks - root_blocks < nblocks) | 651 | } |
1630 | return free_blocks - root_blocks; | 652 | if (free_blocks <= (root_blocks + dirty_blocks)) |
653 | /* we don't have free space */ | ||
654 | return 0; | ||
655 | |||
656 | if (free_blocks - (root_blocks + dirty_blocks) < nblocks) | ||
657 | return free_blocks - (root_blocks + dirty_blocks); | ||
1631 | return nblocks; | 658 | return nblocks; |
1632 | } | 659 | } |
1633 | 660 | ||
1634 | 661 | ||
1635 | /** | 662 | /** |
@@ -1654,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1654 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 681 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
1655 | } | 682 | } |
1656 | 683 | ||
1657 | /** | ||
1658 | * ext4_old_new_blocks() -- core block bitmap based block allocation function | ||
1659 | * | ||
1660 | * @handle: handle to this transaction | ||
1661 | * @inode: file inode | ||
1662 | * @goal: given target block(filesystem wide) | ||
1663 | * @count: target number of blocks to allocate | ||
1664 | * @errp: error code | ||
1665 | * | ||
1666 | * ext4_old_new_blocks uses a goal block to assist allocation and look up | ||
1667 | * the block bitmap directly to do block allocation. It tries to | ||
1668 | * allocate block(s) from the block group contains the goal block first. If | ||
1669 | * that fails, it will try to allocate block(s) from other block groups | ||
1670 | * without any specific goal block. | ||
1671 | * | ||
1672 | * This function is called when -o nomballoc mount option is enabled | ||
1673 | * | ||
1674 | */ | ||
1675 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
1676 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
1677 | { | ||
1678 | struct buffer_head *bitmap_bh = NULL; | ||
1679 | struct buffer_head *gdp_bh; | ||
1680 | ext4_group_t group_no; | ||
1681 | ext4_group_t goal_group; | ||
1682 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | ||
1683 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | ||
1684 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ | ||
1685 | ext4_group_t bgi; /* blockgroup iteration index */ | ||
1686 | int fatal = 0, err; | ||
1687 | int performed_allocation = 0; | ||
1688 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | ||
1689 | struct super_block *sb; | ||
1690 | struct ext4_group_desc *gdp; | ||
1691 | struct ext4_super_block *es; | ||
1692 | struct ext4_sb_info *sbi; | ||
1693 | struct ext4_reserve_window_node *my_rsv = NULL; | ||
1694 | struct ext4_block_alloc_info *block_i; | ||
1695 | unsigned short windowsz = 0; | ||
1696 | ext4_group_t ngroups; | ||
1697 | unsigned long num = *count; | ||
1698 | |||
1699 | sb = inode->i_sb; | ||
1700 | if (!sb) { | ||
1701 | *errp = -ENODEV; | ||
1702 | printk("ext4_new_block: nonexistent device"); | ||
1703 | return 0; | ||
1704 | } | ||
1705 | |||
1706 | sbi = EXT4_SB(sb); | ||
1707 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
1708 | /* | ||
1709 | * With delalloc we already reserved the blocks | ||
1710 | */ | ||
1711 | *count = ext4_has_free_blocks(sbi, *count); | ||
1712 | } | ||
1713 | if (*count == 0) { | ||
1714 | *errp = -ENOSPC; | ||
1715 | return 0; /*return with ENOSPC error */ | ||
1716 | } | ||
1717 | num = *count; | ||
1718 | |||
1719 | /* | ||
1720 | * Check quota for allocation of this block. | ||
1721 | */ | ||
1722 | if (DQUOT_ALLOC_BLOCK(inode, num)) { | ||
1723 | *errp = -EDQUOT; | ||
1724 | return 0; | ||
1725 | } | ||
1726 | |||
1727 | sbi = EXT4_SB(sb); | ||
1728 | es = EXT4_SB(sb)->s_es; | ||
1729 | ext4_debug("goal=%llu.\n", goal); | ||
1730 | /* | ||
1731 | * Allocate a block from reservation only when | ||
1732 | * filesystem is mounted with reservation(default,-o reservation), and | ||
1733 | * it's a regular file, and | ||
1734 | * the desired window size is greater than 0 (One could use ioctl | ||
1735 | * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off | ||
1736 | * reservation on that particular file) | ||
1737 | */ | ||
1738 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
1739 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | ||
1740 | my_rsv = &block_i->rsv_window_node; | ||
1741 | |||
1742 | /* | ||
1743 | * First, test whether the goal block is free. | ||
1744 | */ | ||
1745 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
1746 | goal >= ext4_blocks_count(es)) | ||
1747 | goal = le32_to_cpu(es->s_first_data_block); | ||
1748 | ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); | ||
1749 | goal_group = group_no; | ||
1750 | retry_alloc: | ||
1751 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1752 | if (!gdp) | ||
1753 | goto io_error; | ||
1754 | |||
1755 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1756 | /* | ||
1757 | * if there is not enough free blocks to make a new resevation | ||
1758 | * turn off reservation for this allocation | ||
1759 | */ | ||
1760 | if (my_rsv && (free_blocks < windowsz) | ||
1761 | && (rsv_is_empty(&my_rsv->rsv_window))) | ||
1762 | my_rsv = NULL; | ||
1763 | |||
1764 | if (free_blocks > 0) { | ||
1765 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1766 | if (!bitmap_bh) | ||
1767 | goto io_error; | ||
1768 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1769 | group_no, bitmap_bh, grp_target_blk, | ||
1770 | my_rsv, &num, &fatal); | ||
1771 | if (fatal) | ||
1772 | goto out; | ||
1773 | if (grp_alloc_blk >= 0) | ||
1774 | goto allocated; | ||
1775 | } | ||
1776 | |||
1777 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
1778 | smp_rmb(); | ||
1779 | |||
1780 | /* | ||
1781 | * Now search the rest of the groups. We assume that | ||
1782 | * group_no and gdp correctly point to the last group visited. | ||
1783 | */ | ||
1784 | for (bgi = 0; bgi < ngroups; bgi++) { | ||
1785 | group_no++; | ||
1786 | if (group_no >= ngroups) | ||
1787 | group_no = 0; | ||
1788 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1789 | if (!gdp) | ||
1790 | goto io_error; | ||
1791 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1792 | /* | ||
1793 | * skip this group if the number of | ||
1794 | * free blocks is less than half of the reservation | ||
1795 | * window size. | ||
1796 | */ | ||
1797 | if (free_blocks <= (windowsz/2)) | ||
1798 | continue; | ||
1799 | |||
1800 | brelse(bitmap_bh); | ||
1801 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1802 | if (!bitmap_bh) | ||
1803 | goto io_error; | ||
1804 | /* | ||
1805 | * try to allocate block(s) from this group, without a goal(-1). | ||
1806 | */ | ||
1807 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1808 | group_no, bitmap_bh, -1, my_rsv, | ||
1809 | &num, &fatal); | ||
1810 | if (fatal) | ||
1811 | goto out; | ||
1812 | if (grp_alloc_blk >= 0) | ||
1813 | goto allocated; | ||
1814 | } | ||
1815 | /* | ||
1816 | * We may end up a bogus ealier ENOSPC error due to | ||
1817 | * filesystem is "full" of reservations, but | ||
1818 | * there maybe indeed free blocks avaliable on disk | ||
1819 | * In this case, we just forget about the reservations | ||
1820 | * just do block allocation as without reservations. | ||
1821 | */ | ||
1822 | if (my_rsv) { | ||
1823 | my_rsv = NULL; | ||
1824 | windowsz = 0; | ||
1825 | group_no = goal_group; | ||
1826 | goto retry_alloc; | ||
1827 | } | ||
1828 | /* No space left on the device */ | ||
1829 | *errp = -ENOSPC; | ||
1830 | goto out; | ||
1831 | |||
1832 | allocated: | ||
1833 | |||
1834 | ext4_debug("using block group %lu(%d)\n", | ||
1835 | group_no, gdp->bg_free_blocks_count); | ||
1836 | |||
1837 | BUFFER_TRACE(gdp_bh, "get_write_access"); | ||
1838 | fatal = ext4_journal_get_write_access(handle, gdp_bh); | ||
1839 | if (fatal) | ||
1840 | goto out; | ||
1841 | |||
1842 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | ||
1843 | |||
1844 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | ||
1845 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || | ||
1846 | in_range(ret_block, ext4_inode_table(sb, gdp), | ||
1847 | EXT4_SB(sb)->s_itb_per_group) || | ||
1848 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | ||
1849 | EXT4_SB(sb)->s_itb_per_group)) { | ||
1850 | ext4_error(sb, "ext4_new_block", | ||
1851 | "Allocating block in system zone - " | ||
1852 | "blocks from %llu, length %lu", | ||
1853 | ret_block, num); | ||
1854 | /* | ||
1855 | * claim_block marked the blocks we allocated | ||
1856 | * as in use. So we may want to selectively | ||
1857 | * mark some of the blocks as free | ||
1858 | */ | ||
1859 | goto retry_alloc; | ||
1860 | } | ||
1861 | |||
1862 | performed_allocation = 1; | ||
1863 | |||
1864 | #ifdef CONFIG_JBD2_DEBUG | ||
1865 | { | ||
1866 | struct buffer_head *debug_bh; | ||
1867 | |||
1868 | /* Record bitmap buffer state in the newly allocated block */ | ||
1869 | debug_bh = sb_find_get_block(sb, ret_block); | ||
1870 | if (debug_bh) { | ||
1871 | BUFFER_TRACE(debug_bh, "state when allocated"); | ||
1872 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); | ||
1873 | brelse(debug_bh); | ||
1874 | } | ||
1875 | } | ||
1876 | jbd_lock_bh_state(bitmap_bh); | ||
1877 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1878 | if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { | ||
1879 | int i; | ||
1880 | |||
1881 | for (i = 0; i < num; i++) { | ||
1882 | if (ext4_test_bit(grp_alloc_blk+i, | ||
1883 | bh2jh(bitmap_bh)->b_committed_data)) { | ||
1884 | printk("%s: block was unexpectedly set in " | ||
1885 | "b_committed_data\n", __func__); | ||
1886 | } | ||
1887 | } | ||
1888 | } | ||
1889 | ext4_debug("found bit %d\n", grp_alloc_blk); | ||
1890 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1891 | jbd_unlock_bh_state(bitmap_bh); | ||
1892 | #endif | ||
1893 | |||
1894 | if (ret_block + num - 1 >= ext4_blocks_count(es)) { | ||
1895 | ext4_error(sb, "ext4_new_block", | ||
1896 | "block(%llu) >= blocks count(%llu) - " | ||
1897 | "block_group = %lu, es == %p ", ret_block, | ||
1898 | ext4_blocks_count(es), group_no, es); | ||
1899 | goto out; | ||
1900 | } | ||
1901 | |||
1902 | /* | ||
1903 | * It is up to the caller to add the new buffer to a journal | ||
1904 | * list of some description. We don't know in advance whether | ||
1905 | * the caller wants to use it as metadata or data. | ||
1906 | */ | ||
1907 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1908 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1909 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
1910 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | ||
1911 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | ||
1912 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1913 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) | ||
1914 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
1915 | |||
1916 | if (sbi->s_log_groups_per_flex) { | ||
1917 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
1918 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
1919 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
1920 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
1921 | } | ||
1922 | |||
1923 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | ||
1924 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
1925 | if (!fatal) | ||
1926 | fatal = err; | ||
1927 | |||
1928 | sb->s_dirt = 1; | ||
1929 | if (fatal) | ||
1930 | goto out; | ||
1931 | |||
1932 | *errp = 0; | ||
1933 | brelse(bitmap_bh); | ||
1934 | DQUOT_FREE_BLOCK(inode, *count-num); | ||
1935 | *count = num; | ||
1936 | return ret_block; | ||
1937 | |||
1938 | io_error: | ||
1939 | *errp = -EIO; | ||
1940 | out: | ||
1941 | if (fatal) { | ||
1942 | *errp = fatal; | ||
1943 | ext4_std_error(sb, fatal); | ||
1944 | } | ||
1945 | /* | ||
1946 | * Undo the block allocation | ||
1947 | */ | ||
1948 | if (!performed_allocation) | ||
1949 | DQUOT_FREE_BLOCK(inode, *count); | ||
1950 | brelse(bitmap_bh); | ||
1951 | return 0; | ||
1952 | } | ||
1953 | |||
1954 | #define EXT4_META_BLOCK 0x1 | 684 | #define EXT4_META_BLOCK 0x1 |
1955 | 685 | ||
1956 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | 686 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, |
@@ -1960,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | |||
1960 | struct ext4_allocation_request ar; | 690 | struct ext4_allocation_request ar; |
1961 | ext4_fsblk_t ret; | 691 | ext4_fsblk_t ret; |
1962 | 692 | ||
1963 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
1964 | return ext4_old_new_blocks(handle, inode, goal, count, errp); | ||
1965 | } | ||
1966 | |||
1967 | memset(&ar, 0, sizeof(ar)); | 693 | memset(&ar, 0, sizeof(ar)); |
1968 | /* Fill with neighbour allocated blocks */ | 694 | /* Fill with neighbour allocated blocks */ |
1969 | 695 | ||
@@ -2005,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
2005 | /* | 731 | /* |
2006 | * Account for the allocated meta blocks | 732 | * Account for the allocated meta blocks |
2007 | */ | 733 | */ |
2008 | if (!(*errp)) { | 734 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
2009 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 735 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2010 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | 736 | EXT4_I(inode)->i_allocated_meta_blocks += *count; |
2011 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 737 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2090,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
2090 | bitmap_count += x; | 816 | bitmap_count += x; |
2091 | } | 817 | } |
2092 | brelse(bitmap_bh); | 818 | brelse(bitmap_bh); |
2093 | printk("ext4_count_free_blocks: stored = %llu" | 819 | printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" |
2094 | ", computed = %llu, %llu\n", | 820 | ", computed = %llu, %llu\n", ext4_free_blocks_count(es), |
2095 | ext4_free_blocks_count(es), | 821 | desc_count, bitmap_count); |
2096 | desc_count, bitmap_count); | ||
2097 | return bitmap_count; | 822 | return bitmap_count; |
2098 | #else | 823 | #else |
2099 | desc_count = 0; | 824 | desc_count = 0; |
@@ -2180,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
2180 | 905 | ||
2181 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || | 906 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || |
2182 | metagroup < first_meta_bg) | 907 | metagroup < first_meta_bg) |
2183 | return ext4_bg_num_gdb_nometa(sb,group); | 908 | return ext4_bg_num_gdb_nometa(sb, group); |
2184 | 909 | ||
2185 | return ext4_bg_num_gdb_meta(sb,group); | 910 | return ext4_bg_num_gdb_meta(sb,group); |
2186 | 911 | ||
2187 | } | 912 | } |
913 | |||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea6750454..0a7a6663c190 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -15,17 +15,17 @@ | |||
15 | 15 | ||
16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; |
17 | 17 | ||
18 | unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) | 18 | unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) |
19 | { | 19 | { |
20 | unsigned int i; | 20 | unsigned int i; |
21 | unsigned long sum = 0; | 21 | unsigned long sum = 0; |
22 | 22 | ||
23 | if (!map) | 23 | if (!map) |
24 | return (0); | 24 | return 0; |
25 | for (i = 0; i < numchars; i++) | 25 | for (i = 0; i < numchars; i++) |
26 | sum += nibblemap[map->b_data[i] & 0xf] + | 26 | sum += nibblemap[map->b_data[i] & 0xf] + |
27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; |
28 | return (sum); | 28 | return sum; |
29 | } | 29 | } |
30 | 30 | ||
31 | #endif /* EXT4FS_DEBUG */ | 31 | #endif /* EXT4FS_DEBUG */ |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..3ca6a2b7632d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | 35 | static int ext4_readdir(struct file *, void *, filldir_t); |
36 | static int ext4_dx_readdir(struct file * filp, | 36 | static int ext4_dx_readdir(struct file *filp, |
37 | void * dirent, filldir_t filldir); | 37 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir (struct inode * inode, | 38 | static int ext4_release_dir(struct inode *inode, |
39 | struct file * filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry (const char * function, struct inode * dir, | 64 | int ext4_check_dir_entry(const char *function, struct inode *dir, |
65 | struct ext4_dir_entry_2 * de, | 65 | struct ext4_dir_entry_2 *de, |
66 | struct buffer_head * bh, | 66 | struct buffer_head *bh, |
67 | unsigned long offset) | 67 | unsigned long offset) |
68 | { | 68 | { |
69 | const char * error_msg = NULL; | 69 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
71 | 71 | ||
72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
82 | error_msg = "inode out of bounds"; | 82 | error_msg = "inode out of bounds"; |
83 | 83 | ||
84 | if (error_msg != NULL) | 84 | if (error_msg != NULL) |
85 | ext4_error (dir->i_sb, function, | 85 | ext4_error(dir->i_sb, function, |
86 | "bad entry in directory #%lu: %s - " | 86 | "bad entry in directory #%lu: %s - " |
87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
88 | dir->i_ino, error_msg, offset, | 88 | dir->i_ino, error_msg, offset, |
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
91 | return error_msg == NULL ? 1 : 0; | 91 | return error_msg == NULL ? 1 : 0; |
92 | } | 92 | } |
93 | 93 | ||
94 | static int ext4_readdir(struct file * filp, | 94 | static int ext4_readdir(struct file *filp, |
95 | void * dirent, filldir_t filldir) | 95 | void *dirent, filldir_t filldir) |
96 | { | 96 | { |
97 | int error = 0; | 97 | int error = 0; |
98 | unsigned long offset; | 98 | unsigned long offset; |
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, | |||
102 | int err; | 102 | int err; |
103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
104 | int ret = 0; | 104 | int ret = 0; |
105 | int dir_has_error = 0; | ||
105 | 106 | ||
106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
107 | 108 | ||
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, | |||
148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
149 | */ | 150 | */ |
150 | if (!bh) { | 151 | if (!bh) { |
151 | ext4_error (sb, "ext4_readdir", | 152 | if (!dir_has_error) { |
152 | "directory #%lu contains a hole at offset %lu", | 153 | ext4_error(sb, __func__, "directory #%lu " |
153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %Lu", |
155 | inode->i_ino, | ||
156 | (unsigned long long) filp->f_pos); | ||
157 | dir_has_error = 1; | ||
158 | } | ||
154 | /* corrupt size? Maybe no more blocks to read */ | 159 | /* corrupt size? Maybe no more blocks to read */ |
155 | if (filp->f_pos > inode->i_blocks << 9) | 160 | if (filp->f_pos > inode->i_blocks << 9) |
156 | break; | 161 | break; |
@@ -187,14 +192,14 @@ revalidate: | |||
187 | while (!error && filp->f_pos < inode->i_size | 192 | while (!error && filp->f_pos < inode->i_size |
188 | && offset < sb->s_blocksize) { | 193 | && offset < sb->s_blocksize) { |
189 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 194 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
190 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | 195 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, |
191 | bh, offset)) { | 196 | bh, offset)) { |
192 | /* | 197 | /* |
193 | * On error, skip the f_pos to the next block | 198 | * On error, skip the f_pos to the next block |
194 | */ | 199 | */ |
195 | filp->f_pos = (filp->f_pos | | 200 | filp->f_pos = (filp->f_pos | |
196 | (sb->s_blocksize - 1)) + 1; | 201 | (sb->s_blocksize - 1)) + 1; |
197 | brelse (bh); | 202 | brelse(bh); |
198 | ret = stored; | 203 | ret = stored; |
199 | goto out; | 204 | goto out; |
200 | } | 205 | } |
@@ -218,12 +223,12 @@ revalidate: | |||
218 | break; | 223 | break; |
219 | if (version != filp->f_version) | 224 | if (version != filp->f_version) |
220 | goto revalidate; | 225 | goto revalidate; |
221 | stored ++; | 226 | stored++; |
222 | } | 227 | } |
223 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); | 228 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
224 | } | 229 | } |
225 | offset = 0; | 230 | offset = 0; |
226 | brelse (bh); | 231 | brelse(bh); |
227 | } | 232 | } |
228 | out: | 233 | out: |
229 | return ret; | 234 | return ret; |
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
290 | parent = rb_parent(n); | 295 | parent = rb_parent(n); |
291 | fname = rb_entry(n, struct fname, rb_hash); | 296 | fname = rb_entry(n, struct fname, rb_hash); |
292 | while (fname) { | 297 | while (fname) { |
293 | struct fname * old = fname; | 298 | struct fname *old = fname; |
294 | fname = fname->next; | 299 | fname = fname->next; |
295 | kfree (old); | 300 | kfree(old); |
296 | } | 301 | } |
297 | if (!parent) | 302 | if (!parent) |
298 | root->rb_node = NULL; | 303 | root->rb_node = NULL; |
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
331 | struct ext4_dir_entry_2 *dirent) | 336 | struct ext4_dir_entry_2 *dirent) |
332 | { | 337 | { |
333 | struct rb_node **p, *parent = NULL; | 338 | struct rb_node **p, *parent = NULL; |
334 | struct fname * fname, *new_fn; | 339 | struct fname *fname, *new_fn; |
335 | struct dir_private_info *info; | 340 | struct dir_private_info *info; |
336 | int len; | 341 | int len; |
337 | 342 | ||
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
388 | * for all entres on the fname linked list. (Normally there is only | 393 | * for all entres on the fname linked list. (Normally there is only |
389 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 394 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
390 | */ | 395 | */ |
391 | static int call_filldir(struct file * filp, void * dirent, | 396 | static int call_filldir(struct file *filp, void *dirent, |
392 | filldir_t filldir, struct fname *fname) | 397 | filldir_t filldir, struct fname *fname) |
393 | { | 398 | { |
394 | struct dir_private_info *info = filp->private_data; | 399 | struct dir_private_info *info = filp->private_data; |
395 | loff_t curr_pos; | 400 | loff_t curr_pos; |
396 | struct inode *inode = filp->f_path.dentry->d_inode; | 401 | struct inode *inode = filp->f_path.dentry->d_inode; |
397 | struct super_block * sb; | 402 | struct super_block *sb; |
398 | int error; | 403 | int error; |
399 | 404 | ||
400 | sb = inode->i_sb; | 405 | sb = inode->i_sb; |
401 | 406 | ||
402 | if (!fname) { | 407 | if (!fname) { |
403 | printk("call_filldir: called with null fname?!?\n"); | 408 | printk(KERN_ERR "ext4: call_filldir: called with " |
409 | "null fname?!?\n"); | ||
404 | return 0; | 410 | return 0; |
405 | } | 411 | } |
406 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
@@ -411,7 +417,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
411 | get_dtype(sb, fname->file_type)); | 417 | get_dtype(sb, fname->file_type)); |
412 | if (error) { | 418 | if (error) { |
413 | filp->f_pos = curr_pos; | 419 | filp->f_pos = curr_pos; |
414 | info->extra_fname = fname->next; | 420 | info->extra_fname = fname; |
415 | return error; | 421 | return error; |
416 | } | 422 | } |
417 | fname = fname->next; | 423 | fname = fname->next; |
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, | |||
419 | return 0; | 425 | return 0; |
420 | } | 426 | } |
421 | 427 | ||
422 | static int ext4_dx_readdir(struct file * filp, | 428 | static int ext4_dx_readdir(struct file *filp, |
423 | void * dirent, filldir_t filldir) | 429 | void *dirent, filldir_t filldir) |
424 | { | 430 | { |
425 | struct dir_private_info *info = filp->private_data; | 431 | struct dir_private_info *info = filp->private_data; |
426 | struct inode *inode = filp->f_path.dentry->d_inode; | 432 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -450,11 +456,21 @@ static int ext4_dx_readdir(struct file * filp, | |||
450 | * If there are any leftover names on the hash collision | 456 | * If there are any leftover names on the hash collision |
451 | * chain, return them first. | 457 | * chain, return them first. |
452 | */ | 458 | */ |
453 | if (info->extra_fname && | 459 | if (info->extra_fname) { |
454 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 460 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
455 | goto finished; | 461 | goto finished; |
456 | 462 | ||
457 | if (!info->curr_node) | 463 | info->extra_fname = NULL; |
464 | info->curr_node = rb_next(info->curr_node); | ||
465 | if (!info->curr_node) { | ||
466 | if (info->next_hash == ~0) { | ||
467 | filp->f_pos = EXT4_HTREE_EOF; | ||
468 | goto finished; | ||
469 | } | ||
470 | info->curr_hash = info->next_hash; | ||
471 | info->curr_minor_hash = 0; | ||
472 | } | ||
473 | } else if (!info->curr_node) | ||
458 | info->curr_node = rb_first(&info->root); | 474 | info->curr_node = rb_first(&info->root); |
459 | 475 | ||
460 | while (1) { | 476 | while (1) { |
@@ -501,7 +517,7 @@ finished: | |||
501 | return 0; | 517 | return 0; |
502 | } | 518 | } |
503 | 519 | ||
504 | static int ext4_release_dir (struct inode * inode, struct file * filp) | 520 | static int ext4_release_dir(struct inode *inode, struct file *filp) |
505 | { | 521 | { |
506 | if (filp->private_data) | 522 | if (filp->private_data) |
507 | ext4_htree_free_dir_info(filp->private_data); | 523 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..6690a41cdd9f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -44,9 +44,9 @@ | |||
44 | #ifdef EXT4FS_DEBUG | 44 | #ifdef EXT4FS_DEBUG |
45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
46 | do { \ | 46 | do { \ |
47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
48 | __FILE__, __LINE__, __func__); \ | 48 | __FILE__, __LINE__, __func__); \ |
49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk(KERN_DEBUG f, ## a); \ |
50 | } while (0) | 50 | } while (0) |
51 | #else | 51 | #else |
52 | #define ext4_debug(f, a...) do {} while (0) | 52 | #define ext4_debug(f, a...) do {} while (0) |
@@ -128,7 +128,7 @@ struct ext4_allocation_request { | |||
128 | #else | 128 | #else |
129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
130 | #endif | 130 | #endif |
131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) | 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
134 | #else | 134 | #else |
@@ -245,7 +245,7 @@ struct flex_groups { | |||
245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
246 | 246 | ||
247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
248 | #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 248 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Inode dynamic state flags | 251 | * Inode dynamic state flags |
@@ -291,8 +291,6 @@ struct ext4_new_group_data { | |||
291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
294 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | ||
295 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) | ||
296 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 294 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
297 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 295 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
298 | #ifdef CONFIG_JBD2_DEBUG | 296 | #ifdef CONFIG_JBD2_DEBUG |
@@ -300,7 +298,10 @@ struct ext4_new_group_data { | |||
300 | #endif | 298 | #endif |
301 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 299 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
302 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 300 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
303 | #define EXT4_IOC_MIGRATE _IO('f', 7) | 301 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
302 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | ||
303 | #define EXT4_IOC_MIGRATE _IO('f', 9) | ||
304 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | ||
304 | 305 | ||
305 | /* | 306 | /* |
306 | * ioctl commands in 32 bit emulation | 307 | * ioctl commands in 32 bit emulation |
@@ -538,8 +539,9 @@ do { \ | |||
538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 539 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 540 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 541 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | ||
542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
543 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | ||
544 | |||
543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 545 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
544 | #ifndef _LINUX_EXT2_FS_H | 546 | #ifndef _LINUX_EXT2_FS_H |
545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 547 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
@@ -667,7 +669,7 @@ struct ext4_super_block { | |||
667 | }; | 669 | }; |
668 | 670 | ||
669 | #ifdef __KERNEL__ | 671 | #ifdef __KERNEL__ |
670 | static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) | 672 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
671 | { | 673 | { |
672 | return sb->s_fs_info; | 674 | return sb->s_fs_info; |
673 | } | 675 | } |
@@ -725,11 +727,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
725 | */ | 727 | */ |
726 | 728 | ||
727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 729 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
728 | ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) | 730 | (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) |
729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 731 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
730 | ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) | 732 | (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) |
731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 733 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
732 | ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) | 734 | (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) |
733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 735 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 736 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 737 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
@@ -789,6 +791,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
789 | #define EXT4_DEF_RESUID 0 | 791 | #define EXT4_DEF_RESUID 0 |
790 | #define EXT4_DEF_RESGID 0 | 792 | #define EXT4_DEF_RESGID 0 |
791 | 793 | ||
794 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | ||
795 | |||
792 | /* | 796 | /* |
793 | * Default mount options | 797 | * Default mount options |
794 | */ | 798 | */ |
@@ -954,6 +958,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
954 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 958 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
955 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); | 959 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); |
956 | 960 | ||
961 | extern struct proc_dir_entry *ext4_proc_root; | ||
962 | |||
963 | #ifdef CONFIG_PROC_FS | ||
964 | extern const struct file_operations ext4_ui_proc_fops; | ||
965 | |||
966 | #define EXT4_PROC_HANDLER(name, var) \ | ||
967 | do { \ | ||
968 | proc = proc_create_data(name, mode, sbi->s_proc, \ | ||
969 | &ext4_ui_proc_fops, &sbi->s_##var); \ | ||
970 | if (proc == NULL) { \ | ||
971 | printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ | ||
972 | goto err_out; \ | ||
973 | } \ | ||
974 | } while (0) | ||
975 | #else | ||
976 | #define EXT4_PROC_HANDLER(name, var) | ||
977 | #endif | ||
978 | |||
957 | /* | 979 | /* |
958 | * Function prototypes | 980 | * Function prototypes |
959 | */ | 981 | */ |
@@ -981,23 +1003,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1003 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 1004 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
983 | unsigned long *count, int *errp); | 1005 | unsigned long *count, int *errp); |
984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | 1006 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
985 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 1007 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
987 | ext4_fsblk_t nblocks); | 1008 | s64 nblocks); |
988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 1009 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
989 | ext4_fsblk_t block, unsigned long count, int metadata); | 1010 | ext4_fsblk_t block, unsigned long count, int metadata); |
990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 1011 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
991 | ext4_fsblk_t block, unsigned long count, | 1012 | ext4_fsblk_t block, unsigned long count, |
992 | unsigned long *pdquot_freed_blocks); | 1013 | unsigned long *pdquot_freed_blocks); |
993 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); | 1014 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
994 | extern void ext4_check_blocks_bitmap (struct super_block *); | 1015 | extern void ext4_check_blocks_bitmap(struct super_block *); |
995 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1016 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
996 | ext4_group_t block_group, | 1017 | ext4_group_t block_group, |
997 | struct buffer_head ** bh); | 1018 | struct buffer_head ** bh); |
998 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1019 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
999 | extern void ext4_init_block_alloc_info(struct inode *); | ||
1000 | extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); | ||
1001 | 1020 | ||
1002 | /* dir.c */ | 1021 | /* dir.c */ |
1003 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1022 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1009,20 +1028,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1009 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1028 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1010 | 1029 | ||
1011 | /* fsync.c */ | 1030 | /* fsync.c */ |
1012 | extern int ext4_sync_file (struct file *, struct dentry *, int); | 1031 | extern int ext4_sync_file(struct file *, struct dentry *, int); |
1013 | 1032 | ||
1014 | /* hash.c */ | 1033 | /* hash.c */ |
1015 | extern int ext4fs_dirhash(const char *name, int len, struct | 1034 | extern int ext4fs_dirhash(const char *name, int len, struct |
1016 | dx_hash_info *hinfo); | 1035 | dx_hash_info *hinfo); |
1017 | 1036 | ||
1018 | /* ialloc.c */ | 1037 | /* ialloc.c */ |
1019 | extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); | 1038 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); |
1020 | extern void ext4_free_inode (handle_t *, struct inode *); | 1039 | extern void ext4_free_inode(handle_t *, struct inode *); |
1021 | extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); | 1040 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
1022 | extern unsigned long ext4_count_free_inodes (struct super_block *); | 1041 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1023 | extern unsigned long ext4_count_dirs (struct super_block *); | 1042 | extern unsigned long ext4_count_dirs(struct super_block *); |
1024 | extern void ext4_check_inodes_bitmap (struct super_block *); | 1043 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1025 | extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | 1044 | extern unsigned long ext4_count_free(struct buffer_head *, unsigned); |
1026 | 1045 | ||
1027 | /* mballoc.c */ | 1046 | /* mballoc.c */ |
1028 | extern long ext4_mb_stats; | 1047 | extern long ext4_mb_stats; |
@@ -1032,7 +1051,7 @@ extern int ext4_mb_release(struct super_block *); | |||
1032 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1051 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
1033 | struct ext4_allocation_request *, int *); | 1052 | struct ext4_allocation_request *, int *); |
1034 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1053 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1035 | extern void ext4_mb_discard_inode_preallocations(struct inode *); | 1054 | extern void ext4_discard_preallocations(struct inode *); |
1036 | extern int __init init_ext4_mballoc(void); | 1055 | extern int __init init_ext4_mballoc(void); |
1037 | extern void exit_ext4_mballoc(void); | 1056 | extern void exit_ext4_mballoc(void); |
1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1057 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
@@ -1050,39 +1069,41 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, | |||
1050 | ext4_lblk_t, int, int *); | 1069 | ext4_lblk_t, int, int *); |
1051 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1070 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
1052 | ext4_lblk_t, int, int *); | 1071 | ext4_lblk_t, int, int *); |
1072 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1073 | struct buffer_head *bh_result, int create); | ||
1053 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 1074 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
1054 | ext4_lblk_t iblock, unsigned long maxblocks, | 1075 | ext4_lblk_t iblock, unsigned long maxblocks, |
1055 | struct buffer_head *bh_result, | 1076 | struct buffer_head *bh_result, |
1056 | int create, int extend_disksize); | 1077 | int create, int extend_disksize); |
1057 | 1078 | ||
1058 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1079 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
1059 | extern int ext4_write_inode (struct inode *, int); | 1080 | extern int ext4_write_inode(struct inode *, int); |
1060 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1081 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1061 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1082 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1062 | struct kstat *stat); | 1083 | struct kstat *stat); |
1063 | extern void ext4_delete_inode (struct inode *); | 1084 | extern void ext4_delete_inode(struct inode *); |
1064 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1085 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1065 | extern void ext4_discard_reservation (struct inode *); | ||
1066 | extern void ext4_dirty_inode(struct inode *); | 1086 | extern void ext4_dirty_inode(struct inode *); |
1067 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1087 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
1068 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1069 | extern int ext4_can_truncate(struct inode *inode); | 1089 | extern int ext4_can_truncate(struct inode *inode); |
1070 | extern void ext4_truncate (struct inode *); | 1090 | extern void ext4_truncate(struct inode *); |
1071 | extern void ext4_set_inode_flags(struct inode *); | 1091 | extern void ext4_set_inode_flags(struct inode *); |
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1092 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1093 | extern void ext4_set_aops(struct inode *inode); |
1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1094 | extern int ext4_writepage_trans_blocks(struct inode *); |
1095 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1096 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
1075 | extern int ext4_block_truncate_page(handle_t *handle, | 1097 | extern int ext4_block_truncate_page(handle_t *handle, |
1076 | struct address_space *mapping, loff_t from); | 1098 | struct address_space *mapping, loff_t from); |
1077 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1099 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
1078 | 1100 | ||
1079 | /* ioctl.c */ | 1101 | /* ioctl.c */ |
1080 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1102 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1081 | extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); | 1103 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
1082 | 1104 | ||
1083 | /* migrate.c */ | 1105 | /* migrate.c */ |
1084 | extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, | 1106 | extern int ext4_ext_migrate(struct inode *); |
1085 | unsigned long); | ||
1086 | /* namei.c */ | 1107 | /* namei.c */ |
1087 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1108 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1088 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1109 | extern int ext4_orphan_del(handle_t *, struct inode *); |
@@ -1097,14 +1118,14 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1097 | ext4_fsblk_t n_blocks_count); | 1118 | ext4_fsblk_t n_blocks_count); |
1098 | 1119 | ||
1099 | /* super.c */ | 1120 | /* super.c */ |
1100 | extern void ext4_error (struct super_block *, const char *, const char *, ...) | 1121 | extern void ext4_error(struct super_block *, const char *, const char *, ...) |
1101 | __attribute__ ((format (printf, 3, 4))); | 1122 | __attribute__ ((format (printf, 3, 4))); |
1102 | extern void __ext4_std_error (struct super_block *, const char *, int); | 1123 | extern void __ext4_std_error(struct super_block *, const char *, int); |
1103 | extern void ext4_abort (struct super_block *, const char *, const char *, ...) | 1124 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
1104 | __attribute__ ((format (printf, 3, 4))); | 1125 | __attribute__ ((format (printf, 3, 4))); |
1105 | extern void ext4_warning (struct super_block *, const char *, const char *, ...) | 1126 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1106 | __attribute__ ((format (printf, 3, 4))); | 1127 | __attribute__ ((format (printf, 3, 4))); |
1107 | extern void ext4_update_dynamic_rev (struct super_block *sb); | 1128 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1108 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1129 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1109 | __u32 compat); | 1130 | __u32 compat); |
1110 | extern int ext4_update_rocompat_feature(handle_t *handle, | 1131 | extern int ext4_update_rocompat_feature(handle_t *handle, |
@@ -1177,7 +1198,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | |||
1177 | 1198 | ||
1178 | static inline | 1199 | static inline |
1179 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 1200 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
1180 | ext4_group_t group) | 1201 | ext4_group_t group) |
1181 | { | 1202 | { |
1182 | struct ext4_group_info ***grp_info; | 1203 | struct ext4_group_info ***grp_info; |
1183 | long indexv, indexh; | 1204 | long indexv, indexh; |
@@ -1205,6 +1226,28 @@ do { \ | |||
1205 | __ext4_std_error((sb), __func__, (errno)); \ | 1226 | __ext4_std_error((sb), __func__, (errno)); \ |
1206 | } while (0) | 1227 | } while (0) |
1207 | 1228 | ||
1229 | #ifdef CONFIG_SMP | ||
1230 | /* Each CPU can accumulate FBC_BATCH blocks in their local | ||
1231 | * counters. So we need to make sure we have free blocks more | ||
1232 | * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. | ||
1233 | */ | ||
1234 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) | ||
1235 | #else | ||
1236 | #define EXT4_FREEBLOCKS_WATERMARK 0 | ||
1237 | #endif | ||
1238 | |||
1239 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | ||
1240 | { | ||
1241 | /* | ||
1242 | * XXX: replace with spinlock if seen contended -bzzz | ||
1243 | */ | ||
1244 | down_write(&EXT4_I(inode)->i_data_sem); | ||
1245 | if (newsize > EXT4_I(inode)->i_disksize) | ||
1246 | EXT4_I(inode)->i_disksize = newsize; | ||
1247 | up_write(&EXT4_I(inode)->i_data_sem); | ||
1248 | return ; | ||
1249 | } | ||
1250 | |||
1208 | /* | 1251 | /* |
1209 | * Inodes and files operations | 1252 | * Inodes and files operations |
1210 | */ | 1253 | */ |
@@ -1227,6 +1270,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1227 | /* extents.c */ | 1270 | /* extents.c */ |
1228 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1271 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1229 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1272 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1273 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | ||
1274 | int chunk); | ||
1230 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1275 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1231 | ext4_lblk_t iblock, | 1276 | ext4_lblk_t iblock, |
1232 | unsigned long max_blocks, struct buffer_head *bh_result, | 1277 | unsigned long max_blocks, struct buffer_head *bh_result, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..bec7ce59fc0d 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -124,6 +124,19 @@ struct ext4_ext_path { | |||
124 | #define EXT4_EXT_CACHE_GAP 1 | 124 | #define EXT4_EXT_CACHE_GAP 1 |
125 | #define EXT4_EXT_CACHE_EXTENT 2 | 125 | #define EXT4_EXT_CACHE_EXTENT 2 |
126 | 126 | ||
127 | /* | ||
128 | * to be called by ext4_ext_walk_space() | ||
129 | * negative retcode - error | ||
130 | * positive retcode - signal for ext4_ext_walk_space(), see below | ||
131 | * callback must return valid extent (passed or newly created) | ||
132 | */ | ||
133 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | ||
134 | struct ext4_ext_cache *, | ||
135 | struct ext4_extent *, void *); | ||
136 | |||
137 | #define EXT_CONTINUE 0 | ||
138 | #define EXT_BREAK 1 | ||
139 | #define EXT_REPEAT 2 | ||
127 | 140 | ||
128 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
129 | 142 | ||
@@ -216,12 +229,16 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | |||
216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 229 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 230 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 231 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
219 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 232 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
233 | int num, | ||
234 | struct ext4_ext_path *path); | ||
220 | extern int ext4_ext_try_to_merge(struct inode *inode, | 235 | extern int ext4_ext_try_to_merge(struct inode *inode, |
221 | struct ext4_ext_path *path, | 236 | struct ext4_ext_path *path, |
222 | struct ext4_extent *); | 237 | struct ext4_extent *); |
223 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 238 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
224 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 239 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
240 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
241 | ext_prepare_callback, void *); | ||
225 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 242 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
226 | struct ext4_ext_path *); | 243 | struct ext4_ext_path *); |
227 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | 244 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e4..5c124c0ac6d3 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; | |||
33 | /* data type for block group number */ | 33 | /* data type for block group number */ |
34 | typedef unsigned long ext4_group_t; | 34 | typedef unsigned long ext4_group_t; |
35 | 35 | ||
36 | struct ext4_reserve_window { | ||
37 | ext4_fsblk_t _rsv_start; /* First byte reserved */ | ||
38 | ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ | ||
39 | }; | ||
40 | |||
41 | struct ext4_reserve_window_node { | ||
42 | struct rb_node rsv_node; | ||
43 | __u32 rsv_goal_size; | ||
44 | __u32 rsv_alloc_hit; | ||
45 | struct ext4_reserve_window rsv_window; | ||
46 | }; | ||
47 | |||
48 | struct ext4_block_alloc_info { | ||
49 | /* information about reservation window */ | ||
50 | struct ext4_reserve_window_node rsv_window_node; | ||
51 | /* | ||
52 | * was i_next_alloc_block in ext4_inode_info | ||
53 | * is the logical (file-relative) number of the | ||
54 | * most-recently-allocated block in this file. | ||
55 | * We use this for detecting linearly ascending allocation requests. | ||
56 | */ | ||
57 | ext4_lblk_t last_alloc_logical_block; | ||
58 | /* | ||
59 | * Was i_next_alloc_goal in ext4_inode_info | ||
60 | * is the *physical* companion to i_next_alloc_block. | ||
61 | * it the physical block number of the block which was most-recentl | ||
62 | * allocated to this file. This give us the goal (target) for the next | ||
63 | * allocation when we detect linearly ascending requests. | ||
64 | */ | ||
65 | ext4_fsblk_t last_alloc_physical_block; | ||
66 | }; | ||
67 | |||
68 | #define rsv_start rsv_window._rsv_start | 36 | #define rsv_start rsv_window._rsv_start |
69 | #define rsv_end rsv_window._rsv_end | 37 | #define rsv_end rsv_window._rsv_end |
70 | 38 | ||
@@ -97,11 +65,8 @@ struct ext4_inode_info { | |||
97 | ext4_group_t i_block_group; | 65 | ext4_group_t i_block_group; |
98 | __u32 i_state; /* Dynamic state flags for ext4 */ | 66 | __u32 i_state; /* Dynamic state flags for ext4 */ |
99 | 67 | ||
100 | /* block reservation info */ | ||
101 | struct ext4_block_alloc_info *i_block_alloc_info; | ||
102 | |||
103 | ext4_lblk_t i_dir_start_lookup; | 68 | ext4_lblk_t i_dir_start_lookup; |
104 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 69 | #ifdef CONFIG_EXT4_FS_XATTR |
105 | /* | 70 | /* |
106 | * Extended attributes can be read independently of the main file | 71 | * Extended attributes can be read independently of the main file |
107 | * data. Taking i_mutex even when reading would cause contention | 72 | * data. Taking i_mutex even when reading would cause contention |
@@ -111,7 +76,7 @@ struct ext4_inode_info { | |||
111 | */ | 76 | */ |
112 | struct rw_semaphore xattr_sem; | 77 | struct rw_semaphore xattr_sem; |
113 | #endif | 78 | #endif |
114 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 79 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
115 | struct posix_acl *i_acl; | 80 | struct posix_acl *i_acl; |
116 | struct posix_acl *i_default_acl; | 81 | struct posix_acl *i_default_acl; |
117 | #endif | 82 | #endif |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -51,6 +51,14 @@ | |||
51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
53 | 53 | ||
54 | /* | ||
55 | * Define the number of metadata blocks we need to account to modify data. | ||
56 | * | ||
57 | * This include super block, inode block, quota blocks and xattr blocks | ||
58 | */ | ||
59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | ||
60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
61 | |||
54 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
55 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
56 | * generous. We can grow the delete transaction later if necessary. */ | 64 | * generous. We can grow the delete transaction later if necessary. */ |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d5531..6a0b40d43264 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -40,8 +40,8 @@ struct ext4_sb_info { | |||
40 | unsigned long s_blocks_last; /* Last seen block count */ | 40 | unsigned long s_blocks_last; /* Last seen block count */ |
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
43 | struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ | 43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
44 | struct buffer_head ** s_group_desc; | 44 | struct buffer_head **s_group_desc; |
45 | unsigned long s_mount_opt; | 45 | unsigned long s_mount_opt; |
46 | ext4_fsblk_t s_sb_block; | 46 | ext4_fsblk_t s_sb_block; |
47 | uid_t s_resuid; | 47 | uid_t s_resuid; |
@@ -52,6 +52,7 @@ struct ext4_sb_info { | |||
52 | int s_desc_per_block_bits; | 52 | int s_desc_per_block_bits; |
53 | int s_inode_size; | 53 | int s_inode_size; |
54 | int s_first_ino; | 54 | int s_first_ino; |
55 | unsigned int s_inode_readahead_blks; | ||
55 | spinlock_t s_next_gen_lock; | 56 | spinlock_t s_next_gen_lock; |
56 | u32 s_next_generation; | 57 | u32 s_next_generation; |
57 | u32 s_hash_seed[4]; | 58 | u32 s_hash_seed[4]; |
@@ -59,16 +60,17 @@ struct ext4_sb_info { | |||
59 | struct percpu_counter s_freeblocks_counter; | 60 | struct percpu_counter s_freeblocks_counter; |
60 | struct percpu_counter s_freeinodes_counter; | 61 | struct percpu_counter s_freeinodes_counter; |
61 | struct percpu_counter s_dirs_counter; | 62 | struct percpu_counter s_dirs_counter; |
63 | struct percpu_counter s_dirtyblocks_counter; | ||
62 | struct blockgroup_lock s_blockgroup_lock; | 64 | struct blockgroup_lock s_blockgroup_lock; |
65 | struct proc_dir_entry *s_proc; | ||
63 | 66 | ||
64 | /* root of the per fs reservation window tree */ | 67 | /* root of the per fs reservation window tree */ |
65 | spinlock_t s_rsv_window_lock; | 68 | spinlock_t s_rsv_window_lock; |
66 | struct rb_root s_rsv_window_root; | 69 | struct rb_root s_rsv_window_root; |
67 | struct ext4_reserve_window_node s_rsv_window_head; | ||
68 | 70 | ||
69 | /* Journaling */ | 71 | /* Journaling */ |
70 | struct inode * s_journal_inode; | 72 | struct inode *s_journal_inode; |
71 | struct journal_s * s_journal; | 73 | struct journal_s *s_journal; |
72 | struct list_head s_orphan; | 74 | struct list_head s_orphan; |
73 | unsigned long s_commit_interval; | 75 | unsigned long s_commit_interval; |
74 | struct block_device *journal_bdev; | 76 | struct block_device *journal_bdev; |
@@ -106,12 +108,12 @@ struct ext4_sb_info { | |||
106 | 108 | ||
107 | /* tunables */ | 109 | /* tunables */ |
108 | unsigned long s_stripe; | 110 | unsigned long s_stripe; |
109 | unsigned long s_mb_stream_request; | 111 | unsigned int s_mb_stream_request; |
110 | unsigned long s_mb_max_to_scan; | 112 | unsigned int s_mb_max_to_scan; |
111 | unsigned long s_mb_min_to_scan; | 113 | unsigned int s_mb_min_to_scan; |
112 | unsigned long s_mb_stats; | 114 | unsigned int s_mb_stats; |
113 | unsigned long s_mb_order2_reqs; | 115 | unsigned int s_mb_order2_reqs; |
114 | unsigned long s_mb_group_prealloc; | 116 | unsigned int s_mb_group_prealloc; |
115 | /* where last allocation was done - for stream allocation */ | 117 | /* where last allocation was done - for stream allocation */ |
116 | unsigned long s_mb_last_group; | 118 | unsigned long s_mb_last_group; |
117 | unsigned long s_mb_last_start; | 119 | unsigned long s_mb_last_start; |
@@ -121,7 +123,6 @@ struct ext4_sb_info { | |||
121 | int s_mb_history_cur; | 123 | int s_mb_history_cur; |
122 | int s_mb_history_max; | 124 | int s_mb_history_max; |
123 | int s_mb_history_num; | 125 | int s_mb_history_num; |
124 | struct proc_dir_entry *s_mb_proc; | ||
125 | spinlock_t s_mb_history_lock; | 126 | spinlock_t s_mb_history_lock; |
126 | int s_mb_history_filter; | 127 | int s_mb_history_filter; |
127 | 128 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..ea2ce3c0ae66 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/falloc.h> | 41 | #include <linux/falloc.h> |
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <linux/fiemap.h> | ||
43 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
44 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
45 | 46 | ||
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
383 | ext_debug("\n"); | 384 | ext_debug("\n"); |
384 | } | 385 | } |
385 | #else | 386 | #else |
386 | #define ext4_ext_show_path(inode,path) | 387 | #define ext4_ext_show_path(inode, path) |
387 | #define ext4_ext_show_leaf(inode,path) | 388 | #define ext4_ext_show_leaf(inode, path) |
388 | #endif | 389 | #endif |
389 | 390 | ||
390 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 391 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
440 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { | 441 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { |
441 | if (k != 0 && | 442 | if (k != 0 && |
442 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { | 443 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { |
443 | printk("k=%d, ix=0x%p, first=0x%p\n", k, | 444 | printk(KERN_DEBUG "k=%d, ix=0x%p, " |
444 | ix, EXT_FIRST_INDEX(eh)); | 445 | "first=0x%p\n", k, |
445 | printk("%u <= %u\n", | 446 | ix, EXT_FIRST_INDEX(eh)); |
447 | printk(KERN_DEBUG "%u <= %u\n", | ||
446 | le32_to_cpu(ix->ei_block), | 448 | le32_to_cpu(ix->ei_block), |
447 | le32_to_cpu(ix[-1].ei_block)); | 449 | le32_to_cpu(ix[-1].ei_block)); |
448 | } | 450 | } |
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1475 | struct ext4_ext_path *path, | 1477 | struct ext4_ext_path *path, |
1476 | struct ext4_extent *newext) | 1478 | struct ext4_extent *newext) |
1477 | { | 1479 | { |
1478 | struct ext4_extent_header * eh; | 1480 | struct ext4_extent_header *eh; |
1479 | struct ext4_extent *ex, *fex; | 1481 | struct ext4_extent *ex, *fex; |
1480 | struct ext4_extent *nearex; /* nearest extent */ | 1482 | struct ext4_extent *nearex; /* nearest extent */ |
1481 | struct ext4_ext_path *npath = NULL; | 1483 | struct ext4_ext_path *npath = NULL; |
@@ -1625,6 +1627,113 @@ cleanup: | |||
1625 | return err; | 1627 | return err; |
1626 | } | 1628 | } |
1627 | 1629 | ||
1630 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | ||
1631 | ext4_lblk_t num, ext_prepare_callback func, | ||
1632 | void *cbdata) | ||
1633 | { | ||
1634 | struct ext4_ext_path *path = NULL; | ||
1635 | struct ext4_ext_cache cbex; | ||
1636 | struct ext4_extent *ex; | ||
1637 | ext4_lblk_t next, start = 0, end = 0; | ||
1638 | ext4_lblk_t last = block + num; | ||
1639 | int depth, exists, err = 0; | ||
1640 | |||
1641 | BUG_ON(func == NULL); | ||
1642 | BUG_ON(inode == NULL); | ||
1643 | |||
1644 | while (block < last && block != EXT_MAX_BLOCK) { | ||
1645 | num = last - block; | ||
1646 | /* find extent for this block */ | ||
1647 | path = ext4_ext_find_extent(inode, block, path); | ||
1648 | if (IS_ERR(path)) { | ||
1649 | err = PTR_ERR(path); | ||
1650 | path = NULL; | ||
1651 | break; | ||
1652 | } | ||
1653 | |||
1654 | depth = ext_depth(inode); | ||
1655 | BUG_ON(path[depth].p_hdr == NULL); | ||
1656 | ex = path[depth].p_ext; | ||
1657 | next = ext4_ext_next_allocated_block(path); | ||
1658 | |||
1659 | exists = 0; | ||
1660 | if (!ex) { | ||
1661 | /* there is no extent yet, so try to allocate | ||
1662 | * all requested space */ | ||
1663 | start = block; | ||
1664 | end = block + num; | ||
1665 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
1666 | /* need to allocate space before found extent */ | ||
1667 | start = block; | ||
1668 | end = le32_to_cpu(ex->ee_block); | ||
1669 | if (block + num < end) | ||
1670 | end = block + num; | ||
1671 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
1672 | + ext4_ext_get_actual_len(ex)) { | ||
1673 | /* need to allocate space after found extent */ | ||
1674 | start = block; | ||
1675 | end = block + num; | ||
1676 | if (end >= next) | ||
1677 | end = next; | ||
1678 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
1679 | /* | ||
1680 | * some part of requested space is covered | ||
1681 | * by found extent | ||
1682 | */ | ||
1683 | start = block; | ||
1684 | end = le32_to_cpu(ex->ee_block) | ||
1685 | + ext4_ext_get_actual_len(ex); | ||
1686 | if (block + num < end) | ||
1687 | end = block + num; | ||
1688 | exists = 1; | ||
1689 | } else { | ||
1690 | BUG(); | ||
1691 | } | ||
1692 | BUG_ON(end <= start); | ||
1693 | |||
1694 | if (!exists) { | ||
1695 | cbex.ec_block = start; | ||
1696 | cbex.ec_len = end - start; | ||
1697 | cbex.ec_start = 0; | ||
1698 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1699 | } else { | ||
1700 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
1701 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
1702 | cbex.ec_start = ext_pblock(ex); | ||
1703 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1704 | } | ||
1705 | |||
1706 | BUG_ON(cbex.ec_len == 0); | ||
1707 | err = func(inode, path, &cbex, ex, cbdata); | ||
1708 | ext4_ext_drop_refs(path); | ||
1709 | |||
1710 | if (err < 0) | ||
1711 | break; | ||
1712 | |||
1713 | if (err == EXT_REPEAT) | ||
1714 | continue; | ||
1715 | else if (err == EXT_BREAK) { | ||
1716 | err = 0; | ||
1717 | break; | ||
1718 | } | ||
1719 | |||
1720 | if (ext_depth(inode) != depth) { | ||
1721 | /* depth was changed. we have to realloc path */ | ||
1722 | kfree(path); | ||
1723 | path = NULL; | ||
1724 | } | ||
1725 | |||
1726 | block = cbex.ec_block + cbex.ec_len; | ||
1727 | } | ||
1728 | |||
1729 | if (path) { | ||
1730 | ext4_ext_drop_refs(path); | ||
1731 | kfree(path); | ||
1732 | } | ||
1733 | |||
1734 | return err; | ||
1735 | } | ||
1736 | |||
1628 | static void | 1737 | static void |
1629 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1738 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1630 | __u32 len, ext4_fsblk_t start, int type) | 1739 | __u32 len, ext4_fsblk_t start, int type) |
@@ -1747,54 +1856,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1747 | } | 1856 | } |
1748 | 1857 | ||
1749 | /* | 1858 | /* |
1750 | * ext4_ext_calc_credits_for_insert: | 1859 | * ext4_ext_calc_credits_for_single_extent: |
1751 | * This routine returns max. credits that the extent tree can consume. | 1860 | * This routine returns max. credits that needed to insert an extent |
1752 | * It should be OK for low-performance paths like ->writepage() | 1861 | * to the extent tree. |
1753 | * To allow many writing processes to fit into a single transaction, | 1862 | * When pass the actual path, the caller should calculate credits |
1754 | * the caller should calculate credits under i_data_sem and | 1863 | * under i_data_sem. |
1755 | * pass the actual path. | ||
1756 | */ | 1864 | */ |
1757 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1865 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
1758 | struct ext4_ext_path *path) | 1866 | struct ext4_ext_path *path) |
1759 | { | 1867 | { |
1760 | int depth, needed; | ||
1761 | |||
1762 | if (path) { | 1868 | if (path) { |
1869 | int depth = ext_depth(inode); | ||
1870 | int ret = 0; | ||
1871 | |||
1763 | /* probably there is space in leaf? */ | 1872 | /* probably there is space in leaf? */ |
1764 | depth = ext_depth(inode); | ||
1765 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1873 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
1766 | < le16_to_cpu(path[depth].p_hdr->eh_max)) | 1874 | < le16_to_cpu(path[depth].p_hdr->eh_max)) { |
1767 | return 1; | ||
1768 | } | ||
1769 | |||
1770 | /* | ||
1771 | * given 32-bit logical block (4294967296 blocks), max. tree | ||
1772 | * can be 4 levels in depth -- 4 * 340^4 == 53453440000. | ||
1773 | * Let's also add one more level for imbalance. | ||
1774 | */ | ||
1775 | depth = 5; | ||
1776 | 1875 | ||
1777 | /* allocation of new data block(s) */ | 1876 | /* |
1778 | needed = 2; | 1877 | * There are some space in the leaf tree, no |
1878 | * need to account for leaf block credit | ||
1879 | * | ||
1880 | * bitmaps and block group descriptor blocks | ||
1881 | * and other metadat blocks still need to be | ||
1882 | * accounted. | ||
1883 | */ | ||
1884 | /* 1 bitmap, 1 block group descriptor */ | ||
1885 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
1886 | } | ||
1887 | } | ||
1779 | 1888 | ||
1780 | /* | 1889 | return ext4_chunk_trans_blocks(inode, nrblocks); |
1781 | * tree can be full, so it would need to grow in depth: | 1890 | } |
1782 | * we need one credit to modify old root, credits for | ||
1783 | * new root will be added in split accounting | ||
1784 | */ | ||
1785 | needed += 1; | ||
1786 | 1891 | ||
1787 | /* | 1892 | /* |
1788 | * Index split can happen, we would need: | 1893 | * How many index/leaf blocks need to change/allocate to modify nrblocks? |
1789 | * allocate intermediate indexes (bitmap + group) | 1894 | * |
1790 | * + change two blocks at each level, but root (already included) | 1895 | * if nrblocks are fit in a single extent (chunk flag is 1), then |
1791 | */ | 1896 | * in the worse case, each tree level index/leaf need to be changed |
1792 | needed += (depth * 2) + (depth * 2); | 1897 | * if the tree split due to insert a new extent, then the old tree |
1898 | * index/leaf need to be updated too | ||
1899 | * | ||
1900 | * If the nrblocks are discontiguous, they could cause | ||
1901 | * the whole tree split more than once, but this is really rare. | ||
1902 | */ | ||
1903 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
1904 | { | ||
1905 | int index; | ||
1906 | int depth = ext_depth(inode); | ||
1793 | 1907 | ||
1794 | /* any allocation modifies superblock */ | 1908 | if (chunk) |
1795 | needed += 1; | 1909 | index = depth * 2; |
1910 | else | ||
1911 | index = depth * 3; | ||
1796 | 1912 | ||
1797 | return needed; | 1913 | return index; |
1798 | } | 1914 | } |
1799 | 1915 | ||
1800 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1916 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
@@ -1921,9 +2037,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
1921 | correct_index = 1; | 2037 | correct_index = 1; |
1922 | credits += (ext_depth(inode)) + 1; | 2038 | credits += (ext_depth(inode)) + 1; |
1923 | } | 2039 | } |
1924 | #ifdef CONFIG_QUOTA | ||
1925 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2040 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
1926 | #endif | ||
1927 | 2041 | ||
1928 | err = ext4_ext_journal_restart(handle, credits); | 2042 | err = ext4_ext_journal_restart(handle, credits); |
1929 | if (err) | 2043 | if (err) |
@@ -2137,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) | |||
2137 | */ | 2251 | */ |
2138 | 2252 | ||
2139 | if (test_opt(sb, EXTENTS)) { | 2253 | if (test_opt(sb, EXTENTS)) { |
2140 | printk("EXT4-fs: file extents enabled"); | 2254 | printk(KERN_INFO "EXT4-fs: file extents enabled"); |
2141 | #ifdef AGGRESSIVE_TEST | 2255 | #ifdef AGGRESSIVE_TEST |
2142 | printk(", aggressive tests"); | 2256 | printk(", aggressive tests"); |
2143 | #endif | 2257 | #endif |
@@ -2691,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2691 | goto out2; | 2805 | goto out2; |
2692 | } | 2806 | } |
2693 | /* | 2807 | /* |
2694 | * Okay, we need to do block allocation. Lazily initialize the block | 2808 | * Okay, we need to do block allocation. |
2695 | * allocation info here if necessary. | ||
2696 | */ | 2809 | */ |
2697 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||
2698 | ext4_init_block_alloc_info(inode); | ||
2699 | 2810 | ||
2700 | /* find neighbour allocated blocks */ | 2811 | /* find neighbour allocated blocks */ |
2701 | ar.lleft = iblock; | 2812 | ar.lleft = iblock; |
@@ -2755,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2755 | /* free data blocks we just allocated */ | 2866 | /* free data blocks we just allocated */ |
2756 | /* not a good idea to call discard here directly, | 2867 | /* not a good idea to call discard here directly, |
2757 | * but otherwise we'd need to call it every free() */ | 2868 | * but otherwise we'd need to call it every free() */ |
2758 | ext4_mb_discard_inode_preallocations(inode); | 2869 | ext4_discard_preallocations(inode); |
2759 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2870 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
2760 | ext4_ext_get_actual_len(&newex), 0); | 2871 | ext4_ext_get_actual_len(&newex), 0); |
2761 | goto out2; | 2872 | goto out2; |
@@ -2805,7 +2916,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2805 | /* | 2916 | /* |
2806 | * probably first extent we're gonna free will be last in block | 2917 | * probably first extent we're gonna free will be last in block |
2807 | */ | 2918 | */ |
2808 | err = ext4_writepage_trans_blocks(inode) + 3; | 2919 | err = ext4_writepage_trans_blocks(inode); |
2809 | handle = ext4_journal_start(inode, err); | 2920 | handle = ext4_journal_start(inode, err); |
2810 | if (IS_ERR(handle)) | 2921 | if (IS_ERR(handle)) |
2811 | return; | 2922 | return; |
@@ -2819,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2819 | down_write(&EXT4_I(inode)->i_data_sem); | 2930 | down_write(&EXT4_I(inode)->i_data_sem); |
2820 | ext4_ext_invalidate_cache(inode); | 2931 | ext4_ext_invalidate_cache(inode); |
2821 | 2932 | ||
2822 | ext4_mb_discard_inode_preallocations(inode); | 2933 | ext4_discard_preallocations(inode); |
2823 | 2934 | ||
2824 | /* | 2935 | /* |
2825 | * TODO: optimization is possible here. | 2936 | * TODO: optimization is possible here. |
@@ -2858,27 +2969,6 @@ out_stop: | |||
2858 | ext4_journal_stop(handle); | 2969 | ext4_journal_stop(handle); |
2859 | } | 2970 | } |
2860 | 2971 | ||
2861 | /* | ||
2862 | * ext4_ext_writepage_trans_blocks: | ||
2863 | * calculate max number of blocks we could modify | ||
2864 | * in order to allocate new block for an inode | ||
2865 | */ | ||
2866 | int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | ||
2867 | { | ||
2868 | int needed; | ||
2869 | |||
2870 | needed = ext4_ext_calc_credits_for_insert(inode, NULL); | ||
2871 | |||
2872 | /* caller wants to allocate num blocks, but note it includes sb */ | ||
2873 | needed = needed * num - (num - 1); | ||
2874 | |||
2875 | #ifdef CONFIG_QUOTA | ||
2876 | needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
2877 | #endif | ||
2878 | |||
2879 | return needed; | ||
2880 | } | ||
2881 | |||
2882 | static void ext4_falloc_update_inode(struct inode *inode, | 2972 | static void ext4_falloc_update_inode(struct inode *inode, |
2883 | int mode, loff_t new_size, int update_ctime) | 2973 | int mode, loff_t new_size, int update_ctime) |
2884 | { | 2974 | { |
@@ -2893,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
2893 | * Update only when preallocation was requested beyond | 2983 | * Update only when preallocation was requested beyond |
2894 | * the file size. | 2984 | * the file size. |
2895 | */ | 2985 | */ |
2896 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 2986 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
2897 | new_size > i_size_read(inode)) { | 2987 | if (new_size > i_size_read(inode)) |
2898 | i_size_write(inode, new_size); | 2988 | i_size_write(inode, new_size); |
2899 | EXT4_I(inode)->i_disksize = new_size; | 2989 | if (new_size > EXT4_I(inode)->i_disksize) |
2990 | ext4_update_i_disksize(inode, new_size); | ||
2900 | } | 2991 | } |
2901 | 2992 | ||
2902 | } | 2993 | } |
@@ -2939,10 +3030,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2939 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3030 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
2940 | - block; | 3031 | - block; |
2941 | /* | 3032 | /* |
2942 | * credits to insert 1 extent into extent tree + buffers to be able to | 3033 | * credits to insert 1 extent into extent tree |
2943 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
2944 | */ | 3034 | */ |
2945 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 3035 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
2946 | mutex_lock(&inode->i_mutex); | 3036 | mutex_lock(&inode->i_mutex); |
2947 | retry: | 3037 | retry: |
2948 | while (ret >= 0 && ret < max_blocks) { | 3038 | while (ret >= 0 && ret < max_blocks) { |
@@ -2989,3 +3079,143 @@ retry: | |||
2989 | mutex_unlock(&inode->i_mutex); | 3079 | mutex_unlock(&inode->i_mutex); |
2990 | return ret > 0 ? ret2 : ret; | 3080 | return ret > 0 ? ret2 : ret; |
2991 | } | 3081 | } |
3082 | |||
3083 | /* | ||
3084 | * Callback function called for each extent to gather FIEMAP information. | ||
3085 | */ | ||
3086 | int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | ||
3087 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | ||
3088 | void *data) | ||
3089 | { | ||
3090 | struct fiemap_extent_info *fieinfo = data; | ||
3091 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3092 | __u64 logical; | ||
3093 | __u64 physical; | ||
3094 | __u64 length; | ||
3095 | __u32 flags = 0; | ||
3096 | int error; | ||
3097 | |||
3098 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3099 | |||
3100 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | ||
3101 | pgoff_t offset; | ||
3102 | struct page *page; | ||
3103 | struct buffer_head *bh = NULL; | ||
3104 | |||
3105 | offset = logical >> PAGE_SHIFT; | ||
3106 | page = find_get_page(inode->i_mapping, offset); | ||
3107 | if (!page || !page_has_buffers(page)) | ||
3108 | return EXT_CONTINUE; | ||
3109 | |||
3110 | bh = page_buffers(page); | ||
3111 | |||
3112 | if (!bh) | ||
3113 | return EXT_CONTINUE; | ||
3114 | |||
3115 | if (buffer_delay(bh)) { | ||
3116 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
3117 | page_cache_release(page); | ||
3118 | } else { | ||
3119 | page_cache_release(page); | ||
3120 | return EXT_CONTINUE; | ||
3121 | } | ||
3122 | } | ||
3123 | |||
3124 | physical = (__u64)newex->ec_start << blksize_bits; | ||
3125 | length = (__u64)newex->ec_len << blksize_bits; | ||
3126 | |||
3127 | if (ex && ext4_ext_is_uninitialized(ex)) | ||
3128 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
3129 | |||
3130 | /* | ||
3131 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
3132 | * | ||
3133 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3134 | * this also indicates no more allocated blocks. | ||
3135 | * | ||
3136 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3137 | */ | ||
3138 | if (logical + length - 1 == EXT_MAX_BLOCK || | ||
3139 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | ||
3140 | flags |= FIEMAP_EXTENT_LAST; | ||
3141 | |||
3142 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | ||
3143 | length, flags); | ||
3144 | if (error < 0) | ||
3145 | return error; | ||
3146 | if (error == 1) | ||
3147 | return EXT_BREAK; | ||
3148 | |||
3149 | return EXT_CONTINUE; | ||
3150 | } | ||
3151 | |||
3152 | /* fiemap flags we can handle specified here */ | ||
3153 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
3154 | |||
3155 | int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) | ||
3156 | { | ||
3157 | __u64 physical = 0; | ||
3158 | __u64 length; | ||
3159 | __u32 flags = FIEMAP_EXTENT_LAST; | ||
3160 | int blockbits = inode->i_sb->s_blocksize_bits; | ||
3161 | int error = 0; | ||
3162 | |||
3163 | /* in-inode? */ | ||
3164 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | ||
3165 | struct ext4_iloc iloc; | ||
3166 | int offset; /* offset of xattr in inode */ | ||
3167 | |||
3168 | error = ext4_get_inode_loc(inode, &iloc); | ||
3169 | if (error) | ||
3170 | return error; | ||
3171 | physical = iloc.bh->b_blocknr << blockbits; | ||
3172 | offset = EXT4_GOOD_OLD_INODE_SIZE + | ||
3173 | EXT4_I(inode)->i_extra_isize; | ||
3174 | physical += offset; | ||
3175 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | ||
3176 | flags |= FIEMAP_EXTENT_DATA_INLINE; | ||
3177 | } else { /* external block */ | ||
3178 | physical = EXT4_I(inode)->i_file_acl << blockbits; | ||
3179 | length = inode->i_sb->s_blocksize; | ||
3180 | } | ||
3181 | |||
3182 | if (physical) | ||
3183 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | ||
3184 | length, flags); | ||
3185 | return (error < 0 ? error : 0); | ||
3186 | } | ||
3187 | |||
3188 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
3189 | __u64 start, __u64 len) | ||
3190 | { | ||
3191 | ext4_lblk_t start_blk; | ||
3192 | ext4_lblk_t len_blks; | ||
3193 | int error = 0; | ||
3194 | |||
3195 | /* fallback to generic here if not in extents fmt */ | ||
3196 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
3197 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
3198 | ext4_get_block); | ||
3199 | |||
3200 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | ||
3201 | return -EBADR; | ||
3202 | |||
3203 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | ||
3204 | error = ext4_xattr_fiemap(inode, fieinfo); | ||
3205 | } else { | ||
3206 | start_blk = start >> inode->i_sb->s_blocksize_bits; | ||
3207 | len_blks = len >> inode->i_sb->s_blocksize_bits; | ||
3208 | |||
3209 | /* | ||
3210 | * Walk the extent tree gathering extent information. | ||
3211 | * ext4_ext_fiemap_cb will push extents back to user. | ||
3212 | */ | ||
3213 | down_write(&EXT4_I(inode)->i_data_sem); | ||
3214 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | ||
3215 | ext4_ext_fiemap_cb, fieinfo); | ||
3216 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3217 | } | ||
3218 | |||
3219 | return error; | ||
3220 | } | ||
3221 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db4..6bd11fba71f7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -31,14 +31,14 @@ | |||
31 | * from ext4_file_open: open gets called at every open, but release | 31 | * from ext4_file_open: open gets called at every open, but release |
32 | * gets called only when /all/ the files are closed. | 32 | * gets called only when /all/ the files are closed. |
33 | */ | 33 | */ |
34 | static int ext4_release_file (struct inode * inode, struct file * filp) | 34 | static int ext4_release_file(struct inode *inode, struct file *filp) |
35 | { | 35 | { |
36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | down_write(&EXT4_I(inode)->i_data_sem); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_preallocations(inode); |
42 | up_write(&EXT4_I(inode)->i_data_sem); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||
143 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
144 | __u64 start, __u64 len); | ||
145 | |||
143 | const struct file_operations ext4_file_operations = { | 146 | const struct file_operations ext4_file_operations = { |
144 | .llseek = generic_file_llseek, | 147 | .llseek = generic_file_llseek, |
145 | .read = do_sync_read, | 148 | .read = do_sync_read, |
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
162 | .truncate = ext4_truncate, | 165 | .truncate = ext4_truncate, |
163 | .setattr = ext4_setattr, | 166 | .setattr = ext4_setattr, |
164 | .getattr = ext4_getattr, | 167 | .getattr = ext4_getattr, |
165 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 168 | #ifdef CONFIG_EXT4_FS_XATTR |
166 | .setxattr = generic_setxattr, | 169 | .setxattr = generic_setxattr, |
167 | .getxattr = generic_getxattr, | 170 | .getxattr = generic_getxattr, |
168 | .listxattr = ext4_listxattr, | 171 | .listxattr = ext4_listxattr, |
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
170 | #endif | 173 | #endif |
171 | .permission = ext4_permission, | 174 | .permission = ext4_permission, |
172 | .fallocate = ext4_fallocate, | 175 | .fallocate = ext4_fallocate, |
176 | .fiemap = ext4_fiemap, | ||
173 | }; | 177 | }; |
174 | 178 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad31..5afe4370840b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/marker.h> | ||
31 | #include "ext4.h" | 32 | #include "ext4.h" |
32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
33 | 34 | ||
@@ -43,7 +44,7 @@ | |||
43 | * inode to disk. | 44 | * inode to disk. |
44 | */ | 45 | */ |
45 | 46 | ||
46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 47 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
47 | { | 48 | { |
48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
51 | 52 | ||
52 | J_ASSERT(ext4_journal_current_handle() == NULL); | 53 | J_ASSERT(ext4_journal_current_handle() == NULL); |
53 | 54 | ||
55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | ||
56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
57 | dentry->d_parent->d_inode->i_ino); | ||
58 | |||
54 | /* | 59 | /* |
55 | * data=writeback: | 60 | * data=writeback: |
56 | * The caller's filemap_fdatawrite()/wait will sync the data. | 61 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe390..556ca8eba3db 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
27 | sum += DELTA; | 27 | sum += DELTA; |
28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); |
29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); |
30 | } while(--n); | 30 | } while (--n); |
31 | 31 | ||
32 | buf[0] += b0; | 32 | buf[0] += b0; |
33 | buf[1] += b1; | 33 | buf[1] += b1; |
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
35 | 35 | ||
36 | 36 | ||
37 | /* The old legacy hash */ | 37 | /* The old legacy hash */ |
38 | static __u32 dx_hack_hash (const char *name, int len) | 38 | static __u32 dx_hack_hash(const char *name, int len) |
39 | { | 39 | { |
40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; | 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
41 | while (len--) { | 41 | while (len--) { |
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | |||
59 | val = pad; | 59 | val = pad; |
60 | if (len > num*4) | 60 | if (len > num*4) |
61 | len = num * 4; | 61 | len = num * 4; |
62 | for (i=0; i < len; i++) { | 62 | for (i = 0; i < len; i++) { |
63 | if ((i % 4) == 0) | 63 | if ((i % 4) == 0) |
64 | val = pad; | 64 | val = pad; |
65 | val = msg[i] + (val << 8); | 65 | val = msg[i] + (val << 8); |
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
104 | 104 | ||
105 | /* Check to see if the seed is all zero's */ | 105 | /* Check to see if the seed is all zero's */ |
106 | if (hinfo->seed) { | 106 | if (hinfo->seed) { |
107 | for (i=0; i < 4; i++) { | 107 | for (i = 0; i < 4; i++) { |
108 | if (hinfo->seed[i]) | 108 | if (hinfo->seed[i]) |
109 | break; | 109 | break; |
110 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..fe34d74cfb19 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
115 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
116 | return NULL; | 116 | return NULL; |
117 | } | 117 | } |
118 | if (bh_uptodate_or_lock(bh)) | 118 | if (buffer_uptodate(bh) && |
119 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
119 | return bh; | 120 | return bh; |
120 | 121 | ||
122 | lock_buffer(bh); | ||
121 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 123 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
122 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 124 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
123 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 125 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
154 | * though), and then we'd have two inodes sharing the | 156 | * though), and then we'd have two inodes sharing the |
155 | * same inode number and space on the harddisk. | 157 | * same inode number and space on the harddisk. |
156 | */ | 158 | */ |
157 | void ext4_free_inode (handle_t *handle, struct inode * inode) | 159 | void ext4_free_inode(handle_t *handle, struct inode *inode) |
158 | { | 160 | { |
159 | struct super_block * sb = inode->i_sb; | 161 | struct super_block *sb = inode->i_sb; |
160 | int is_directory; | 162 | int is_directory; |
161 | unsigned long ino; | 163 | unsigned long ino; |
162 | struct buffer_head *bitmap_bh = NULL; | 164 | struct buffer_head *bitmap_bh = NULL; |
163 | struct buffer_head *bh2; | 165 | struct buffer_head *bh2; |
164 | ext4_group_t block_group; | 166 | ext4_group_t block_group; |
165 | unsigned long bit; | 167 | unsigned long bit; |
166 | struct ext4_group_desc * gdp; | 168 | struct ext4_group_desc *gdp; |
167 | struct ext4_super_block * es; | 169 | struct ext4_super_block *es; |
168 | struct ext4_sb_info *sbi; | 170 | struct ext4_sb_info *sbi; |
169 | int fatal = 0, err; | 171 | int fatal = 0, err; |
170 | ext4_group_t flex_group; | 172 | ext4_group_t flex_group; |
171 | 173 | ||
172 | if (atomic_read(&inode->i_count) > 1) { | 174 | if (atomic_read(&inode->i_count) > 1) { |
173 | printk ("ext4_free_inode: inode has count=%d\n", | 175 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", |
174 | atomic_read(&inode->i_count)); | 176 | atomic_read(&inode->i_count)); |
175 | return; | 177 | return; |
176 | } | 178 | } |
177 | if (inode->i_nlink) { | 179 | if (inode->i_nlink) { |
178 | printk ("ext4_free_inode: inode has nlink=%d\n", | 180 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", |
179 | inode->i_nlink); | 181 | inode->i_nlink); |
180 | return; | 182 | return; |
181 | } | 183 | } |
182 | if (!sb) { | 184 | if (!sb) { |
183 | printk("ext4_free_inode: inode on nonexistent device\n"); | 185 | printk(KERN_ERR "ext4_free_inode: inode on " |
186 | "nonexistent device\n"); | ||
184 | return; | 187 | return; |
185 | } | 188 | } |
186 | sbi = EXT4_SB(sb); | 189 | sbi = EXT4_SB(sb); |
187 | 190 | ||
188 | ino = inode->i_ino; | 191 | ino = inode->i_ino; |
189 | ext4_debug ("freeing inode %lu\n", ino); | 192 | ext4_debug("freeing inode %lu\n", ino); |
190 | 193 | ||
191 | /* | 194 | /* |
192 | * Note: we must free any quota before locking the superblock, | 195 | * Note: we must free any quota before locking the superblock, |
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
200 | is_directory = S_ISDIR(inode->i_mode); | 203 | is_directory = S_ISDIR(inode->i_mode); |
201 | 204 | ||
202 | /* Do this BEFORE marking the inode not in use or returning an error */ | 205 | /* Do this BEFORE marking the inode not in use or returning an error */ |
203 | clear_inode (inode); | 206 | clear_inode(inode); |
204 | 207 | ||
205 | es = EXT4_SB(sb)->s_es; | 208 | es = EXT4_SB(sb)->s_es; |
206 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 209 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
207 | ext4_error (sb, "ext4_free_inode", | 210 | ext4_error(sb, "ext4_free_inode", |
208 | "reserved or nonexistent inode %lu", ino); | 211 | "reserved or nonexistent inode %lu", ino); |
209 | goto error_return; | 212 | goto error_return; |
210 | } | 213 | } |
211 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 214 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
222 | /* Ok, now we can actually update the inode bitmaps.. */ | 225 | /* Ok, now we can actually update the inode bitmaps.. */ |
223 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 226 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
224 | bit, bitmap_bh->b_data)) | 227 | bit, bitmap_bh->b_data)) |
225 | ext4_error (sb, "ext4_free_inode", | 228 | ext4_error(sb, "ext4_free_inode", |
226 | "bit already cleared for inode %lu", ino); | 229 | "bit already cleared for inode %lu", ino); |
227 | else { | 230 | else { |
228 | gdp = ext4_get_group_desc (sb, block_group, &bh2); | 231 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
229 | 232 | ||
230 | BUFFER_TRACE(bh2, "get_write_access"); | 233 | BUFFER_TRACE(bh2, "get_write_access"); |
231 | fatal = ext4_journal_get_write_access(handle, bh2); | 234 | fatal = ext4_journal_get_write_access(handle, bh2); |
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
287 | avefreei = freei / ngroups; | 290 | avefreei = freei / ngroups; |
288 | 291 | ||
289 | for (group = 0; group < ngroups; group++) { | 292 | for (group = 0; group < ngroups; group++) { |
290 | desc = ext4_get_group_desc (sb, group, NULL); | 293 | desc = ext4_get_group_desc(sb, group, NULL); |
291 | if (!desc || !desc->bg_free_inodes_count) | 294 | if (!desc || !desc->bg_free_inodes_count) |
292 | continue; | 295 | continue; |
293 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | 296 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) |
@@ -351,7 +354,7 @@ find_close_to_parent: | |||
351 | goto found_flexbg; | 354 | goto found_flexbg; |
352 | } | 355 | } |
353 | 356 | ||
354 | if (best_flex < 0 || | 357 | if (flex_group[best_flex].free_inodes == 0 || |
355 | (flex_group[i].free_blocks > | 358 | (flex_group[i].free_blocks > |
356 | flex_group[best_flex].free_blocks && | 359 | flex_group[best_flex].free_blocks && |
357 | flex_group[i].free_inodes)) | 360 | flex_group[i].free_inodes)) |
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
576 | * For other inodes, search forward from the parent directory's block | 579 | * For other inodes, search forward from the parent directory's block |
577 | * group to find a free inode. | 580 | * group to find a free inode. |
578 | */ | 581 | */ |
579 | struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | 582 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) |
580 | { | 583 | { |
581 | struct super_block *sb; | 584 | struct super_block *sb; |
582 | struct buffer_head *bitmap_bh = NULL; | 585 | struct buffer_head *bitmap_bh = NULL; |
583 | struct buffer_head *bh2; | 586 | struct buffer_head *bh2; |
584 | ext4_group_t group = 0; | 587 | ext4_group_t group = 0; |
585 | unsigned long ino = 0; | 588 | unsigned long ino = 0; |
586 | struct inode * inode; | 589 | struct inode *inode; |
587 | struct ext4_group_desc * gdp = NULL; | 590 | struct ext4_group_desc *gdp = NULL; |
588 | struct ext4_super_block * es; | 591 | struct ext4_super_block *es; |
589 | struct ext4_inode_info *ei; | 592 | struct ext4_inode_info *ei; |
590 | struct ext4_sb_info *sbi; | 593 | struct ext4_sb_info *sbi; |
591 | int ret2, err = 0; | 594 | int ret2, err = 0; |
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
613 | } | 616 | } |
614 | 617 | ||
615 | if (S_ISDIR(mode)) { | 618 | if (S_ISDIR(mode)) { |
616 | if (test_opt (sb, OLDALLOC)) | 619 | if (test_opt(sb, OLDALLOC)) |
617 | ret2 = find_group_dir(sb, dir, &group); | 620 | ret2 = find_group_dir(sb, dir, &group); |
618 | else | 621 | else |
619 | ret2 = find_group_orlov(sb, dir, &group); | 622 | ret2 = find_group_orlov(sb, dir, &group); |
@@ -783,7 +786,7 @@ got: | |||
783 | } | 786 | } |
784 | 787 | ||
785 | inode->i_uid = current->fsuid; | 788 | inode->i_uid = current->fsuid; |
786 | if (test_opt (sb, GRPID)) | 789 | if (test_opt(sb, GRPID)) |
787 | inode->i_gid = dir->i_gid; | 790 | inode->i_gid = dir->i_gid; |
788 | else if (dir->i_mode & S_ISGID) { | 791 | else if (dir->i_mode & S_ISGID) { |
789 | inode->i_gid = dir->i_gid; | 792 | inode->i_gid = dir->i_gid; |
@@ -816,7 +819,6 @@ got: | |||
816 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 819 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
817 | ei->i_file_acl = 0; | 820 | ei->i_file_acl = 0; |
818 | ei->i_dtime = 0; | 821 | ei->i_dtime = 0; |
819 | ei->i_block_alloc_info = NULL; | ||
820 | ei->i_block_group = group; | 822 | ei->i_block_group = group; |
821 | 823 | ||
822 | ext4_set_inode_flags(inode); | 824 | ext4_set_inode_flags(inode); |
@@ -832,7 +834,7 @@ got: | |||
832 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 834 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
833 | 835 | ||
834 | ret = inode; | 836 | ret = inode; |
835 | if(DQUOT_ALLOC_INODE(inode)) { | 837 | if (DQUOT_ALLOC_INODE(inode)) { |
836 | err = -EDQUOT; | 838 | err = -EDQUOT; |
837 | goto fail_drop; | 839 | goto fail_drop; |
838 | } | 840 | } |
@@ -841,7 +843,7 @@ got: | |||
841 | if (err) | 843 | if (err) |
842 | goto fail_free_drop; | 844 | goto fail_free_drop; |
843 | 845 | ||
844 | err = ext4_init_security(handle,inode, dir); | 846 | err = ext4_init_security(handle, inode, dir); |
845 | if (err) | 847 | if (err) |
846 | goto fail_free_drop; | 848 | goto fail_free_drop; |
847 | 849 | ||
@@ -959,7 +961,7 @@ error: | |||
959 | return ERR_PTR(err); | 961 | return ERR_PTR(err); |
960 | } | 962 | } |
961 | 963 | ||
962 | unsigned long ext4_count_free_inodes (struct super_block * sb) | 964 | unsigned long ext4_count_free_inodes(struct super_block *sb) |
963 | { | 965 | { |
964 | unsigned long desc_count; | 966 | unsigned long desc_count; |
965 | struct ext4_group_desc *gdp; | 967 | struct ext4_group_desc *gdp; |
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
974 | bitmap_count = 0; | 976 | bitmap_count = 0; |
975 | gdp = NULL; | 977 | gdp = NULL; |
976 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 978 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
977 | gdp = ext4_get_group_desc (sb, i, NULL); | 979 | gdp = ext4_get_group_desc(sb, i, NULL); |
978 | if (!gdp) | 980 | if (!gdp) |
979 | continue; | 981 | continue; |
980 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 982 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
989 | bitmap_count += x; | 991 | bitmap_count += x; |
990 | } | 992 | } |
991 | brelse(bitmap_bh); | 993 | brelse(bitmap_bh); |
992 | printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", | 994 | printk(KERN_DEBUG "ext4_count_free_inodes: " |
993 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | 995 | "stored = %u, computed = %lu, %lu\n", |
996 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | ||
994 | return desc_count; | 997 | return desc_count; |
995 | #else | 998 | #else |
996 | desc_count = 0; | 999 | desc_count = 0; |
997 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1000 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
998 | gdp = ext4_get_group_desc (sb, i, NULL); | 1001 | gdp = ext4_get_group_desc(sb, i, NULL); |
999 | if (!gdp) | 1002 | if (!gdp) |
1000 | continue; | 1003 | continue; |
1001 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 1004 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
1006 | } | 1009 | } |
1007 | 1010 | ||
1008 | /* Called at mount-time, super-block is locked */ | 1011 | /* Called at mount-time, super-block is locked */ |
1009 | unsigned long ext4_count_dirs (struct super_block * sb) | 1012 | unsigned long ext4_count_dirs(struct super_block * sb) |
1010 | { | 1013 | { |
1011 | unsigned long count = 0; | 1014 | unsigned long count = 0; |
1012 | ext4_group_t i; | 1015 | ext4_group_t i; |
1013 | 1016 | ||
1014 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1017 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
1015 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 1018 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1016 | if (!gdp) | 1019 | if (!gdp) |
1017 | continue; | 1020 | continue; |
1018 | count += le16_to_cpu(gdp->bg_used_dirs_count); | 1021 | count += le16_to_cpu(gdp->bg_used_dirs_count); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..9b4ec9decfd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -41,6 +41,8 @@ | |||
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | #include "ext4_extents.h" | 42 | #include "ext4_extents.h" |
43 | 43 | ||
44 | #define MPAGE_DA_EXTENT_TAIL 0x01 | ||
45 | |||
44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 46 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
45 | loff_t new_size) | 47 | loff_t new_size) |
46 | { | 48 | { |
@@ -188,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | |||
188 | /* | 190 | /* |
189 | * Called at the last iput() if i_nlink is zero. | 191 | * Called at the last iput() if i_nlink is zero. |
190 | */ | 192 | */ |
191 | void ext4_delete_inode (struct inode * inode) | 193 | void ext4_delete_inode(struct inode *inode) |
192 | { | 194 | { |
193 | handle_t *handle; | 195 | handle_t *handle; |
194 | int err; | 196 | int err; |
@@ -328,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
328 | int final = 0; | 330 | int final = 0; |
329 | 331 | ||
330 | if (i_block < 0) { | 332 | if (i_block < 0) { |
331 | ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); | 333 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); |
332 | } else if (i_block < direct_blocks) { | 334 | } else if (i_block < direct_blocks) { |
333 | offsets[n++] = i_block; | 335 | offsets[n++] = i_block; |
334 | final = direct_blocks; | 336 | final = direct_blocks; |
335 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 337 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
336 | offsets[n++] = EXT4_IND_BLOCK; | 338 | offsets[n++] = EXT4_IND_BLOCK; |
337 | offsets[n++] = i_block; | 339 | offsets[n++] = i_block; |
338 | final = ptrs; | 340 | final = ptrs; |
@@ -398,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
398 | 400 | ||
399 | *err = 0; | 401 | *err = 0; |
400 | /* i_data is not going away, no lock needed */ | 402 | /* i_data is not going away, no lock needed */ |
401 | add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); | 403 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
402 | if (!p->key) | 404 | if (!p->key) |
403 | goto no_block; | 405 | goto no_block; |
404 | while (--depth) { | 406 | while (--depth) { |
405 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
406 | if (!bh) | 408 | if (!bh) |
407 | goto failure; | 409 | goto failure; |
408 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 410 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
409 | /* Reader: end */ | 411 | /* Reader: end */ |
410 | if (!p->key) | 412 | if (!p->key) |
411 | goto no_block; | 413 | goto no_block; |
@@ -441,7 +443,7 @@ no_block: | |||
441 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
442 | { | 444 | { |
443 | struct ext4_inode_info *ei = EXT4_I(inode); | 445 | struct ext4_inode_info *ei = EXT4_I(inode); |
444 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
445 | __le32 *p; | 447 | __le32 *p; |
446 | ext4_fsblk_t bg_start; | 448 | ext4_fsblk_t bg_start; |
447 | ext4_fsblk_t last_block; | 449 | ext4_fsblk_t last_block; |
@@ -484,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
484 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
485 | Indirect *partial) | 487 | Indirect *partial) |
486 | { | 488 | { |
487 | struct ext4_block_alloc_info *block_i; | ||
488 | |||
489 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
490 | |||
491 | /* | 489 | /* |
492 | * try the heuristic for sequential allocation, | 490 | * XXX need to get goal block from mballoc's data structures |
493 | * failing that at least try to get decent locality. | ||
494 | */ | 491 | */ |
495 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | ||
496 | && (block_i->last_alloc_physical_block != 0)) { | ||
497 | return block_i->last_alloc_physical_block + 1; | ||
498 | } | ||
499 | 492 | ||
500 | return ext4_find_near(inode, partial); | 493 | return ext4_find_near(inode, partial); |
501 | } | 494 | } |
@@ -628,7 +621,7 @@ allocated: | |||
628 | *err = 0; | 621 | *err = 0; |
629 | return ret; | 622 | return ret; |
630 | failed_out: | 623 | failed_out: |
631 | for (i = 0; i <index; i++) | 624 | for (i = 0; i < index; i++) |
632 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 625 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
633 | return ret; | 626 | return ret; |
634 | } | 627 | } |
@@ -701,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
701 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 694 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
702 | branch[n].key = cpu_to_le32(new_blocks[n]); | 695 | branch[n].key = cpu_to_le32(new_blocks[n]); |
703 | *branch[n].p = branch[n].key; | 696 | *branch[n].p = branch[n].key; |
704 | if ( n == indirect_blks) { | 697 | if (n == indirect_blks) { |
705 | current_block = new_blocks[n]; | 698 | current_block = new_blocks[n]; |
706 | /* | 699 | /* |
707 | * End of chain, update the last new metablock of | 700 | * End of chain, update the last new metablock of |
@@ -728,7 +721,7 @@ failed: | |||
728 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 721 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); |
729 | ext4_journal_forget(handle, branch[i].bh); | 722 | ext4_journal_forget(handle, branch[i].bh); |
730 | } | 723 | } |
731 | for (i = 0; i <indirect_blks; i++) | 724 | for (i = 0; i < indirect_blks; i++) |
732 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 725 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
733 | 726 | ||
734 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 727 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
@@ -755,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
755 | { | 748 | { |
756 | int i; | 749 | int i; |
757 | int err = 0; | 750 | int err = 0; |
758 | struct ext4_block_alloc_info *block_i; | ||
759 | ext4_fsblk_t current_block; | 751 | ext4_fsblk_t current_block; |
760 | 752 | ||
761 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
762 | /* | 753 | /* |
763 | * If we're splicing into a [td]indirect block (as opposed to the | 754 | * If we're splicing into a [td]indirect block (as opposed to the |
764 | * inode) then we need to get write access to the [td]indirect block | 755 | * inode) then we need to get write access to the [td]indirect block |
@@ -781,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
781 | if (num == 0 && blks > 1) { | 772 | if (num == 0 && blks > 1) { |
782 | current_block = le32_to_cpu(where->key) + 1; | 773 | current_block = le32_to_cpu(where->key) + 1; |
783 | for (i = 1; i < blks; i++) | 774 | for (i = 1; i < blks; i++) |
784 | *(where->p + i ) = cpu_to_le32(current_block++); | 775 | *(where->p + i) = cpu_to_le32(current_block++); |
785 | } | ||
786 | |||
787 | /* | ||
788 | * update the most recently allocated logical & physical block | ||
789 | * in i_block_alloc_info, to assist find the proper goal block for next | ||
790 | * allocation | ||
791 | */ | ||
792 | if (block_i) { | ||
793 | block_i->last_alloc_logical_block = block + blks - 1; | ||
794 | block_i->last_alloc_physical_block = | ||
795 | le32_to_cpu(where[num].key) + blks - 1; | ||
796 | } | 776 | } |
797 | 777 | ||
798 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 778 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
@@ -912,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
912 | goto cleanup; | 892 | goto cleanup; |
913 | 893 | ||
914 | /* | 894 | /* |
915 | * Okay, we need to do block allocation. Lazily initialize the block | 895 | * Okay, we need to do block allocation. |
916 | * allocation info here if necessary | ||
917 | */ | 896 | */ |
918 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | ||
919 | ext4_init_block_alloc_info(inode); | ||
920 | |||
921 | goal = ext4_find_goal(inode, iblock, partial); | 897 | goal = ext4_find_goal(inode, iblock, partial); |
922 | 898 | ||
923 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 899 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
@@ -1005,6 +981,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | |||
1005 | */ | 981 | */ |
1006 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 982 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) |
1007 | { | 983 | { |
984 | if (!blocks) | ||
985 | return 0; | ||
986 | |||
1008 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 987 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1009 | return ext4_ext_calc_metadata_amount(inode, blocks); | 988 | return ext4_ext_calc_metadata_amount(inode, blocks); |
1010 | 989 | ||
@@ -1025,34 +1004,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1025 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1004 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
1026 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1005 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; |
1027 | 1006 | ||
1028 | /* Account for allocated meta_blocks */ | 1007 | if (mdb_free) { |
1029 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1008 | /* Account for allocated meta_blocks */ |
1009 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
1030 | 1010 | ||
1031 | /* update fs free blocks counter for truncate case */ | 1011 | /* update fs dirty blocks counter */ |
1032 | percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); | 1012 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1013 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1014 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1015 | } | ||
1033 | 1016 | ||
1034 | /* update per-inode reservations */ | 1017 | /* update per-inode reservations */ |
1035 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1018 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
1036 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1019 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
1037 | 1020 | ||
1038 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1039 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1040 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1041 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1021 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1042 | } | 1022 | } |
1043 | 1023 | ||
1044 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1045 | #define DIO_MAX_BLOCKS 4096 | ||
1046 | /* | ||
1047 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
1048 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
1049 | * For B blocks with A block pointers per block we need: | ||
1050 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
1051 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
1052 | */ | ||
1053 | #define DIO_CREDITS 25 | ||
1054 | |||
1055 | |||
1056 | /* | 1024 | /* |
1057 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1025 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
1058 | * and returns if the blocks are already mapped. | 1026 | * and returns if the blocks are already mapped. |
@@ -1164,19 +1132,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1164 | return retval; | 1132 | return retval; |
1165 | } | 1133 | } |
1166 | 1134 | ||
1167 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1135 | /* Maximum number of blocks we map for direct IO at once. */ |
1168 | struct buffer_head *bh_result, int create) | 1136 | #define DIO_MAX_BLOCKS 4096 |
1137 | |||
1138 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1139 | struct buffer_head *bh_result, int create) | ||
1169 | { | 1140 | { |
1170 | handle_t *handle = ext4_journal_current_handle(); | 1141 | handle_t *handle = ext4_journal_current_handle(); |
1171 | int ret = 0, started = 0; | 1142 | int ret = 0, started = 0; |
1172 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1143 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
1144 | int dio_credits; | ||
1173 | 1145 | ||
1174 | if (create && !handle) { | 1146 | if (create && !handle) { |
1175 | /* Direct IO write... */ | 1147 | /* Direct IO write... */ |
1176 | if (max_blocks > DIO_MAX_BLOCKS) | 1148 | if (max_blocks > DIO_MAX_BLOCKS) |
1177 | max_blocks = DIO_MAX_BLOCKS; | 1149 | max_blocks = DIO_MAX_BLOCKS; |
1178 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1150 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
1179 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1151 | handle = ext4_journal_start(inode, dio_credits); |
1180 | if (IS_ERR(handle)) { | 1152 | if (IS_ERR(handle)) { |
1181 | ret = PTR_ERR(handle); | 1153 | ret = PTR_ERR(handle); |
1182 | goto out; | 1154 | goto out; |
@@ -1244,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1244 | BUFFER_TRACE(bh, "call get_create_access"); | 1216 | BUFFER_TRACE(bh, "call get_create_access"); |
1245 | fatal = ext4_journal_get_create_access(handle, bh); | 1217 | fatal = ext4_journal_get_create_access(handle, bh); |
1246 | if (!fatal && !buffer_uptodate(bh)) { | 1218 | if (!fatal && !buffer_uptodate(bh)) { |
1247 | memset(bh->b_data,0,inode->i_sb->s_blocksize); | 1219 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1248 | set_buffer_uptodate(bh); | 1220 | set_buffer_uptodate(bh); |
1249 | } | 1221 | } |
1250 | unlock_buffer(bh); | 1222 | unlock_buffer(bh); |
@@ -1269,7 +1241,7 @@ err: | |||
1269 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1241 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1270 | ext4_lblk_t block, int create, int *err) | 1242 | ext4_lblk_t block, int create, int *err) |
1271 | { | 1243 | { |
1272 | struct buffer_head * bh; | 1244 | struct buffer_head *bh; |
1273 | 1245 | ||
1274 | bh = ext4_getblk(handle, inode, block, create, err); | 1246 | bh = ext4_getblk(handle, inode, block, create, err); |
1275 | if (!bh) | 1247 | if (!bh) |
@@ -1285,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
1285 | return NULL; | 1257 | return NULL; |
1286 | } | 1258 | } |
1287 | 1259 | ||
1288 | static int walk_page_buffers( handle_t *handle, | 1260 | static int walk_page_buffers(handle_t *handle, |
1289 | struct buffer_head *head, | 1261 | struct buffer_head *head, |
1290 | unsigned from, | 1262 | unsigned from, |
1291 | unsigned to, | 1263 | unsigned to, |
1292 | int *partial, | 1264 | int *partial, |
1293 | int (*fn)( handle_t *handle, | 1265 | int (*fn)(handle_t *handle, |
1294 | struct buffer_head *bh)) | 1266 | struct buffer_head *bh)) |
1295 | { | 1267 | { |
1296 | struct buffer_head *bh; | 1268 | struct buffer_head *bh; |
1297 | unsigned block_start, block_end; | 1269 | unsigned block_start, block_end; |
@@ -1299,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, | |||
1299 | int err, ret = 0; | 1271 | int err, ret = 0; |
1300 | struct buffer_head *next; | 1272 | struct buffer_head *next; |
1301 | 1273 | ||
1302 | for ( bh = head, block_start = 0; | 1274 | for (bh = head, block_start = 0; |
1303 | ret == 0 && (bh != head || !block_start); | 1275 | ret == 0 && (bh != head || !block_start); |
1304 | block_start = block_end, bh = next) | 1276 | block_start = block_end, bh = next) |
1305 | { | 1277 | { |
1306 | next = bh->b_this_page; | 1278 | next = bh->b_this_page; |
1307 | block_end = block_start + blocksize; | 1279 | block_end = block_start + blocksize; |
@@ -1354,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1354 | loff_t pos, unsigned len, unsigned flags, | 1326 | loff_t pos, unsigned len, unsigned flags, |
1355 | struct page **pagep, void **fsdata) | 1327 | struct page **pagep, void **fsdata) |
1356 | { | 1328 | { |
1357 | struct inode *inode = mapping->host; | 1329 | struct inode *inode = mapping->host; |
1358 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1330 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); |
1359 | handle_t *handle; | 1331 | handle_t *handle; |
1360 | int retries = 0; | 1332 | int retries = 0; |
1361 | struct page *page; | 1333 | struct page *page; |
1362 | pgoff_t index; | 1334 | pgoff_t index; |
1363 | unsigned from, to; | 1335 | unsigned from, to; |
1364 | 1336 | ||
1365 | index = pos >> PAGE_CACHE_SHIFT; | 1337 | index = pos >> PAGE_CACHE_SHIFT; |
1366 | from = pos & (PAGE_CACHE_SIZE - 1); | 1338 | from = pos & (PAGE_CACHE_SIZE - 1); |
1367 | to = from + len; | 1339 | to = from + len; |
1368 | 1340 | ||
1369 | retry: | 1341 | retry: |
1370 | handle = ext4_journal_start(inode, needed_blocks); | 1342 | handle = ext4_journal_start(inode, needed_blocks); |
1371 | if (IS_ERR(handle)) { | 1343 | if (IS_ERR(handle)) { |
1372 | ret = PTR_ERR(handle); | 1344 | ret = PTR_ERR(handle); |
1373 | goto out; | 1345 | goto out; |
1374 | } | 1346 | } |
1375 | 1347 | ||
1376 | page = __grab_cache_page(mapping, index); | 1348 | page = __grab_cache_page(mapping, index); |
@@ -1390,9 +1362,16 @@ retry: | |||
1390 | } | 1362 | } |
1391 | 1363 | ||
1392 | if (ret) { | 1364 | if (ret) { |
1393 | unlock_page(page); | 1365 | unlock_page(page); |
1394 | ext4_journal_stop(handle); | 1366 | ext4_journal_stop(handle); |
1395 | page_cache_release(page); | 1367 | page_cache_release(page); |
1368 | /* | ||
1369 | * block_write_begin may have instantiated a few blocks | ||
1370 | * outside i_size. Trim these off again. Don't need | ||
1371 | * i_size_read because we hold i_mutex. | ||
1372 | */ | ||
1373 | if (pos + len > inode->i_size) | ||
1374 | vmtruncate(inode, inode->i_size); | ||
1396 | } | 1375 | } |
1397 | 1376 | ||
1398 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1429,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1429 | ret = ext4_jbd2_file_inode(handle, inode); | 1408 | ret = ext4_jbd2_file_inode(handle, inode); |
1430 | 1409 | ||
1431 | if (ret == 0) { | 1410 | if (ret == 0) { |
1432 | /* | ||
1433 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
1434 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
1435 | * into that. | ||
1436 | */ | ||
1437 | loff_t new_i_size; | 1411 | loff_t new_i_size; |
1438 | 1412 | ||
1439 | new_i_size = pos + copied; | 1413 | new_i_size = pos + copied; |
1440 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1414 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1441 | EXT4_I(inode)->i_disksize = new_i_size; | 1415 | ext4_update_i_disksize(inode, new_i_size); |
1416 | /* We need to mark inode dirty even if | ||
1417 | * new_i_size is less than inode->i_size | ||
1418 | * but greater than i_disksize. (hint delalloc) | ||
1419 | */ | ||
1420 | ext4_mark_inode_dirty(handle, inode); | ||
1421 | } | ||
1422 | |||
1442 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1423 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1443 | page, fsdata); | 1424 | page, fsdata); |
1444 | copied = ret2; | 1425 | copied = ret2; |
@@ -1463,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, | |||
1463 | loff_t new_i_size; | 1444 | loff_t new_i_size; |
1464 | 1445 | ||
1465 | new_i_size = pos + copied; | 1446 | new_i_size = pos + copied; |
1466 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1447 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1467 | EXT4_I(inode)->i_disksize = new_i_size; | 1448 | ext4_update_i_disksize(inode, new_i_size); |
1449 | /* We need to mark inode dirty even if | ||
1451 | * new_i_size is less than inode->i_size | ||
1452 | * but greater than i_disksize. (hint delalloc) | ||
1452 | */ | ||
1453 | ext4_mark_inode_dirty(handle, inode); | ||
1454 | } | ||
1468 | 1455 | ||
1469 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1456 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1470 | page, fsdata); | 1457 | page, fsdata); |
@@ -1489,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1489 | int ret = 0, ret2; | 1476 | int ret = 0, ret2; |
1490 | int partial = 0; | 1477 | int partial = 0; |
1491 | unsigned from, to; | 1478 | unsigned from, to; |
1479 | loff_t new_i_size; | ||
1492 | 1480 | ||
1493 | from = pos & (PAGE_CACHE_SIZE - 1); | 1481 | from = pos & (PAGE_CACHE_SIZE - 1); |
1494 | to = from + len; | 1482 | to = from + len; |
@@ -1503,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, | |||
1503 | to, &partial, write_end_fn); | 1491 | to, &partial, write_end_fn); |
1504 | if (!partial) | 1492 | if (!partial) |
1505 | SetPageUptodate(page); | 1493 | SetPageUptodate(page); |
1506 | if (pos+copied > inode->i_size) | 1494 | new_i_size = pos + copied; |
1495 | if (new_i_size > inode->i_size) | ||
1507 | i_size_write(inode, pos+copied); | 1496 | i_size_write(inode, pos+copied); |
1508 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1497 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; |
1509 | if (inode->i_size > EXT4_I(inode)->i_disksize) { | 1498 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1510 | EXT4_I(inode)->i_disksize = inode->i_size; | 1499 | ext4_update_i_disksize(inode, new_i_size); |
1511 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1500 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1512 | if (!ret) | 1501 | if (!ret) |
1513 | ret = ret2; | 1502 | ret = ret2; |
@@ -1524,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1524 | 1513 | ||
1525 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1514 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
1526 | { | 1515 | { |
1516 | int retries = 0; | ||
1527 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1517 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1528 | unsigned long md_needed, mdblocks, total = 0; | 1518 | unsigned long md_needed, mdblocks, total = 0; |
1529 | 1519 | ||
@@ -1532,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1532 | * in order to allocate nrblocks | 1522 | * in order to allocate nrblocks |
1533 | * worse case is one extent per block | 1523 | * worse case is one extent per block |
1534 | */ | 1524 | */ |
1525 | repeat: | ||
1535 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1526 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1527 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; |
1537 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1528 | mdblocks = ext4_calc_metadata_amount(inode, total); |
@@ -1540,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1540 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1531 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
1541 | total = md_needed + nrblocks; | 1532 | total = md_needed + nrblocks; |
1542 | 1533 | ||
1543 | if (ext4_has_free_blocks(sbi, total) < total) { | 1534 | if (ext4_claim_free_blocks(sbi, total)) { |
1544 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1535 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1537 | yield(); | ||
1538 | goto repeat; | ||
1539 | } | ||
1545 | return -ENOSPC; | 1540 | return -ENOSPC; |
1546 | } | 1541 | } |
1547 | /* reduce fs free blocks counter */ | ||
1548 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
1549 | |||
1550 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1542 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
1551 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1543 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; |
1552 | 1544 | ||
@@ -1559,7 +1551,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1559 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1551 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1560 | int total, mdb, mdb_free, release; | 1552 | int total, mdb, mdb_free, release; |
1561 | 1553 | ||
1554 | if (!to_free) | ||
1555 | return; /* Nothing to release, exit */ | ||
1556 | |||
1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1557 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1558 | |||
1559 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | ||
1560 | /* | ||
1561 | * if there is no reserved blocks, but we try to free some | ||
1562 | * then the counter is messed up somewhere. | ||
1563 | * but since this function is called from invalidate | ||
1564 | * page, it's harmless to return without any action | ||
1565 | */ | ||
1566 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | ||
1567 | "blocks for inode %lu, but there is no reserved " | ||
1568 | "data blocks\n", to_free, inode->i_ino); | ||
1569 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1570 | return; | ||
1571 | } | ||
1572 | |||
1563 | /* recalculate the number of metablocks still need to be reserved */ | 1573 | /* recalculate the number of metablocks still need to be reserved */ |
1564 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1574 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; |
1565 | mdb = ext4_calc_metadata_amount(inode, total); | 1575 | mdb = ext4_calc_metadata_amount(inode, total); |
@@ -1570,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1570 | 1580 | ||
1571 | release = to_free + mdb_free; | 1581 | release = to_free + mdb_free; |
1572 | 1582 | ||
1573 | /* update fs free blocks counter for truncate case */ | 1583 | /* update fs dirty blocks counter for truncate case */ |
1574 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | 1584 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); |
1575 | 1585 | ||
1576 | /* update per-inode reservations */ | 1586 | /* update per-inode reservations */ |
1577 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1587 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1613,11 +1623,14 @@ struct mpage_da_data { | |||
1613 | unsigned long first_page, next_page; /* extent of pages */ | 1623 | unsigned long first_page, next_page; /* extent of pages */ |
1614 | get_block_t *get_block; | 1624 | get_block_t *get_block; |
1615 | struct writeback_control *wbc; | 1625 | struct writeback_control *wbc; |
1626 | int io_done; | ||
1627 | long pages_written; | ||
1628 | int retval; | ||
1616 | }; | 1629 | }; |
1617 | 1630 | ||
1618 | /* | 1631 | /* |
1619 | * mpage_da_submit_io - walks through extent of pages and try to write | 1632 | * mpage_da_submit_io - walks through extent of pages and try to write |
1620 | * them with __mpage_writepage() | 1633 | * them with writepage() call back |
1621 | * | 1634 | * |
1622 | * @mpd->inode: inode | 1635 | * @mpd->inode: inode |
1623 | * @mpd->first_page: first page of the extent | 1636 | * @mpd->first_page: first page of the extent |
@@ -1632,18 +1645,11 @@ struct mpage_da_data { | |||
1632 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 1645 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
1633 | { | 1646 | { |
1634 | struct address_space *mapping = mpd->inode->i_mapping; | 1647 | struct address_space *mapping = mpd->inode->i_mapping; |
1635 | struct mpage_data mpd_pp = { | ||
1636 | .bio = NULL, | ||
1637 | .last_block_in_bio = 0, | ||
1638 | .get_block = mpd->get_block, | ||
1639 | .use_writepage = 1, | ||
1640 | }; | ||
1641 | int ret = 0, err, nr_pages, i; | 1648 | int ret = 0, err, nr_pages, i; |
1642 | unsigned long index, end; | 1649 | unsigned long index, end; |
1643 | struct pagevec pvec; | 1650 | struct pagevec pvec; |
1644 | 1651 | ||
1645 | BUG_ON(mpd->next_page <= mpd->first_page); | 1652 | BUG_ON(mpd->next_page <= mpd->first_page); |
1646 | |||
1647 | pagevec_init(&pvec, 0); | 1653 | pagevec_init(&pvec, 0); |
1648 | index = mpd->first_page; | 1654 | index = mpd->first_page; |
1649 | end = mpd->next_page - 1; | 1655 | end = mpd->next_page - 1; |
@@ -1661,8 +1667,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1661 | break; | 1667 | break; |
1662 | index++; | 1668 | index++; |
1663 | 1669 | ||
1664 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | 1670 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1665 | 1671 | if (!err) | |
1672 | mpd->pages_written++; | ||
1666 | /* | 1673 | /* |
1667 | * In error case, we have to continue because | 1674 | * In error case, we have to continue because |
1668 | * remaining pages are still locked | 1675 | * remaining pages are still locked |
@@ -1673,9 +1680,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1673 | } | 1680 | } |
1674 | pagevec_release(&pvec); | 1681 | pagevec_release(&pvec); |
1675 | } | 1682 | } |
1676 | if (mpd_pp.bio) | ||
1677 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
1678 | |||
1679 | return ret; | 1683 | return ret; |
1680 | } | 1684 | } |
1681 | 1685 | ||
@@ -1698,7 +1702,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1698 | int blocks = exbh->b_size >> inode->i_blkbits; | 1702 | int blocks = exbh->b_size >> inode->i_blkbits; |
1699 | sector_t pblock = exbh->b_blocknr, cur_logical; | 1703 | sector_t pblock = exbh->b_blocknr, cur_logical; |
1700 | struct buffer_head *head, *bh; | 1704 | struct buffer_head *head, *bh; |
1701 | unsigned long index, end; | 1705 | pgoff_t index, end; |
1702 | struct pagevec pvec; | 1706 | struct pagevec pvec; |
1703 | int nr_pages, i; | 1707 | int nr_pages, i; |
1704 | 1708 | ||
@@ -1741,6 +1745,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1741 | if (buffer_delay(bh)) { | 1745 | if (buffer_delay(bh)) { |
1742 | bh->b_blocknr = pblock; | 1746 | bh->b_blocknr = pblock; |
1743 | clear_buffer_delay(bh); | 1747 | clear_buffer_delay(bh); |
1748 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1749 | } else if (buffer_unwritten(bh)) { | ||
1750 | bh->b_blocknr = pblock; | ||
1751 | clear_buffer_unwritten(bh); | ||
1752 | set_buffer_mapped(bh); | ||
1753 | set_buffer_new(bh); | ||
1754 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1744 | } else if (buffer_mapped(bh)) | 1755 | } else if (buffer_mapped(bh)) |
1745 | BUG_ON(bh->b_blocknr != pblock); | 1756 | BUG_ON(bh->b_blocknr != pblock); |
1746 | 1757 | ||
@@ -1768,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1768 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | 1779 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
1769 | } | 1780 | } |
1770 | 1781 | ||
1782 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
1783 | sector_t logical, long blk_cnt) | ||
1784 | { | ||
1785 | int nr_pages, i; | ||
1786 | pgoff_t index, end; | ||
1787 | struct pagevec pvec; | ||
1788 | struct inode *inode = mpd->inode; | ||
1789 | struct address_space *mapping = inode->i_mapping; | ||
1790 | |||
1791 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1792 | end = (logical + blk_cnt - 1) >> | ||
1793 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1794 | while (index <= end) { | ||
1795 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1796 | if (nr_pages == 0) | ||
1797 | break; | ||
1798 | for (i = 0; i < nr_pages; i++) { | ||
1799 | struct page *page = pvec.pages[i]; | ||
1800 | index = page->index; | ||
1801 | if (index > end) | ||
1802 | break; | ||
1803 | index++; | ||
1804 | |||
1805 | BUG_ON(!PageLocked(page)); | ||
1806 | BUG_ON(PageWriteback(page)); | ||
1807 | block_invalidatepage(page, 0); | ||
1808 | ClearPageUptodate(page); | ||
1809 | unlock_page(page); | ||
1810 | } | ||
1811 | } | ||
1812 | return; | ||
1813 | } | ||
1814 | |||
1815 | static void ext4_print_free_blocks(struct inode *inode) | ||
1816 | { | ||
1817 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1818 | printk(KERN_EMERG "Total free blocks count %lld\n", | ||
1819 | ext4_count_free_blocks(inode->i_sb)); | ||
1820 | printk(KERN_EMERG "Free/Dirty block details\n"); | ||
1821 | printk(KERN_EMERG "free_blocks=%lld\n", | ||
1822 | percpu_counter_sum(&sbi->s_freeblocks_counter)); | ||
1823 | printk(KERN_EMERG "dirty_blocks=%lld\n", | ||
1824 | percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | ||
1825 | printk(KERN_EMERG "Block reservation details\n"); | ||
1826 | printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", | ||
1827 | EXT4_I(inode)->i_reserved_data_blocks); | ||
1828 | printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", | ||
1829 | EXT4_I(inode)->i_reserved_meta_blocks); | ||
1830 | return; | ||
1831 | } | ||
1832 | |||
1771 | /* | 1833 | /* |
1772 | * mpage_da_map_blocks - go through given space | 1834 | * mpage_da_map_blocks - go through given space |
1773 | * | 1835 | * |
@@ -1776,54 +1838,87 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1776 | * | 1838 | * |
1777 | * The function skips space we know is already mapped to disk blocks. | 1839 | * The function skips space we know is already mapped to disk blocks. |
1778 | * | 1840 | * |
1779 | * The function ignores errors ->get_block() returns, thus real | ||
1780 | * error handling is postponed to __mpage_writepage() | ||
1781 | */ | 1841 | */ |
1782 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1842 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
1783 | { | 1843 | { |
1784 | struct buffer_head *lbh = &mpd->lbh; | 1844 | int err = 0; |
1785 | int err = 0, remain = lbh->b_size; | ||
1786 | sector_t next = lbh->b_blocknr; | ||
1787 | struct buffer_head new; | 1845 | struct buffer_head new; |
1846 | struct buffer_head *lbh = &mpd->lbh; | ||
1847 | sector_t next; | ||
1788 | 1848 | ||
1789 | /* | 1849 | /* |
1790 | * We consider only non-mapped and non-allocated blocks | 1850 | * We consider only non-mapped and non-allocated blocks |
1791 | */ | 1851 | */ |
1792 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1852 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1793 | return; | 1853 | return 0; |
1854 | new.b_state = lbh->b_state; | ||
1855 | new.b_blocknr = 0; | ||
1856 | new.b_size = lbh->b_size; | ||
1857 | next = lbh->b_blocknr; | ||
1858 | /* | ||
1859 | * If we didn't accumulate anything | ||
1860 | * to write simply return | ||
1861 | */ | ||
1862 | if (!new.b_size) | ||
1863 | return 0; | ||
1864 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1865 | if (err) { | ||
1794 | 1866 | ||
1795 | while (remain) { | 1867 | /* If get block returns with error |
1796 | new.b_state = lbh->b_state; | 1868 | * we simply return. Later writepage |
1797 | new.b_blocknr = 0; | 1869 | * will redirty the page and writepages |
1798 | new.b_size = remain; | 1870 | * will find the dirty page again |
1799 | err = mpd->get_block(mpd->inode, next, &new, 1); | 1871 | */ |
1800 | if (err) { | 1872 | if (err == -EAGAIN) |
1801 | /* | 1873 | return 0; |
1802 | * Rather than implement own error handling | ||
1803 | * here, we just leave remaining blocks | ||
1804 | * unallocated and try again with ->writepage() | ||
1805 | */ | ||
1806 | break; | ||
1807 | } | ||
1808 | BUG_ON(new.b_size == 0); | ||
1809 | 1874 | ||
1810 | if (buffer_new(&new)) | 1875 | if (err == -ENOSPC && |
1811 | __unmap_underlying_blocks(mpd->inode, &new); | 1876 | ext4_count_free_blocks(mpd->inode->i_sb)) { |
1877 | mpd->retval = err; | ||
1878 | return 0; | ||
1879 | } | ||
1812 | 1880 | ||
1813 | /* | 1881 | /* |
1814 | * If blocks are delayed marked, we need to | 1882 | * get block failure will cause us |
1815 | * put actual blocknr and drop delayed bit | 1883 | * to loop in writepages. Because |
1884 | * a_ops->writepage won't be able to | ||
1885 | * make progress. The page will be redirtied | ||
1886 | * by writepage and writepages will again | ||
1887 | * try to write the same. | ||
1816 | */ | 1888 | */ |
1817 | if (buffer_delay(lbh)) | 1889 | printk(KERN_EMERG "%s block allocation failed for inode %lu " |
1818 | mpage_put_bnr_to_bhs(mpd, next, &new); | 1890 | "at logical offset %llu with max blocks " |
1819 | 1891 | "%zd with error %d\n", | |
1820 | /* go for the remaining blocks */ | 1892 | __func__, mpd->inode->i_ino, |
1821 | next += new.b_size >> mpd->inode->i_blkbits; | 1893 | (unsigned long long)next, |
1822 | remain -= new.b_size; | 1894 | lbh->b_size >> mpd->inode->i_blkbits, err); |
1895 | printk(KERN_EMERG "This should not happen.!! " | ||
1896 | "Data will be lost\n"); | ||
1897 | if (err == -ENOSPC) { | ||
1898 | ext4_print_free_blocks(mpd->inode); | ||
1899 | } | ||
1900 | /* invlaidate all the pages */ | ||
1901 | ext4_da_block_invalidatepages(mpd, next, | ||
1902 | lbh->b_size >> mpd->inode->i_blkbits); | ||
1903 | return err; | ||
1823 | } | 1904 | } |
1905 | BUG_ON(new.b_size == 0); | ||
1906 | |||
1907 | if (buffer_new(&new)) | ||
1908 | __unmap_underlying_blocks(mpd->inode, &new); | ||
1909 | |||
1910 | /* | ||
1911 | * If blocks are delayed marked, we need to | ||
1912 | * put actual blocknr and drop delayed bit | ||
1913 | */ | ||
1914 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | ||
1915 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1916 | |||
1917 | return 0; | ||
1824 | } | 1918 | } |
1825 | 1919 | ||
1826 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | 1920 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
1921 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1827 | 1922 | ||
1828 | /* | 1923 | /* |
1829 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 1924 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
@@ -1837,41 +1932,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1837 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1932 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
1838 | sector_t logical, struct buffer_head *bh) | 1933 | sector_t logical, struct buffer_head *bh) |
1839 | { | 1934 | { |
1840 | struct buffer_head *lbh = &mpd->lbh; | ||
1841 | sector_t next; | 1935 | sector_t next; |
1936 | size_t b_size = bh->b_size; | ||
1937 | struct buffer_head *lbh = &mpd->lbh; | ||
1938 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
1842 | 1939 | ||
1843 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1940 | /* check if thereserved journal credits might overflow */ |
1844 | 1941 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
1942 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1943 | /* | ||
1944 | * With non-extent format we are limited by the journal | ||
1945 | * credit available. Total credit needed to insert | ||
1946 | * nrblocks contiguous blocks is dependent on the | ||
1947 | * nrblocks. So limit nrblocks. | ||
1948 | */ | ||
1949 | goto flush_it; | ||
1950 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
1951 | EXT4_MAX_TRANS_DATA) { | ||
1952 | /* | ||
1953 | * Adding the new buffer_head would make it cross the | ||
1954 | * allowed limit for which we have journal credit | ||
1955 | * reserved. So limit the new bh->b_size | ||
1956 | */ | ||
1957 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
1958 | mpd->inode->i_blkbits; | ||
1959 | /* we will do mpage_da_submit_io in the next loop */ | ||
1960 | } | ||
1961 | } | ||
1845 | /* | 1962 | /* |
1846 | * First block in the extent | 1963 | * First block in the extent |
1847 | */ | 1964 | */ |
1848 | if (lbh->b_size == 0) { | 1965 | if (lbh->b_size == 0) { |
1849 | lbh->b_blocknr = logical; | 1966 | lbh->b_blocknr = logical; |
1850 | lbh->b_size = bh->b_size; | 1967 | lbh->b_size = b_size; |
1851 | lbh->b_state = bh->b_state & BH_FLAGS; | 1968 | lbh->b_state = bh->b_state & BH_FLAGS; |
1852 | return; | 1969 | return; |
1853 | } | 1970 | } |
1854 | 1971 | ||
1972 | next = lbh->b_blocknr + nrblocks; | ||
1855 | /* | 1973 | /* |
1856 | * Can we merge the block to our big extent? | 1974 | * Can we merge the block to our big extent? |
1857 | */ | 1975 | */ |
1858 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1976 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
1859 | lbh->b_size += bh->b_size; | 1977 | lbh->b_size += b_size; |
1860 | return; | 1978 | return; |
1861 | } | 1979 | } |
1862 | 1980 | ||
1981 | flush_it: | ||
1863 | /* | 1982 | /* |
1864 | * We couldn't merge the block to our extent, so we | 1983 | * We couldn't merge the block to our extent, so we |
1865 | * need to flush current extent and start new one | 1984 | * need to flush current extent and start new one |
1866 | */ | 1985 | */ |
1867 | mpage_da_map_blocks(mpd); | 1986 | if (mpage_da_map_blocks(mpd) == 0) |
1868 | 1987 | mpage_da_submit_io(mpd); | |
1869 | /* | 1988 | mpd->io_done = 1; |
1870 | * Now start a new extent | 1989 | return; |
1871 | */ | ||
1872 | lbh->b_size = bh->b_size; | ||
1873 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
1874 | lbh->b_blocknr = logical; | ||
1875 | } | 1990 | } |
1876 | 1991 | ||
1877 | /* | 1992 | /* |
@@ -1891,17 +2006,35 @@ static int __mpage_da_writepage(struct page *page, | |||
1891 | struct buffer_head *bh, *head, fake; | 2006 | struct buffer_head *bh, *head, fake; |
1892 | sector_t logical; | 2007 | sector_t logical; |
1893 | 2008 | ||
2009 | if (mpd->io_done) { | ||
2010 | /* | ||
2011 | * Rest of the page in the page_vec | ||
2012 | * redirty then and skip then. We will | ||
2013 | * try to to write them again after | ||
2014 | * starting a new transaction | ||
2015 | */ | ||
2016 | redirty_page_for_writepage(wbc, page); | ||
2017 | unlock_page(page); | ||
2018 | return MPAGE_DA_EXTENT_TAIL; | ||
2019 | } | ||
1894 | /* | 2020 | /* |
1895 | * Can we merge this page to current extent? | 2021 | * Can we merge this page to current extent? |
1896 | */ | 2022 | */ |
1897 | if (mpd->next_page != page->index) { | 2023 | if (mpd->next_page != page->index) { |
1898 | /* | 2024 | /* |
1899 | * Nope, we can't. So, we map non-allocated blocks | 2025 | * Nope, we can't. So, we map non-allocated blocks |
1900 | * and start IO on them using __mpage_writepage() | 2026 | * and start IO on them using writepage() |
1901 | */ | 2027 | */ |
1902 | if (mpd->next_page != mpd->first_page) { | 2028 | if (mpd->next_page != mpd->first_page) { |
1903 | mpage_da_map_blocks(mpd); | 2029 | if (mpage_da_map_blocks(mpd) == 0) |
1904 | mpage_da_submit_io(mpd); | 2030 | mpage_da_submit_io(mpd); |
2031 | /* | ||
2032 | * skip rest of the page in the page_vec | ||
2033 | */ | ||
2034 | mpd->io_done = 1; | ||
2035 | redirty_page_for_writepage(wbc, page); | ||
2036 | unlock_page(page); | ||
2037 | return MPAGE_DA_EXTENT_TAIL; | ||
1905 | } | 2038 | } |
1906 | 2039 | ||
1907 | /* | 2040 | /* |
@@ -1932,6 +2065,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1932 | set_buffer_dirty(bh); | 2065 | set_buffer_dirty(bh); |
1933 | set_buffer_uptodate(bh); | 2066 | set_buffer_uptodate(bh); |
1934 | mpage_add_bh_to_extent(mpd, logical, bh); | 2067 | mpage_add_bh_to_extent(mpd, logical, bh); |
2068 | if (mpd->io_done) | ||
2069 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } else { | 2070 | } else { |
1936 | /* | 2071 | /* |
1937 | * Page with regular buffer heads, just add all dirty ones | 2072 | * Page with regular buffer heads, just add all dirty ones |
@@ -1940,8 +2075,12 @@ static int __mpage_da_writepage(struct page *page, | |||
1940 | bh = head; | 2075 | bh = head; |
1941 | do { | 2076 | do { |
1942 | BUG_ON(buffer_locked(bh)); | 2077 | BUG_ON(buffer_locked(bh)); |
1943 | if (buffer_dirty(bh)) | 2078 | if (buffer_dirty(bh) && |
2079 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
1944 | mpage_add_bh_to_extent(mpd, logical, bh); | 2080 | mpage_add_bh_to_extent(mpd, logical, bh); |
2081 | if (mpd->io_done) | ||
2082 | return MPAGE_DA_EXTENT_TAIL; | ||
2083 | } | ||
1945 | logical++; | 2084 | logical++; |
1946 | } while ((bh = bh->b_this_page) != head); | 2085 | } while ((bh = bh->b_this_page) != head); |
1947 | } | 2086 | } |
@@ -1960,46 +2099,39 @@ static int __mpage_da_writepage(struct page *page, | |||
1960 | * | 2099 | * |
1961 | * This is a library function, which implements the writepages() | 2100 | * This is a library function, which implements the writepages() |
1962 | * address_space_operation. | 2101 | * address_space_operation. |
1963 | * | ||
1964 | * In order to avoid duplication of logic that deals with partial pages, | ||
1965 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
1966 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
1967 | * | ||
1968 | * It's important that we call __mpage_writepage() only once for each | ||
1969 | * involved page, otherwise we'd have to implement more complicated logic | ||
1970 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
1971 | * | ||
1972 | * See comments to mpage_writepages() | ||
1973 | */ | 2102 | */ |
1974 | static int mpage_da_writepages(struct address_space *mapping, | 2103 | static int mpage_da_writepages(struct address_space *mapping, |
1975 | struct writeback_control *wbc, | 2104 | struct writeback_control *wbc, |
1976 | get_block_t get_block) | 2105 | struct mpage_da_data *mpd) |
1977 | { | 2106 | { |
1978 | struct mpage_da_data mpd; | 2107 | long to_write; |
1979 | int ret; | 2108 | int ret; |
1980 | 2109 | ||
1981 | if (!get_block) | 2110 | if (!mpd->get_block) |
1982 | return generic_writepages(mapping, wbc); | 2111 | return generic_writepages(mapping, wbc); |
1983 | 2112 | ||
1984 | mpd.wbc = wbc; | 2113 | mpd->lbh.b_size = 0; |
1985 | mpd.inode = mapping->host; | 2114 | mpd->lbh.b_state = 0; |
1986 | mpd.lbh.b_size = 0; | 2115 | mpd->lbh.b_blocknr = 0; |
1987 | mpd.lbh.b_state = 0; | 2116 | mpd->first_page = 0; |
1988 | mpd.lbh.b_blocknr = 0; | 2117 | mpd->next_page = 0; |
1989 | mpd.first_page = 0; | 2118 | mpd->io_done = 0; |
1990 | mpd.next_page = 0; | 2119 | mpd->pages_written = 0; |
1991 | mpd.get_block = get_block; | 2120 | mpd->retval = 0; |
2121 | |||
2122 | to_write = wbc->nr_to_write; | ||
1992 | 2123 | ||
1993 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2124 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); |
1994 | 2125 | ||
1995 | /* | 2126 | /* |
1996 | * Handle last extent of pages | 2127 | * Handle last extent of pages |
1997 | */ | 2128 | */ |
1998 | if (mpd.next_page != mpd.first_page) { | 2129 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
1999 | mpage_da_map_blocks(&mpd); | 2130 | if (mpage_da_map_blocks(mpd) == 0) |
2000 | mpage_da_submit_io(&mpd); | 2131 | mpage_da_submit_io(mpd); |
2001 | } | 2132 | } |
2002 | 2133 | ||
2134 | wbc->nr_to_write = to_write - mpd->pages_written; | ||
2003 | return ret; | 2135 | return ret; |
2004 | } | 2136 | } |
2005 | 2137 | ||
@@ -2052,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2052 | handle_t *handle = NULL; | 2184 | handle_t *handle = NULL; |
2053 | 2185 | ||
2054 | handle = ext4_journal_current_handle(); | 2186 | handle = ext4_journal_current_handle(); |
2055 | if (!handle) { | 2187 | BUG_ON(!handle); |
2056 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | 2188 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, |
2057 | bh_result, 0, 0, 0); | 2189 | bh_result, create, 0, EXT4_DELALLOC_RSVED); |
2058 | BUG_ON(!ret); | ||
2059 | } else { | ||
2060 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2061 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2062 | } | ||
2063 | |||
2064 | if (ret > 0) { | 2190 | if (ret > 0) { |
2191 | |||
2065 | bh_result->b_size = (ret << inode->i_blkbits); | 2192 | bh_result->b_size = (ret << inode->i_blkbits); |
2066 | 2193 | ||
2194 | if (ext4_should_order_data(inode)) { | ||
2195 | int retval; | ||
2196 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2197 | if (retval) | ||
2198 | /* | ||
2199 | * Failed to add inode for ordered | ||
2200 | * mode. Don't update file size | ||
2201 | */ | ||
2202 | return retval; | ||
2203 | } | ||
2204 | |||
2067 | /* | 2205 | /* |
2068 | * Update on-disk size along with block allocation | 2206 | * Update on-disk size along with block allocation |
2069 | * we don't use 'extend_disksize' as size may change | 2207 | * we don't use 'extend_disksize' as size may change |
@@ -2073,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2073 | if (disksize > i_size_read(inode)) | 2211 | if (disksize > i_size_read(inode)) |
2074 | disksize = i_size_read(inode); | 2212 | disksize = i_size_read(inode); |
2075 | if (disksize > EXT4_I(inode)->i_disksize) { | 2213 | if (disksize > EXT4_I(inode)->i_disksize) { |
2076 | /* | 2214 | ext4_update_i_disksize(inode, disksize); |
2077 | * XXX: replace with spinlock if seen contended -bzzz | 2215 | ret = ext4_mark_inode_dirty(handle, inode); |
2078 | */ | 2216 | return ret; |
2079 | down_write(&EXT4_I(inode)->i_data_sem); | ||
2080 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2081 | EXT4_I(inode)->i_disksize = disksize; | ||
2082 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2083 | |||
2084 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
2085 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2086 | return ret; | ||
2087 | } | ||
2088 | } | 2217 | } |
2089 | ret = 0; | 2218 | ret = 0; |
2090 | } | 2219 | } |
@@ -2204,84 +2333,114 @@ static int ext4_da_writepage(struct page *page, | |||
2204 | } | 2333 | } |
2205 | 2334 | ||
2206 | /* | 2335 | /* |
2207 | * For now just follow the DIO way to estimate the max credits | 2336 | * This is called via ext4_da_writepages() to |
2208 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2337 | * calulate the total number of credits to reserve to fit |
2209 | * todo: need to calculate the max credits need for | 2338 | * a single extent allocation into a single transaction, |
2210 | * extent based files, currently the DIO credits is based on | 2339 | * ext4_da_writpeages() will loop calling this before |
2211 | * indirect-blocks mapping way. | 2340 | * the block allocation. |
2212 | * | ||
2213 | * Probably should have a generic way to calculate credits | ||
2214 | * for DIO, writepages, and truncate | ||
2215 | */ | 2341 | */ |
2216 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2342 | |
2217 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2343 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2344 | { | ||
2345 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
2346 | |||
2347 | /* | ||
2348 | * With non-extent format the journal credit needed to | ||
2349 | * insert nrblocks contiguous block is dependent on | ||
2350 | * number of contiguous block. So we will limit | ||
2351 | * number of contiguous block to a sane value | ||
2352 | */ | ||
2353 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
2354 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
2355 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
2356 | |||
2357 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
2358 | } | ||
2218 | 2359 | ||
2219 | static int ext4_da_writepages(struct address_space *mapping, | 2360 | static int ext4_da_writepages(struct address_space *mapping, |
2220 | struct writeback_control *wbc) | 2361 | struct writeback_control *wbc) |
2221 | { | 2362 | { |
2222 | struct inode *inode = mapping->host; | ||
2223 | handle_t *handle = NULL; | 2363 | handle_t *handle = NULL; |
2224 | int needed_blocks; | ||
2225 | int ret = 0; | ||
2226 | long to_write; | ||
2227 | loff_t range_start = 0; | 2364 | loff_t range_start = 0; |
2365 | struct mpage_da_data mpd; | ||
2366 | struct inode *inode = mapping->host; | ||
2367 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
2368 | long to_write, pages_skipped = 0; | ||
2369 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
2228 | 2370 | ||
2229 | /* | 2371 | /* |
2230 | * No pages to write? This is mainly a kludge to avoid starting | 2372 | * No pages to write? This is mainly a kludge to avoid starting |
2231 | * a transaction for special inodes like journal inode on last iput() | 2373 | * a transaction for special inodes like journal inode on last iput() |
2232 | * because that could violate lock ordering on umount | 2374 | * because that could violate lock ordering on umount |
2233 | */ | 2375 | */ |
2234 | if (!mapping->nrpages) | 2376 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2235 | return 0; | 2377 | return 0; |
2236 | |||
2237 | /* | 2378 | /* |
2238 | * Estimate the worse case needed credits to write out | 2379 | * Make sure nr_to_write is >= sbi->s_mb_stream_request |
2239 | * EXT4_MAX_BUF_BLOCKS pages | 2380 | * This make sure small files blocks are allocated in |
2381 | * single attempt. This ensure that small files | ||
2382 | * get less fragmented. | ||
2240 | */ | 2383 | */ |
2241 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2384 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { |
2385 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2386 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2387 | } | ||
2242 | 2388 | ||
2243 | to_write = wbc->nr_to_write; | 2389 | if (!wbc->range_cyclic) |
2244 | if (!wbc->range_cyclic) { | ||
2245 | /* | 2390 | /* |
2246 | * If range_cyclic is not set force range_cont | 2391 | * If range_cyclic is not set force range_cont |
2247 | * and save the old writeback_index | 2392 | * and save the old writeback_index |
2248 | */ | 2393 | */ |
2249 | wbc->range_cont = 1; | 2394 | wbc->range_cont = 1; |
2250 | range_start = wbc->range_start; | ||
2251 | } | ||
2252 | 2395 | ||
2253 | while (!ret && to_write) { | 2396 | range_start = wbc->range_start; |
2397 | pages_skipped = wbc->pages_skipped; | ||
2398 | |||
2399 | mpd.wbc = wbc; | ||
2400 | mpd.inode = mapping->host; | ||
2401 | |||
2402 | restart_loop: | ||
2403 | to_write = wbc->nr_to_write; | ||
2404 | while (!ret && to_write > 0) { | ||
2405 | |||
2406 | /* | ||
2407 | * we insert one extent at a time. So we need | ||
2408 | * credit needed for single extent allocation. | ||
2409 | * journalled mode is currently not supported | ||
2410 | * by delalloc | ||
2411 | */ | ||
2412 | BUG_ON(ext4_should_journal_data(inode)); | ||
2413 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | ||
2414 | |||
2254 | /* start a new transaction*/ | 2415 | /* start a new transaction*/ |
2255 | handle = ext4_journal_start(inode, needed_blocks); | 2416 | handle = ext4_journal_start(inode, needed_blocks); |
2256 | if (IS_ERR(handle)) { | 2417 | if (IS_ERR(handle)) { |
2257 | ret = PTR_ERR(handle); | 2418 | ret = PTR_ERR(handle); |
2419 | printk(KERN_EMERG "%s: jbd2_start: " | ||
2420 | "%ld pages, ino %lu; err %d\n", __func__, | ||
2421 | wbc->nr_to_write, inode->i_ino, ret); | ||
2422 | dump_stack(); | ||
2258 | goto out_writepages; | 2423 | goto out_writepages; |
2259 | } | 2424 | } |
2260 | if (ext4_should_order_data(inode)) { | 2425 | to_write -= wbc->nr_to_write; |
2261 | /* | ||
2262 | * With ordered mode we need to add | ||
2263 | * the inode to the journal handle | ||
2264 | * when we do block allocation. | ||
2265 | */ | ||
2266 | ret = ext4_jbd2_file_inode(handle, inode); | ||
2267 | if (ret) { | ||
2268 | ext4_journal_stop(handle); | ||
2269 | goto out_writepages; | ||
2270 | } | ||
2271 | 2426 | ||
2272 | } | 2427 | mpd.get_block = ext4_da_get_block_write; |
2273 | /* | 2428 | ret = mpage_da_writepages(mapping, wbc, &mpd); |
2274 | * set the max dirty pages could be write at a time | ||
2275 | * to fit into the reserved transaction credits | ||
2276 | */ | ||
2277 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
2278 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
2279 | 2429 | ||
2280 | to_write -= wbc->nr_to_write; | ||
2281 | ret = mpage_da_writepages(mapping, wbc, | ||
2282 | ext4_da_get_block_write); | ||
2283 | ext4_journal_stop(handle); | 2430 | ext4_journal_stop(handle); |
2284 | if (wbc->nr_to_write) { | 2431 | |
2432 | if (mpd.retval == -ENOSPC) | ||
2433 | jbd2_journal_force_commit_nested(sbi->s_journal); | ||
2434 | |||
2435 | /* reset the retry count */ | ||
2436 | if (ret == MPAGE_DA_EXTENT_TAIL) { | ||
2437 | /* | ||
2438 | * got one extent now try with | ||
2439 | * rest of the pages | ||
2440 | */ | ||
2441 | to_write += wbc->nr_to_write; | ||
2442 | ret = 0; | ||
2443 | } else if (wbc->nr_to_write) { | ||
2285 | /* | 2444 | /* |
2286 | * There is no more writeout needed | 2445 | * There is no more writeout needed |
2287 | * or we requested for a noblocking writeout | 2446 | * or we requested for a noblocking writeout |
@@ -2293,13 +2452,48 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2293 | wbc->nr_to_write = to_write; | 2452 | wbc->nr_to_write = to_write; |
2294 | } | 2453 | } |
2295 | 2454 | ||
2296 | out_writepages: | 2455 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { |
2297 | wbc->nr_to_write = to_write; | 2456 | /* We skipped pages in this loop */ |
2298 | if (range_start) | ||
2299 | wbc->range_start = range_start; | 2457 | wbc->range_start = range_start; |
2458 | wbc->nr_to_write = to_write + | ||
2459 | wbc->pages_skipped - pages_skipped; | ||
2460 | wbc->pages_skipped = pages_skipped; | ||
2461 | goto restart_loop; | ||
2462 | } | ||
2463 | |||
2464 | out_writepages: | ||
2465 | wbc->nr_to_write = to_write - nr_to_writebump; | ||
2466 | wbc->range_start = range_start; | ||
2300 | return ret; | 2467 | return ret; |
2301 | } | 2468 | } |
2302 | 2469 | ||
2470 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2471 | static int ext4_nonda_switch(struct super_block *sb) | ||
2472 | { | ||
2473 | s64 free_blocks, dirty_blocks; | ||
2474 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2475 | |||
2476 | /* | ||
2477 | * switch to non delalloc mode if we are running low | ||
2478 | * on free blocks. The free block accounting via percpu | ||
2479 | * counters can get slightly wrong with FBC_BATCH getting | ||
2480 | * accumulated on each CPU without updating global counters. | ||
2481 | * Delalloc needs accurate free block accounting, so switch | ||
2482 | * to non delalloc when we are near the error range. | ||
2483 | */ | ||
2484 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
2485 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
2486 | if (2 * free_blocks < 3 * dirty_blocks || | ||
2487 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
2488 | /* | ||
2489 | * free block count is less than 150% of dirty blocks | ||
2490 | * or free block count is less than dirty blocks plus the watermark | ||
2491 | */ | ||
2492 | return 1; | ||
2493 | } | ||
2494 | return 0; | ||
2495 | } | ||
2496 | |||
2303 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2497 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2304 | loff_t pos, unsigned len, unsigned flags, | 2498 | loff_t pos, unsigned len, unsigned flags, |
2305 | struct page **pagep, void **fsdata) | 2499 | struct page **pagep, void **fsdata) |
@@ -2315,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2315 | from = pos & (PAGE_CACHE_SIZE - 1); | 2509 | from = pos & (PAGE_CACHE_SIZE - 1); |
2316 | to = from + len; | 2510 | to = from + len; |
2317 | 2511 | ||
2512 | if (ext4_nonda_switch(inode->i_sb)) { | ||
2513 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
2514 | return ext4_write_begin(file, mapping, pos, | ||
2515 | len, flags, pagep, fsdata); | ||
2516 | } | ||
2517 | *fsdata = (void *)0; | ||
2318 | retry: | 2518 | retry: |
2319 | /* | 2519 | /* |
2320 | * With delayed allocation, we don't log the i_disksize update | 2520 | * With delayed allocation, we don't log the i_disksize update |
@@ -2342,6 +2542,13 @@ retry: | |||
2342 | unlock_page(page); | 2542 | unlock_page(page); |
2343 | ext4_journal_stop(handle); | 2543 | ext4_journal_stop(handle); |
2344 | page_cache_release(page); | 2544 | page_cache_release(page); |
2545 | /* | ||
2546 | * block_write_begin may have instantiated a few blocks | ||
2547 | * outside i_size. Trim these off again. Don't need | ||
2548 | * i_size_read because we hold i_mutex. | ||
2549 | */ | ||
2550 | if (pos + len > inode->i_size) | ||
2551 | vmtruncate(inode, inode->i_size); | ||
2345 | } | 2552 | } |
2346 | 2553 | ||
2347 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2554 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2365,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2365 | bh = page_buffers(page); | 2572 | bh = page_buffers(page); |
2366 | idx = offset >> inode->i_blkbits; | 2573 | idx = offset >> inode->i_blkbits; |
2367 | 2574 | ||
2368 | for (i=0; i < idx; i++) | 2575 | for (i = 0; i < idx; i++) |
2369 | bh = bh->b_this_page; | 2576 | bh = bh->b_this_page; |
2370 | 2577 | ||
2371 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2578 | if (!buffer_mapped(bh) || (buffer_delay(bh))) |
@@ -2383,9 +2590,22 @@ static int ext4_da_write_end(struct file *file, | |||
2383 | handle_t *handle = ext4_journal_current_handle(); | 2590 | handle_t *handle = ext4_journal_current_handle(); |
2384 | loff_t new_i_size; | 2591 | loff_t new_i_size; |
2385 | unsigned long start, end; | 2592 | unsigned long start, end; |
2593 | int write_mode = (int)(unsigned long)fsdata; | ||
2594 | |||
2595 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
2596 | if (ext4_should_order_data(inode)) { | ||
2597 | return ext4_ordered_write_end(file, mapping, pos, | ||
2598 | len, copied, page, fsdata); | ||
2599 | } else if (ext4_should_writeback_data(inode)) { | ||
2600 | return ext4_writeback_write_end(file, mapping, pos, | ||
2601 | len, copied, page, fsdata); | ||
2602 | } else { | ||
2603 | BUG(); | ||
2604 | } | ||
2605 | } | ||
2386 | 2606 | ||
2387 | start = pos & (PAGE_CACHE_SIZE - 1); | 2607 | start = pos & (PAGE_CACHE_SIZE - 1); |
2388 | end = start + copied -1; | 2608 | end = start + copied - 1; |
2389 | 2609 | ||
2390 | /* | 2610 | /* |
2391 | * generic_write_end() will run mark_inode_dirty() if i_size | 2611 | * generic_write_end() will run mark_inode_dirty() if i_size |
@@ -2409,6 +2629,11 @@ static int ext4_da_write_end(struct file *file, | |||
2409 | EXT4_I(inode)->i_disksize = new_i_size; | 2629 | EXT4_I(inode)->i_disksize = new_i_size; |
2410 | } | 2630 | } |
2411 | up_write(&EXT4_I(inode)->i_data_sem); | 2631 | up_write(&EXT4_I(inode)->i_data_sem); |
2632 | /* We need to mark inode dirty even if | ||
2633 | * new_i_size is less than inode->i_size | ||
2634 | * but greater than i_disksize (hint: delalloc) | ||
2635 | */ | ||
2636 | ext4_mark_inode_dirty(handle, inode); | ||
2412 | } | 2637 | } |
2413 | } | 2638 | } |
2414 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2639 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
@@ -2500,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2500 | return 0; | 2725 | return 0; |
2501 | } | 2726 | } |
2502 | 2727 | ||
2503 | return generic_block_bmap(mapping,block,ext4_get_block); | 2728 | return generic_block_bmap(mapping, block, ext4_get_block); |
2504 | } | 2729 | } |
2505 | 2730 | ||
2506 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2731 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -3106,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3106 | if (!partial->key && *partial->p) | 3331 | if (!partial->key && *partial->p) |
3107 | /* Writer: end */ | 3332 | /* Writer: end */ |
3108 | goto no_top; | 3333 | goto no_top; |
3109 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | 3334 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) |
3110 | ; | 3335 | ; |
3111 | /* | 3336 | /* |
3112 | * OK, we've found the last block that must survive. The rest of our | 3337 | * OK, we've found the last block that must survive. The rest of our |
@@ -3125,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3125 | } | 3350 | } |
3126 | /* Writer: end */ | 3351 | /* Writer: end */ |
3127 | 3352 | ||
3128 | while(partial > p) { | 3353 | while (partial > p) { |
3129 | brelse(partial->bh); | 3354 | brelse(partial->bh); |
3130 | partial--; | 3355 | partial--; |
3131 | } | 3356 | } |
@@ -3317,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3317 | /* This zaps the entire block. Bottom up. */ | 3542 | /* This zaps the entire block. Bottom up. */ |
3318 | BUFFER_TRACE(bh, "free child branches"); | 3543 | BUFFER_TRACE(bh, "free child branches"); |
3319 | ext4_free_branches(handle, inode, bh, | 3544 | ext4_free_branches(handle, inode, bh, |
3320 | (__le32*)bh->b_data, | 3545 | (__le32 *) bh->b_data, |
3321 | (__le32*)bh->b_data + addr_per_block, | 3546 | (__le32 *) bh->b_data + addr_per_block, |
3322 | depth); | 3547 | depth); |
3323 | 3548 | ||
3324 | /* | 3549 | /* |
3325 | * We've probably journalled the indirect block several | 3550 | * We've probably journalled the indirect block several |
@@ -3486,6 +3711,9 @@ void ext4_truncate(struct inode *inode) | |||
3486 | * modify the block allocation tree. | 3711 | * modify the block allocation tree. |
3487 | */ | 3712 | */ |
3488 | down_write(&ei->i_data_sem); | 3713 | down_write(&ei->i_data_sem); |
3714 | |||
3715 | ext4_discard_preallocations(inode); | ||
3716 | |||
3489 | /* | 3717 | /* |
3490 | * The orphan list entry will now protect us from any crash which | 3718 | * The orphan list entry will now protect us from any crash which |
3491 | * occurs before the truncate completes, so it is now safe to propagate | 3719 | * occurs before the truncate completes, so it is now safe to propagate |
@@ -3555,8 +3783,6 @@ do_indirects: | |||
3555 | ; | 3783 | ; |
3556 | } | 3784 | } |
3557 | 3785 | ||
3558 | ext4_discard_reservation(inode); | ||
3559 | |||
3560 | up_write(&ei->i_data_sem); | 3786 | up_write(&ei->i_data_sem); |
3561 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3787 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3562 | ext4_mark_inode_dirty(handle, inode); | 3788 | ext4_mark_inode_dirty(handle, inode); |
@@ -3581,41 +3807,6 @@ out_stop: | |||
3581 | ext4_journal_stop(handle); | 3807 | ext4_journal_stop(handle); |
3582 | } | 3808 | } |
3583 | 3809 | ||
3584 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
3585 | unsigned long ino, struct ext4_iloc *iloc) | ||
3586 | { | ||
3587 | ext4_group_t block_group; | ||
3588 | unsigned long offset; | ||
3589 | ext4_fsblk_t block; | ||
3590 | struct ext4_group_desc *gdp; | ||
3591 | |||
3592 | if (!ext4_valid_inum(sb, ino)) { | ||
3593 | /* | ||
3594 | * This error is already checked for in namei.c unless we are | ||
3595 | * looking at an NFS filehandle, in which case no error | ||
3596 | * report is needed | ||
3597 | */ | ||
3598 | return 0; | ||
3599 | } | ||
3600 | |||
3601 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
3602 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
3603 | if (!gdp) | ||
3604 | return 0; | ||
3605 | |||
3606 | /* | ||
3607 | * Figure out the offset within the block group inode table | ||
3608 | */ | ||
3609 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
3610 | EXT4_INODE_SIZE(sb); | ||
3611 | block = ext4_inode_table(sb, gdp) + | ||
3612 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
3613 | |||
3614 | iloc->block_group = block_group; | ||
3615 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
3616 | return block; | ||
3617 | } | ||
3618 | |||
3619 | /* | 3810 | /* |
3620 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3811 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3621 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3812 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -3625,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
3625 | static int __ext4_get_inode_loc(struct inode *inode, | 3816 | static int __ext4_get_inode_loc(struct inode *inode, |
3626 | struct ext4_iloc *iloc, int in_mem) | 3817 | struct ext4_iloc *iloc, int in_mem) |
3627 | { | 3818 | { |
3628 | ext4_fsblk_t block; | 3819 | struct ext4_group_desc *gdp; |
3629 | struct buffer_head *bh; | 3820 | struct buffer_head *bh; |
3821 | struct super_block *sb = inode->i_sb; | ||
3822 | ext4_fsblk_t block; | ||
3823 | int inodes_per_block, inode_offset; | ||
3824 | |||
3825 | iloc->bh = 0; | ||
3826 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
3827 | return -EIO; | ||
3630 | 3828 | ||
3631 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3829 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3632 | if (!block) | 3830 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3831 | if (!gdp) | ||
3633 | return -EIO; | 3832 | return -EIO; |
3634 | 3833 | ||
3635 | bh = sb_getblk(inode->i_sb, block); | 3834 | /* |
3835 | * Figure out the offset within the block group inode table | ||
3836 | */ | ||
3837 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
3838 | inode_offset = ((inode->i_ino - 1) % | ||
3839 | EXT4_INODES_PER_GROUP(sb)); | ||
3840 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
3841 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
3842 | |||
3843 | bh = sb_getblk(sb, block); | ||
3636 | if (!bh) { | 3844 | if (!bh) { |
3637 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3845 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
3638 | "unable to read inode block - " | 3846 | "inode block - inode=%lu, block=%llu", |
3639 | "inode=%lu, block=%llu", | 3847 | inode->i_ino, block); |
3640 | inode->i_ino, block); | ||
3641 | return -EIO; | 3848 | return -EIO; |
3642 | } | 3849 | } |
3643 | if (!buffer_uptodate(bh)) { | 3850 | if (!buffer_uptodate(bh)) { |
@@ -3665,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3665 | */ | 3872 | */ |
3666 | if (in_mem) { | 3873 | if (in_mem) { |
3667 | struct buffer_head *bitmap_bh; | 3874 | struct buffer_head *bitmap_bh; |
3668 | struct ext4_group_desc *desc; | 3875 | int i, start; |
3669 | int inodes_per_buffer; | ||
3670 | int inode_offset, i; | ||
3671 | ext4_group_t block_group; | ||
3672 | int start; | ||
3673 | |||
3674 | block_group = (inode->i_ino - 1) / | ||
3675 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
3676 | inodes_per_buffer = bh->b_size / | ||
3677 | EXT4_INODE_SIZE(inode->i_sb); | ||
3678 | inode_offset = ((inode->i_ino - 1) % | ||
3679 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
3680 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
3681 | 3876 | ||
3682 | /* Is the inode bitmap in cache? */ | 3877 | start = inode_offset & ~(inodes_per_block - 1); |
3683 | desc = ext4_get_group_desc(inode->i_sb, | ||
3684 | block_group, NULL); | ||
3685 | if (!desc) | ||
3686 | goto make_io; | ||
3687 | 3878 | ||
3688 | bitmap_bh = sb_getblk(inode->i_sb, | 3879 | /* Is the inode bitmap in cache? */ |
3689 | ext4_inode_bitmap(inode->i_sb, desc)); | 3880 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3690 | if (!bitmap_bh) | 3881 | if (!bitmap_bh) |
3691 | goto make_io; | 3882 | goto make_io; |
3692 | 3883 | ||
@@ -3699,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3699 | brelse(bitmap_bh); | 3890 | brelse(bitmap_bh); |
3700 | goto make_io; | 3891 | goto make_io; |
3701 | } | 3892 | } |
3702 | for (i = start; i < start + inodes_per_buffer; i++) { | 3893 | for (i = start; i < start + inodes_per_block; i++) { |
3703 | if (i == inode_offset) | 3894 | if (i == inode_offset) |
3704 | continue; | 3895 | continue; |
3705 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3896 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3706 | break; | 3897 | break; |
3707 | } | 3898 | } |
3708 | brelse(bitmap_bh); | 3899 | brelse(bitmap_bh); |
3709 | if (i == start + inodes_per_buffer) { | 3900 | if (i == start + inodes_per_block) { |
3710 | /* all other inodes are free, so skip I/O */ | 3901 | /* all other inodes are free, so skip I/O */ |
3711 | memset(bh->b_data, 0, bh->b_size); | 3902 | memset(bh->b_data, 0, bh->b_size); |
3712 | set_buffer_uptodate(bh); | 3903 | set_buffer_uptodate(bh); |
@@ -3717,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3717 | 3908 | ||
3718 | make_io: | 3909 | make_io: |
3719 | /* | 3910 | /* |
3911 | * If we need to do any I/O, try to pre-readahead extra | ||
3912 | * blocks from the inode table. | ||
3913 | */ | ||
3914 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
3915 | ext4_fsblk_t b, end, table; | ||
3916 | unsigned num; | ||
3917 | |||
3918 | table = ext4_inode_table(sb, gdp); | ||
3919 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
3920 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3921 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
3922 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
3923 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3924 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
3925 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
3926 | if (table > b) | ||
3927 | b = table; | ||
3928 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
3929 | num = EXT4_INODES_PER_GROUP(sb); | ||
3930 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3931 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3932 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
3933 | table += num / inodes_per_block; | ||
3934 | if (end > table) | ||
3935 | end = table; | ||
3936 | while (b <= end) | ||
3937 | sb_breadahead(sb, b++); | ||
3938 | } | ||
3939 | |||
3940 | /* | ||
3720 | * There are other valid inodes in the buffer, this inode | 3941 | * There are other valid inodes in the buffer, this inode |
3721 | * has in-inode xattrs, or we don't have this inode in memory. | 3942 | * has in-inode xattrs, or we don't have this inode in memory. |
3722 | * Read the block from disk. | 3943 | * Read the block from disk. |
@@ -3726,10 +3947,9 @@ make_io: | |||
3726 | submit_bh(READ_META, bh); | 3947 | submit_bh(READ_META, bh); |
3727 | wait_on_buffer(bh); | 3948 | wait_on_buffer(bh); |
3728 | if (!buffer_uptodate(bh)) { | 3949 | if (!buffer_uptodate(bh)) { |
3729 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3950 | ext4_error(sb, __func__, |
3730 | "unable to read inode block - " | 3951 | "unable to read inode block - inode=%lu, " |
3731 | "inode=%lu, block=%llu", | 3952 | "block=%llu", inode->i_ino, block); |
3732 | inode->i_ino, block); | ||
3733 | brelse(bh); | 3953 | brelse(bh); |
3734 | return -EIO; | 3954 | return -EIO; |
3735 | } | 3955 | } |
@@ -3821,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3821 | return inode; | 4041 | return inode; |
3822 | 4042 | ||
3823 | ei = EXT4_I(inode); | 4043 | ei = EXT4_I(inode); |
3824 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 4044 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3825 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 4045 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
3826 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 4046 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
3827 | #endif | 4047 | #endif |
3828 | ei->i_block_alloc_info = NULL; | ||
3829 | 4048 | ||
3830 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4049 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
3831 | if (ret < 0) | 4050 | if (ret < 0) |
@@ -3835,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3835 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4054 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3836 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4055 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3837 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4056 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
3838 | if(!(test_opt (inode->i_sb, NO_UID32))) { | 4057 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
3839 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4058 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3840 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4059 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3841 | } | 4060 | } |
@@ -3853,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3853 | if (inode->i_mode == 0 || | 4072 | if (inode->i_mode == 0 || |
3854 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 4073 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
3855 | /* this inode is deleted */ | 4074 | /* this inode is deleted */ |
3856 | brelse (bh); | 4075 | brelse(bh); |
3857 | ret = -ESTALE; | 4076 | ret = -ESTALE; |
3858 | goto bad_inode; | 4077 | goto bad_inode; |
3859 | } | 4078 | } |
@@ -3886,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3886 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4105 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
3887 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4106 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
3888 | EXT4_INODE_SIZE(inode->i_sb)) { | 4107 | EXT4_INODE_SIZE(inode->i_sb)) { |
3889 | brelse (bh); | 4108 | brelse(bh); |
3890 | ret = -EIO; | 4109 | ret = -EIO; |
3891 | goto bad_inode; | 4110 | goto bad_inode; |
3892 | } | 4111 | } |
@@ -3939,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3939 | init_special_inode(inode, inode->i_mode, | 4158 | init_special_inode(inode, inode->i_mode, |
3940 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4159 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
3941 | } | 4160 | } |
3942 | brelse (iloc.bh); | 4161 | brelse(iloc.bh); |
3943 | ext4_set_inode_flags(inode); | 4162 | ext4_set_inode_flags(inode); |
3944 | unlock_new_inode(inode); | 4163 | unlock_new_inode(inode); |
3945 | return inode; | 4164 | return inode; |
@@ -4021,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4021 | 4240 | ||
4022 | ext4_get_inode_flags(ei); | 4241 | ext4_get_inode_flags(ei); |
4023 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4242 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4024 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 4243 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4025 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 4244 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
4026 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 4245 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
4027 | /* | 4246 | /* |
4028 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4247 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
4029 | * re-used with the upper 16 bits of the uid/gid intact | 4248 | * re-used with the upper 16 bits of the uid/gid intact |
4030 | */ | 4249 | */ |
4031 | if(!ei->i_dtime) { | 4250 | if (!ei->i_dtime) { |
4032 | raw_inode->i_uid_high = | 4251 | raw_inode->i_uid_high = |
4033 | cpu_to_le16(high_16_bits(inode->i_uid)); | 4252 | cpu_to_le16(high_16_bits(inode->i_uid)); |
4034 | raw_inode->i_gid_high = | 4253 | raw_inode->i_gid_high = |
@@ -4116,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4116 | ei->i_state &= ~EXT4_STATE_NEW; | 4335 | ei->i_state &= ~EXT4_STATE_NEW; |
4117 | 4336 | ||
4118 | out_brelse: | 4337 | out_brelse: |
4119 | brelse (bh); | 4338 | brelse(bh); |
4120 | ext4_std_error(inode->i_sb, err); | 4339 | ext4_std_error(inode->i_sb, err); |
4121 | return err; | 4340 | return err; |
4122 | } | 4341 | } |
@@ -4324,57 +4543,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4324 | return 0; | 4543 | return 0; |
4325 | } | 4544 | } |
4326 | 4545 | ||
4546 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
4547 | int chunk) | ||
4548 | { | ||
4549 | int indirects; | ||
4550 | |||
4551 | /* if nrblocks are contiguous */ | ||
4552 | if (chunk) { | ||
4553 | /* | ||
4554 | * With N contiguous data blocks, we need at most | ||
4555 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | ||
4556 | * 2 dindirect blocks | ||
4557 | * 1 tindirect block | ||
4558 | */ | ||
4559 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4560 | return indirects + 3; | ||
4561 | } | ||
4562 | /* | ||
4563 | * if nrblocks are not contiguous, in the worst case each block touches | ||
4564 | * an indirect block, and each indirect block touches a double indirect | ||
4565 | * block, plus a triple indirect block | ||
4566 | */ | ||
4567 | indirects = nrblocks * 2 + 1; | ||
4568 | return indirects; | ||
4569 | } | ||
4570 | |||
4571 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4572 | { | ||
4573 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
4574 | return ext4_indirect_trans_blocks(inode, nrblocks, 0); | ||
4575 | return ext4_ext_index_trans_blocks(inode, nrblocks, 0); | ||
4576 | } | ||
4327 | /* | 4577 | /* |
4328 | * How many blocks doth make a writepage()? | 4578 | * Account for index blocks, block group bitmaps and block group |
4329 | * | 4579 | * descriptor blocks if we modify data blocks and index blocks. |
4330 | * With N blocks per page, it may be: | 4580 | * In the worst case, the index blocks spread over different block groups. |
4331 | * N data blocks | ||
4332 | * 2 indirect block | ||
4333 | * 2 dindirect | ||
4334 | * 1 tindirect | ||
4335 | * N+5 bitmap blocks (from the above) | ||
4336 | * N+5 group descriptor summary blocks | ||
4337 | * 1 inode block | ||
4338 | * 1 superblock. | ||
4339 | * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files | ||
4340 | * | 4581 | * |
4341 | * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS | 4582 | * If data blocks are discontiguous, they may be spread over |
4583 | * different block groups too. If they are contiguous, with flexbg, | ||
4584 | * they could still cross a block group boundary. | ||
4342 | * | 4585 | * |
4343 | * With ordered or writeback data it's the same, less the N data blocks. | 4586 | * Also account for superblock, inode, quota and xattr blocks |
4587 | */ | ||
4588 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4589 | { | ||
4590 | int groups, gdpblocks; | ||
4591 | int idxblocks; | ||
4592 | int ret = 0; | ||
4593 | |||
4594 | /* | ||
4595 | * How many index blocks do we need to touch to modify nrblocks? | ||
4596 | * The "chunk" flag indicates whether the nrblocks are | ||
4597 | * physically contiguous on disk. | ||
4598 | * | ||
4599 | * Direct IO and fallocate call get_block to allocate | ||
4600 | * one single extent at a time, so they can set the "chunk" flag. | ||
4601 | */ | ||
4602 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | ||
4603 | |||
4604 | ret = idxblocks; | ||
4605 | |||
4606 | /* | ||
4607 | * Now let's see how many group bitmaps and group descriptors we need | ||
4608 | * to account for | ||
4609 | */ | ||
4610 | groups = idxblocks; | ||
4611 | if (chunk) | ||
4612 | groups += 1; | ||
4613 | else | ||
4614 | groups += nrblocks; | ||
4615 | |||
4616 | gdpblocks = groups; | ||
4617 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | ||
4618 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | ||
4619 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | ||
4620 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | ||
4621 | |||
4622 | /* bitmaps and block group descriptor blocks */ | ||
4623 | ret += groups + gdpblocks; | ||
4624 | |||
4625 | /* Blocks for super block, inode, quota and xattr blocks */ | ||
4626 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
4627 | |||
4628 | return ret; | ||
4629 | } | ||
4630 | |||
4631 | /* | ||
4632 | * Calculate the total number of credits to reserve to fit | ||
4633 | * the modification of a single page into a single transaction, | ||
4634 | * which may include multiple chunks of block allocations. | ||
4344 | * | 4635 | * |
4345 | * If the inode's direct blocks can hold an integral number of pages then a | 4636 | * This could be called via ext4_write_begin() |
4346 | * page cannot straddle two indirect blocks, and we can only touch one indirect | ||
4347 | * and dindirect block, and the "5" above becomes "3". | ||
4348 | * | 4637 | * |
4349 | * This still overestimates under most circumstances. If we were to pass the | 4638 | * We need to consider the worst case, when |
4350 | * start and end offsets in here as well we could do block_to_path() on each | 4639 | * one new block per extent. |
4351 | * block and work out the exact number of indirects which are touched. Pah. | ||
4352 | */ | 4640 | */ |
4353 | |||
4354 | int ext4_writepage_trans_blocks(struct inode *inode) | 4641 | int ext4_writepage_trans_blocks(struct inode *inode) |
4355 | { | 4642 | { |
4356 | int bpp = ext4_journal_blocks_per_page(inode); | 4643 | int bpp = ext4_journal_blocks_per_page(inode); |
4357 | int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; | ||
4358 | int ret; | 4644 | int ret; |
4359 | 4645 | ||
4360 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 4646 | ret = ext4_meta_trans_blocks(inode, bpp, 0); |
4361 | return ext4_ext_writepage_trans_blocks(inode, bpp); | ||
4362 | 4647 | ||
4648 | /* Account for data blocks for journalled mode */ | ||
4363 | if (ext4_should_journal_data(inode)) | 4649 | if (ext4_should_journal_data(inode)) |
4364 | ret = 3 * (bpp + indirects) + 2; | 4650 | ret += bpp; |
4365 | else | ||
4366 | ret = 2 * (bpp + indirects) + 2; | ||
4367 | |||
4368 | #ifdef CONFIG_QUOTA | ||
4369 | /* We know that structure was already allocated during DQUOT_INIT so | ||
4370 | * we will be updating only the data blocks + inodes */ | ||
4371 | ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
4372 | #endif | ||
4373 | |||
4374 | return ret; | 4651 | return ret; |
4375 | } | 4652 | } |
4376 | 4653 | ||
4377 | /* | 4654 | /* |
4655 | * Calculate the journal credits for a chunk of data modification. | ||
4656 | * | ||
4657 | * This is called from DIO, fallocate or whoever calls | ||
4658 | * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks. | ||
4659 | * | ||
4660 | * Journal buffers for data blocks are not included here, as DIO | ||
4661 | * and fallocate do not need to journal data buffers. | ||
4662 | */ | ||
4663 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
4664 | { | ||
4665 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
4666 | } | ||
4667 | |||
4668 | /* | ||
4378 | * The caller must have previously called ext4_reserve_inode_write(). | 4669 | * The caller must have previously called ext4_reserve_inode_write(). |
4379 | * Given this, we know that the caller already has write access to iloc->bh. | 4670 | * Given this, we know that the caller already has write access to iloc->bh. |
4380 | */ | 4671 | */ |
@@ -4647,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4647 | loff_t size; | 4938 | loff_t size; |
4648 | unsigned long len; | 4939 | unsigned long len; |
4649 | int ret = -EINVAL; | 4940 | int ret = -EINVAL; |
4941 | void *fsdata; | ||
4650 | struct file *file = vma->vm_file; | 4942 | struct file *file = vma->vm_file; |
4651 | struct inode *inode = file->f_path.dentry->d_inode; | 4943 | struct inode *inode = file->f_path.dentry->d_inode; |
4652 | struct address_space *mapping = inode->i_mapping; | 4944 | struct address_space *mapping = inode->i_mapping; |
@@ -4685,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4685 | * on the same page though | 4977 | * on the same page though |
4686 | */ | 4978 | */ |
4687 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4979 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
4688 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4980 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
4689 | if (ret < 0) | 4981 | if (ret < 0) |
4690 | goto out_unlock; | 4982 | goto out_unlock; |
4691 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4983 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
4692 | len, len, page, NULL); | 4984 | len, len, page, fsdata); |
4693 | if (ret < 0) | 4985 | if (ret < 0) |
4694 | goto out_unlock; | 4986 | goto out_unlock; |
4695 | ret = 0; | 4987 | ret = 0; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba6..dc99b4776d58 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
24 | struct ext4_inode_info *ei = EXT4_I(inode); | 24 | struct ext4_inode_info *ei = EXT4_I(inode); |
25 | unsigned int flags; | 25 | unsigned int flags; |
26 | unsigned short rsv_window_size; | ||
27 | 26 | ||
28 | ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 27 | ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); |
29 | 28 | ||
30 | switch (cmd) { | 29 | switch (cmd) { |
31 | case EXT4_IOC_GETFLAGS: | 30 | case EXT4_IOC_GETFLAGS: |
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
34 | return put_user(flags, (int __user *) arg); | 33 | return put_user(flags, (int __user *) arg); |
35 | case EXT4_IOC_SETFLAGS: { | 34 | case EXT4_IOC_SETFLAGS: { |
36 | handle_t *handle = NULL; | 35 | handle_t *handle = NULL; |
37 | int err; | 36 | int err, migrate = 0; |
38 | struct ext4_iloc iloc; | 37 | struct ext4_iloc iloc; |
39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
40 | unsigned int jflag; | 39 | unsigned int jflag; |
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
82 | if (!capable(CAP_SYS_RESOURCE)) | 81 | if (!capable(CAP_SYS_RESOURCE)) |
83 | goto flags_out; | 82 | goto flags_out; |
84 | } | 83 | } |
84 | if (oldflags & EXT4_EXTENTS_FL) { | ||
85 | /* We don't support clearning extent flags */ | ||
86 | if (!(flags & EXT4_EXTENTS_FL)) { | ||
87 | err = -EOPNOTSUPP; | ||
88 | goto flags_out; | ||
89 | } | ||
90 | } else if (flags & EXT4_EXTENTS_FL) { | ||
91 | /* migrate the file */ | ||
92 | migrate = 1; | ||
93 | flags &= ~EXT4_EXTENTS_FL; | ||
94 | } | ||
85 | 95 | ||
86 | handle = ext4_journal_start(inode, 1); | 96 | handle = ext4_journal_start(inode, 1); |
87 | if (IS_ERR(handle)) { | 97 | if (IS_ERR(handle)) { |
@@ -109,6 +119,10 @@ flags_err: | |||
109 | 119 | ||
110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 120 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
111 | err = ext4_change_inode_journal_flag(inode, jflag); | 121 | err = ext4_change_inode_journal_flag(inode, jflag); |
122 | if (err) | ||
123 | goto flags_out; | ||
124 | if (migrate) | ||
125 | err = ext4_ext_migrate(inode); | ||
112 | flags_out: | 126 | flags_out: |
113 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
114 | mnt_drop_write(filp->f_path.mnt); | 128 | mnt_drop_write(filp->f_path.mnt); |
@@ -175,53 +189,10 @@ setversion_out: | |||
175 | return ret; | 189 | return ret; |
176 | } | 190 | } |
177 | #endif | 191 | #endif |
178 | case EXT4_IOC_GETRSVSZ: | ||
179 | if (test_opt(inode->i_sb, RESERVATION) | ||
180 | && S_ISREG(inode->i_mode) | ||
181 | && ei->i_block_alloc_info) { | ||
182 | rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; | ||
183 | return put_user(rsv_window_size, (int __user *)arg); | ||
184 | } | ||
185 | return -ENOTTY; | ||
186 | case EXT4_IOC_SETRSVSZ: { | ||
187 | int err; | ||
188 | |||
189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | ||
190 | return -ENOTTY; | ||
191 | |||
192 | if (!is_owner_or_cap(inode)) | ||
193 | return -EACCES; | ||
194 | |||
195 | if (get_user(rsv_window_size, (int __user *)arg)) | ||
196 | return -EFAULT; | ||
197 | |||
198 | err = mnt_want_write(filp->f_path.mnt); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | ||
203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | ||
204 | |||
205 | /* | ||
206 | * need to allocate reservation structure for this inode | ||
207 | * before set the window size | ||
208 | */ | ||
209 | down_write(&ei->i_data_sem); | ||
210 | if (!ei->i_block_alloc_info) | ||
211 | ext4_init_block_alloc_info(inode); | ||
212 | |||
213 | if (ei->i_block_alloc_info){ | ||
214 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | ||
215 | rsv->rsv_goal_size = rsv_window_size; | ||
216 | } | ||
217 | up_write(&ei->i_data_sem); | ||
218 | mnt_drop_write(filp->f_path.mnt); | ||
219 | return 0; | ||
220 | } | ||
221 | case EXT4_IOC_GROUP_EXTEND: { | 192 | case EXT4_IOC_GROUP_EXTEND: { |
222 | ext4_fsblk_t n_blocks_count; | 193 | ext4_fsblk_t n_blocks_count; |
223 | struct super_block *sb = inode->i_sb; | 194 | struct super_block *sb = inode->i_sb; |
224 | int err; | 195 | int err, err2; |
225 | 196 | ||
226 | if (!capable(CAP_SYS_RESOURCE)) | 197 | if (!capable(CAP_SYS_RESOURCE)) |
227 | return -EPERM; | 198 | return -EPERM; |
@@ -235,8 +206,10 @@ setversion_out: | |||
235 | 206 | ||
236 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 207 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
237 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 208 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
238 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 209 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
239 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 210 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
211 | if (err == 0) | ||
212 | err = err2; | ||
240 | mnt_drop_write(filp->f_path.mnt); | 213 | mnt_drop_write(filp->f_path.mnt); |
241 | 214 | ||
242 | return err; | 215 | return err; |
@@ -244,7 +217,7 @@ setversion_out: | |||
244 | case EXT4_IOC_GROUP_ADD: { | 217 | case EXT4_IOC_GROUP_ADD: { |
245 | struct ext4_new_group_data input; | 218 | struct ext4_new_group_data input; |
246 | struct super_block *sb = inode->i_sb; | 219 | struct super_block *sb = inode->i_sb; |
247 | int err; | 220 | int err, err2; |
248 | 221 | ||
249 | if (!capable(CAP_SYS_RESOURCE)) | 222 | if (!capable(CAP_SYS_RESOURCE)) |
250 | return -EPERM; | 223 | return -EPERM; |
@@ -259,15 +232,36 @@ setversion_out: | |||
259 | 232 | ||
260 | err = ext4_group_add(sb, &input); | 233 | err = ext4_group_add(sb, &input); |
261 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 234 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
262 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 235 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
263 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 236 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
237 | if (err == 0) | ||
238 | err = err2; | ||
264 | mnt_drop_write(filp->f_path.mnt); | 239 | mnt_drop_write(filp->f_path.mnt); |
265 | 240 | ||
266 | return err; | 241 | return err; |
267 | } | 242 | } |
268 | 243 | ||
269 | case EXT4_IOC_MIGRATE: | 244 | case EXT4_IOC_MIGRATE: |
270 | return ext4_ext_migrate(inode, filp, cmd, arg); | 245 | { |
246 | int err; | ||
247 | if (!is_owner_or_cap(inode)) | ||
248 | return -EACCES; | ||
249 | |||
250 | err = mnt_want_write(filp->f_path.mnt); | ||
251 | if (err) | ||
252 | return err; | ||
253 | /* | ||
254 | * inode_mutex prevent write and truncate on the file. | ||
255 | * Read still goes through. We take i_data_sem in | ||
256 | * ext4_ext_swap_inode_data before we switch the | ||
257 | * inode format to prevent read. | ||
258 | */ | ||
259 | mutex_lock(&(inode->i_mutex)); | ||
260 | err = ext4_ext_migrate(inode); | ||
261 | mutex_unlock(&(inode->i_mutex)); | ||
262 | mnt_drop_write(filp->f_path.mnt); | ||
263 | return err; | ||
264 | } | ||
271 | 265 | ||
272 | default: | 266 | default: |
273 | return -ENOTTY; | 267 | return -ENOTTY; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..b580714f0d85 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
483 | BUG(); | 484 | BUG(); |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
533 | void *buddy; | 534 | void *buddy; |
534 | void *buddy2; | 535 | void *buddy2; |
535 | 536 | ||
536 | if (!test_opt(sb, MBALLOC)) | ||
537 | return 0; | ||
538 | |||
539 | { | 537 | { |
540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
785 | goto out; | 783 | goto out; |
786 | 784 | ||
787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
788 | continue; | 787 | continue; |
789 | 788 | ||
789 | lock_buffer(bh[i]); | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2169 | { | 2169 | { |
2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2171 | 2171 | ||
2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
2175 | } | ||
2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
2176 | } | 2177 | } |
2177 | 2178 | ||
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2181 | int i; | 2182 | int i; |
2182 | 2183 | ||
2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
2188 | } | 2189 | } |
2189 | 2190 | ||
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2485 | unsigned max; | 2486 | unsigned max; |
2486 | int ret; | 2487 | int ret; |
2487 | 2488 | ||
2488 | if (!test_opt(sb, MBALLOC)) | ||
2489 | return 0; | ||
2490 | |||
2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2489 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
2492 | 2490 | ||
2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2491 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2494 | if (sbi->s_mb_offsets == NULL) { | 2492 | if (sbi->s_mb_offsets == NULL) { |
2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2496 | return -ENOMEM; | 2493 | return -ENOMEM; |
2497 | } | 2494 | } |
2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2495 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2499 | if (sbi->s_mb_maxs == NULL) { | 2496 | if (sbi->s_mb_maxs == NULL) { |
2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2501 | kfree(sbi->s_mb_maxs); | 2497 | kfree(sbi->s_mb_maxs); |
2502 | return -ENOMEM; | 2498 | return -ENOMEM; |
2503 | } | 2499 | } |
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2520 | /* init file for buddy data */ | 2516 | /* init file for buddy data */ |
2521 | ret = ext4_mb_init_backend(sb); | 2517 | ret = ext4_mb_init_backend(sb); |
2522 | if (ret != 0) { | 2518 | if (ret != 0) { |
2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2524 | kfree(sbi->s_mb_offsets); | 2519 | kfree(sbi->s_mb_offsets); |
2525 | kfree(sbi->s_mb_maxs); | 2520 | kfree(sbi->s_mb_maxs); |
2526 | return ret; | 2521 | return ret; |
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2535 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2536 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2542 | 2537 | ||
2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2538 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2545 | if (sbi->s_locality_groups == NULL) { | 2539 | if (sbi->s_locality_groups == NULL) { |
2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2547 | kfree(sbi->s_mb_offsets); | 2540 | kfree(sbi->s_mb_offsets); |
2548 | kfree(sbi->s_mb_maxs); | 2541 | kfree(sbi->s_mb_maxs); |
2549 | return -ENOMEM; | 2542 | return -ENOMEM; |
2550 | } | 2543 | } |
2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2544 | for_each_possible_cpu(i) { |
2552 | struct ext4_locality_group *lg; | 2545 | struct ext4_locality_group *lg; |
2553 | lg = &sbi->s_locality_groups[i]; | 2546 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
2554 | mutex_init(&lg->lg_mutex); | 2547 | mutex_init(&lg->lg_mutex); |
2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2548 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2549 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2560 | ext4_mb_init_per_dev_proc(sb); | 2553 | ext4_mb_init_per_dev_proc(sb); |
2561 | ext4_mb_history_init(sb); | 2554 | ext4_mb_history_init(sb); |
2562 | 2555 | ||
2563 | printk("EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2564 | return 0; | 2557 | return 0; |
2565 | } | 2558 | } |
2566 | 2559 | ||
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2591 | 2584 | ||
2592 | if (!test_opt(sb, MBALLOC)) | ||
2593 | return 0; | ||
2594 | |||
2595 | /* release freed, non-committed blocks */ | 2585 | /* release freed, non-committed blocks */ |
2596 | spin_lock(&sbi->s_md_lock); | 2586 | spin_lock(&sbi->s_md_lock); |
2597 | list_splice_init(&sbi->s_closed_transaction, | 2587 | list_splice_init(&sbi->s_closed_transaction, |
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2647 | atomic_read(&sbi->s_mb_discarded)); | 2637 | atomic_read(&sbi->s_mb_discarded)); |
2648 | } | 2638 | } |
2649 | 2639 | ||
2650 | kfree(sbi->s_locality_groups); | 2640 | free_percpu(sbi->s_locality_groups); |
2651 | |||
2652 | ext4_mb_history_release(sb); | 2641 | ext4_mb_history_release(sb); |
2653 | ext4_mb_destroy_per_dev_proc(sb); | 2642 | ext4_mb_destroy_per_dev_proc(sb); |
2654 | 2643 | ||
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2710 | #define EXT4_MB_STREAM_REQ "stream_req" |
2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2711 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
2723 | 2712 | ||
2724 | |||
2725 | |||
2726 | #define MB_PROC_FOPS(name) \ | ||
2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
2728 | { \ | ||
2729 | struct ext4_sb_info *sbi = m->private; \ | ||
2730 | \ | ||
2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
2732 | return 0; \ | ||
2733 | } \ | ||
2734 | \ | ||
2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
2736 | { \ | ||
2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
2738 | } \ | ||
2739 | \ | ||
2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
2742 | { \ | ||
2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
2744 | char str[32]; \ | ||
2745 | long value; \ | ||
2746 | if (cnt >= sizeof(str)) \ | ||
2747 | return -EINVAL; \ | ||
2748 | if (copy_from_user(str, buf, cnt)) \ | ||
2749 | return -EFAULT; \ | ||
2750 | value = simple_strtol(str, NULL, 0); \ | ||
2751 | if (value <= 0) \ | ||
2752 | return -ERANGE; \ | ||
2753 | sbi->s_mb_##name = value; \ | ||
2754 | return cnt; \ | ||
2755 | } \ | ||
2756 | \ | ||
2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
2758 | .owner = THIS_MODULE, \ | ||
2759 | .open = ext4_mb_##name##_proc_open, \ | ||
2760 | .read = seq_read, \ | ||
2761 | .llseek = seq_lseek, \ | ||
2762 | .release = single_release, \ | ||
2763 | .write = ext4_mb_##name##_proc_write, \ | ||
2764 | }; | ||
2765 | |||
2766 | MB_PROC_FOPS(stats); | ||
2767 | MB_PROC_FOPS(max_to_scan); | ||
2768 | MB_PROC_FOPS(min_to_scan); | ||
2769 | MB_PROC_FOPS(order2_reqs); | ||
2770 | MB_PROC_FOPS(stream_request); | ||
2771 | MB_PROC_FOPS(group_prealloc); | ||
2772 | |||
2773 | #define MB_PROC_HANDLER(name, var) \ | ||
2774 | do { \ | ||
2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
2777 | if (proc == NULL) { \ | ||
2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
2779 | goto err_out; \ | ||
2780 | } \ | ||
2781 | } while (0) | ||
2782 | |||
2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2713 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2784 | { | 2714 | { |
2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2715 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2716 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2787 | struct proc_dir_entry *proc; | 2717 | struct proc_dir_entry *proc; |
2788 | char devname[64]; | ||
2789 | 2718 | ||
2790 | if (proc_root_ext4 == NULL) { | 2719 | if (sbi->s_proc == NULL) |
2791 | sbi->s_mb_proc = NULL; | ||
2792 | return -EINVAL; | 2720 | return -EINVAL; |
2793 | } | ||
2794 | bdevname(sb->s_bdev, devname); | ||
2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2796 | |||
2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2803 | 2721 | ||
2722 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2723 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2724 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2725 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2726 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2727 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2804 | return 0; | 2728 | return 0; |
2805 | 2729 | ||
2806 | err_out: | 2730 | err_out: |
2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2731 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2732 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2733 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2734 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2735 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2736 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2814 | remove_proc_entry(devname, proc_root_ext4); | ||
2815 | sbi->s_mb_proc = NULL; | ||
2816 | |||
2817 | return -ENOMEM; | 2737 | return -ENOMEM; |
2818 | } | 2738 | } |
2819 | 2739 | ||
2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2740 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2821 | { | 2741 | { |
2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2742 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2823 | char devname[64]; | ||
2824 | 2743 | ||
2825 | if (sbi->s_mb_proc == NULL) | 2744 | if (sbi->s_proc == NULL) |
2826 | return -EINVAL; | 2745 | return -EINVAL; |
2827 | 2746 | ||
2828 | bdevname(sb->s_bdev, devname); | 2747 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2748 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2749 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2750 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2751 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2752 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2835 | remove_proc_entry(devname, proc_root_ext4); | ||
2836 | 2753 | ||
2837 | return 0; | 2754 | return 0; |
2838 | } | 2755 | } |
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void) | |||
2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2771 | kmem_cache_destroy(ext4_pspace_cachep); |
2855 | return -ENOMEM; | 2772 | return -ENOMEM; |
2856 | } | 2773 | } |
2857 | #ifdef CONFIG_PROC_FS | ||
2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | ||
2859 | if (proc_root_ext4 == NULL) | ||
2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | ||
2861 | #endif | ||
2862 | return 0; | 2774 | return 0; |
2863 | } | 2775 | } |
2864 | 2776 | ||
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void) | |||
2867 | /* XXX: synchronize_rcu(); */ | 2779 | /* XXX: synchronize_rcu(); */ |
2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2780 | kmem_cache_destroy(ext4_pspace_cachep); |
2869 | kmem_cache_destroy(ext4_ac_cachep); | 2781 | kmem_cache_destroy(ext4_ac_cachep); |
2870 | #ifdef CONFIG_PROC_FS | ||
2871 | remove_proc_entry("fs/ext4", NULL); | ||
2872 | #endif | ||
2873 | } | 2782 | } |
2874 | 2783 | ||
2875 | 2784 | ||
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void) | |||
2879 | */ | 2788 | */ |
2880 | static noinline_for_stack int | 2789 | static noinline_for_stack int |
2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2790 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2882 | handle_t *handle) | 2791 | handle_t *handle, unsigned long reserv_blks) |
2883 | { | 2792 | { |
2884 | struct buffer_head *bitmap_bh = NULL; | 2793 | struct buffer_head *bitmap_bh = NULL; |
2885 | struct ext4_super_block *es; | 2794 | struct ext4_super_block *es; |
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2877 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2878 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2879 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2971 | 2880 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
2972 | /* | 2881 | /* |
2973 | * free blocks account has already be reduced/reserved | 2882 | * Now reduce the dirty block count also. Should not go negative |
2974 | * at write_begin() time for delayed allocation | ||
2975 | * do not double accounting | ||
2976 | */ | 2883 | */ |
2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2884 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2885 | /* release all the reserved blocks if non delalloc */ |
2979 | ac->ac_b_ex.fe_len); | 2886 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2887 | else | ||
2888 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2889 | ac->ac_b_ex.fe_len); | ||
2980 | 2890 | ||
2981 | if (sbi->s_log_groups_per_flex) { | 2891 | if (sbi->s_log_groups_per_flex) { |
2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2892 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3282,6 +3192,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3192 | } |
3283 | 3193 | ||
3284 | /* | 3194 | /* |
3195 | * Return the prealloc space that have minimal distance | ||
3196 | * from the goal block. @cpa is the prealloc | ||
3197 | * space that is having currently known minimal distance | ||
3198 | * from the goal block. | ||
3199 | */ | ||
3200 | static struct ext4_prealloc_space * | ||
3201 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3202 | struct ext4_prealloc_space *pa, | ||
3203 | struct ext4_prealloc_space *cpa) | ||
3204 | { | ||
3205 | ext4_fsblk_t cur_distance, new_distance; | ||
3206 | |||
3207 | if (cpa == NULL) { | ||
3208 | atomic_inc(&pa->pa_count); | ||
3209 | return pa; | ||
3210 | } | ||
3211 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3212 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3213 | |||
3214 | if (cur_distance < new_distance) | ||
3215 | return cpa; | ||
3216 | |||
3217 | /* drop the previous reference */ | ||
3218 | atomic_dec(&cpa->pa_count); | ||
3219 | atomic_inc(&pa->pa_count); | ||
3220 | return pa; | ||
3221 | } | ||
3222 | |||
3223 | /* | ||
3285 | * search goal blocks in preallocated space | 3224 | * search goal blocks in preallocated space |
3286 | */ | 3225 | */ |
3287 | static noinline_for_stack int | 3226 | static noinline_for_stack int |
@@ -3290,7 +3229,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3229 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3230 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3231 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3232 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3233 | ext4_fsblk_t goal_block; | ||
3294 | 3234 | ||
3295 | /* only data can be preallocated */ | 3235 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3236 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3273,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3273 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3274 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3275 | ||
3276 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3277 | ac->ac_g_ex.fe_start + | ||
3278 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3279 | /* | ||
3280 | * search for the prealloc space that is having | ||
3281 | * minimal distance from the goal block. | ||
3282 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3283 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3284 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3285 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3287,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3287 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3288 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3289 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3290 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3291 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3292 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3293 | } |
3350 | spin_unlock(&pa->pa_lock); | 3294 | spin_unlock(&pa->pa_lock); |
3351 | } | 3295 | } |
3352 | rcu_read_unlock(); | 3296 | rcu_read_unlock(); |
3353 | } | 3297 | } |
3298 | if (cpa) { | ||
3299 | ext4_mb_use_group_pa(ac, cpa); | ||
3300 | ac->ac_criteria = 20; | ||
3301 | return 1; | ||
3302 | } | ||
3354 | return 0; | 3303 | return 0; |
3355 | } | 3304 | } |
3356 | 3305 | ||
@@ -3845,7 +3794,7 @@ out: | |||
3845 | * | 3794 | * |
3846 | * FIXME!! Make sure it is valid at all the call sites | 3795 | * FIXME!! Make sure it is valid at all the call sites |
3847 | */ | 3796 | */ |
3848 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3797 | void ext4_discard_preallocations(struct inode *inode) |
3849 | { | 3798 | { |
3850 | struct ext4_inode_info *ei = EXT4_I(inode); | 3799 | struct ext4_inode_info *ei = EXT4_I(inode); |
3851 | struct super_block *sb = inode->i_sb; | 3800 | struct super_block *sb = inode->i_sb; |
@@ -3857,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
3857 | struct ext4_buddy e4b; | 3806 | struct ext4_buddy e4b; |
3858 | int err; | 3807 | int err; |
3859 | 3808 | ||
3860 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3809 | if (!S_ISREG(inode->i_mode)) { |
3861 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3810 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
3862 | return; | 3811 | return; |
3863 | } | 3812 | } |
@@ -4055,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4055 | * per cpu locality group is to reduce the contention between block | 4004 | * per cpu locality group is to reduce the contention between block |
4056 | * request from multiple CPUs. | 4005 | * request from multiple CPUs. |
4057 | */ | 4006 | */ |
4058 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4007 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
4059 | put_cpu(); | ||
4060 | 4008 | ||
4061 | /* we're going to use group allocation */ | 4009 | /* we're going to use group allocation */ |
4062 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4010 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4330,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4330 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4278 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4331 | struct ext4_allocation_request *ar, int *errp) | 4279 | struct ext4_allocation_request *ar, int *errp) |
4332 | { | 4280 | { |
4281 | int freed; | ||
4333 | struct ext4_allocation_context *ac = NULL; | 4282 | struct ext4_allocation_context *ac = NULL; |
4334 | struct ext4_sb_info *sbi; | 4283 | struct ext4_sb_info *sbi; |
4335 | struct super_block *sb; | 4284 | struct super_block *sb; |
4336 | ext4_fsblk_t block = 0; | 4285 | ext4_fsblk_t block = 0; |
4337 | int freed; | 4286 | unsigned long inquota; |
4338 | int inquota; | 4287 | unsigned long reserv_blks = 0; |
4339 | 4288 | ||
4340 | sb = ar->inode->i_sb; | 4289 | sb = ar->inode->i_sb; |
4341 | sbi = EXT4_SB(sb); | 4290 | sbi = EXT4_SB(sb); |
4342 | 4291 | ||
4343 | if (!test_opt(sb, MBALLOC)) { | ||
4344 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
4345 | &(ar->len), errp); | ||
4346 | return block; | ||
4347 | } | ||
4348 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4292 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4349 | /* | 4293 | /* |
4350 | * With delalloc we already reserved the blocks | 4294 | * With delalloc we already reserved the blocks |
4351 | */ | 4295 | */ |
4352 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4296 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4353 | } | 4297 | /* let others to free the space */ |
4354 | 4298 | yield(); | |
4355 | if (ar->len == 0) { | 4299 | ar->len = ar->len >> 1; |
4356 | *errp = -ENOSPC; | 4300 | } |
4357 | return 0; | 4301 | if (!ar->len) { |
4302 | *errp = -ENOSPC; | ||
4303 | return 0; | ||
4304 | } | ||
4305 | reserv_blks = ar->len; | ||
4358 | } | 4306 | } |
4359 | |||
4360 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4307 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
4361 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4308 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4362 | ar->len--; | 4309 | ar->len--; |
@@ -4402,7 +4349,7 @@ repeat: | |||
4402 | } | 4349 | } |
4403 | 4350 | ||
4404 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4351 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4405 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4352 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4406 | if (*errp == -EAGAIN) { | 4353 | if (*errp == -EAGAIN) { |
4407 | ac->ac_b_ex.fe_group = 0; | 4354 | ac->ac_b_ex.fe_group = 0; |
4408 | ac->ac_b_ex.fe_start = 0; | 4355 | ac->ac_b_ex.fe_start = 0; |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a75..b3b4828f8b89 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |||
257 | 257 | ||
258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
259 | 259 | ||
260 | static struct proc_dir_entry *proc_root_ext4; | ||
261 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 260 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
262 | 261 | ||
263 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 262 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..f2a9cf498ecd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
53 | * credit. But below we try to not accumalate too much | 53 | * credit. But below we try to not accumalate too much |
54 | * of them by restarting the journal. | 54 | * of them by restarting the journal. |
55 | */ | 55 | */ |
56 | needed = ext4_ext_calc_credits_for_insert(inode, path); | 56 | needed = ext4_ext_calc_credits_for_single_extent(inode, |
57 | lb->last_block - lb->first_block + 1, path); | ||
57 | 58 | ||
58 | /* | 59 | /* |
59 | * Make sure the credit we accumalated is not really high | 60 | * Make sure the credit we accumalated is not really high |
@@ -446,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) | |||
446 | 447 | ||
447 | } | 448 | } |
448 | 449 | ||
449 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | 450 | int ext4_ext_migrate(struct inode *inode) |
450 | unsigned int cmd, unsigned long arg) | ||
451 | { | 451 | { |
452 | handle_t *handle; | 452 | handle_t *handle; |
453 | int retval = 0, i; | 453 | int retval = 0, i; |
@@ -515,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
515 | * when we add extents we extent the journal | 515 | * when we add extents we extent the journal |
516 | */ | 516 | */ |
517 | /* | 517 | /* |
518 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
519 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
520 | * switch the inode format to prevent read. | ||
521 | */ | ||
522 | mutex_lock(&(inode->i_mutex)); | ||
523 | /* | ||
524 | * Even though we take i_mutex we can still cause block allocation | 518 | * Even though we take i_mutex we can still cause block allocation |
525 | * via mmap write to holes. If we have allocated new blocks we fail | 519 | * via mmap write to holes. If we have allocated new blocks we fail |
526 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 520 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. |
@@ -622,7 +616,6 @@ err_out: | |||
622 | tmp_inode->i_nlink = 0; | 616 | tmp_inode->i_nlink = 0; |
623 | 617 | ||
624 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
625 | mutex_unlock(&(inode->i_mutex)); | ||
626 | 619 | ||
627 | if (tmp_inode) | 620 | if (tmp_inode) |
628 | iput(tmp_inode); | 621 | iput(tmp_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c3..92db9e945147 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -151,34 +151,36 @@ struct dx_map_entry | |||
151 | 151 | ||
152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
154 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 154 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
155 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 155 | static void dx_set_hash(struct dx_entry *entry, unsigned value); |
156 | static unsigned dx_get_count (struct dx_entry *entries); | 156 | static unsigned dx_get_count(struct dx_entry *entries); |
157 | static unsigned dx_get_limit (struct dx_entry *entries); | 157 | static unsigned dx_get_limit(struct dx_entry *entries); |
158 | static void dx_set_count (struct dx_entry *entries, unsigned value); | 158 | static void dx_set_count(struct dx_entry *entries, unsigned value); |
159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit(struct dx_entry *entries, unsigned value); |
160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit(struct inode *dir, unsigned infosize); |
161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit(struct inode *dir); |
162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
163 | struct inode *dir, | 163 | struct inode *dir, |
164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
166 | int *err); | 166 | int *err); |
167 | static void dx_release (struct dx_frame *frames); | 167 | static void dx_release(struct dx_frame *frames); |
168 | static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | 168 | static int dx_make_map(struct ext4_dir_entry_2 *de, int size, |
169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
171 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 171 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
172 | struct dx_map_entry *offsets, int count); | 172 | struct dx_map_entry *offsets, int count); |
173 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 173 | static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); |
174 | static void dx_insert_block(struct dx_frame *frame, | 174 | static void dx_insert_block(struct dx_frame *frame, |
175 | u32 hash, ext4_lblk_t block); | 175 | u32 hash, ext4_lblk_t block); |
176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
177 | struct dx_frame *frame, | 177 | struct dx_frame *frame, |
178 | struct dx_frame *frames, | 178 | struct dx_frame *frames, |
179 | __u32 *start_hash); | 179 | __u32 *start_hash); |
180 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 180 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
181 | struct ext4_dir_entry_2 **res_dir, int *err); | 181 | const struct qstr *d_name, |
182 | struct ext4_dir_entry_2 **res_dir, | ||
183 | int *err); | ||
182 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 184 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
183 | struct inode *inode); | 185 | struct inode *inode); |
184 | 186 | ||
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) | |||
207 | entry->block = cpu_to_le32(value); | 209 | entry->block = cpu_to_le32(value); |
208 | } | 210 | } |
209 | 211 | ||
210 | static inline unsigned dx_get_hash (struct dx_entry *entry) | 212 | static inline unsigned dx_get_hash(struct dx_entry *entry) |
211 | { | 213 | { |
212 | return le32_to_cpu(entry->hash); | 214 | return le32_to_cpu(entry->hash); |
213 | } | 215 | } |
214 | 216 | ||
215 | static inline void dx_set_hash (struct dx_entry *entry, unsigned value) | 217 | static inline void dx_set_hash(struct dx_entry *entry, unsigned value) |
216 | { | 218 | { |
217 | entry->hash = cpu_to_le32(value); | 219 | entry->hash = cpu_to_le32(value); |
218 | } | 220 | } |
219 | 221 | ||
220 | static inline unsigned dx_get_count (struct dx_entry *entries) | 222 | static inline unsigned dx_get_count(struct dx_entry *entries) |
221 | { | 223 | { |
222 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); | 224 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); |
223 | } | 225 | } |
224 | 226 | ||
225 | static inline unsigned dx_get_limit (struct dx_entry *entries) | 227 | static inline unsigned dx_get_limit(struct dx_entry *entries) |
226 | { | 228 | { |
227 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); | 229 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); |
228 | } | 230 | } |
229 | 231 | ||
230 | static inline void dx_set_count (struct dx_entry *entries, unsigned value) | 232 | static inline void dx_set_count(struct dx_entry *entries, unsigned value) |
231 | { | 233 | { |
232 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); | 234 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); |
233 | } | 235 | } |
234 | 236 | ||
235 | static inline void dx_set_limit (struct dx_entry *entries, unsigned value) | 237 | static inline void dx_set_limit(struct dx_entry *entries, unsigned value) |
236 | { | 238 | { |
237 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); | 239 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); |
238 | } | 240 | } |
239 | 241 | ||
240 | static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | 242 | static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) |
241 | { | 243 | { |
242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 244 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
243 | EXT4_DIR_REC_LEN(2) - infosize; | 245 | EXT4_DIR_REC_LEN(2) - infosize; |
244 | return entry_space / sizeof(struct dx_entry); | 246 | return entry_space / sizeof(struct dx_entry); |
245 | } | 247 | } |
246 | 248 | ||
247 | static inline unsigned dx_node_limit (struct inode *dir) | 249 | static inline unsigned dx_node_limit(struct inode *dir) |
248 | { | 250 | { |
249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 251 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
250 | return entry_space / sizeof(struct dx_entry); | 252 | return entry_space / sizeof(struct dx_entry); |
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) | |||
254 | * Debug | 256 | * Debug |
255 | */ | 257 | */ |
256 | #ifdef DX_DEBUG | 258 | #ifdef DX_DEBUG |
257 | static void dx_show_index (char * label, struct dx_entry *entries) | 259 | static void dx_show_index(char * label, struct dx_entry *entries) |
258 | { | 260 | { |
259 | int i, n = dx_get_count (entries); | 261 | int i, n = dx_get_count (entries); |
260 | printk("%s index ", label); | 262 | printk(KERN_DEBUG "%s index ", label); |
261 | for (i = 0; i < n; i++) { | 263 | for (i = 0; i < n; i++) { |
262 | printk("%x->%lu ", i? dx_get_hash(entries + i) : | 264 | printk("%x->%lu ", i ? dx_get_hash(entries + i) : |
263 | 0, (unsigned long)dx_get_block(entries + i)); | 265 | 0, (unsigned long)dx_get_block(entries + i)); |
264 | } | 266 | } |
265 | printk("\n"); | 267 | printk("\n"); |
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
306 | struct dx_entry *entries, int levels) | 308 | struct dx_entry *entries, int levels) |
307 | { | 309 | { |
308 | unsigned blocksize = dir->i_sb->s_blocksize; | 310 | unsigned blocksize = dir->i_sb->s_blocksize; |
309 | unsigned count = dx_get_count (entries), names = 0, space = 0, i; | 311 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
310 | unsigned bcount = 0; | 312 | unsigned bcount = 0; |
311 | struct buffer_head *bh; | 313 | struct buffer_head *bh; |
312 | int err; | 314 | int err; |
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
325 | names += stats.names; | 327 | names += stats.names; |
326 | space += stats.space; | 328 | space += stats.space; |
327 | bcount += stats.bcount; | 329 | bcount += stats.bcount; |
328 | brelse (bh); | 330 | brelse(bh); |
329 | } | 331 | } |
330 | if (bcount) | 332 | if (bcount) |
331 | printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", | 333 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
332 | names, space/bcount,(space/bcount)*100/blocksize); | 334 | levels ? "" : " ", names, space/bcount, |
335 | (space/bcount)*100/blocksize); | ||
333 | return (struct stats) { names, space, bcount}; | 336 | return (struct stats) { names, space, bcount}; |
334 | } | 337 | } |
335 | #endif /* DX_DEBUG */ | 338 | #endif /* DX_DEBUG */ |
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
344 | * back to userspace. | 347 | * back to userspace. |
345 | */ | 348 | */ |
346 | static struct dx_frame * | 349 | static struct dx_frame * |
347 | dx_probe(struct dentry *dentry, struct inode *dir, | 350 | dx_probe(const struct qstr *d_name, struct inode *dir, |
348 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 351 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
349 | { | 352 | { |
350 | unsigned count, indirect; | 353 | unsigned count, indirect; |
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
355 | u32 hash; | 358 | u32 hash; |
356 | 359 | ||
357 | frame->bh = NULL; | 360 | frame->bh = NULL; |
358 | if (dentry) | ||
359 | dir = dentry->d_parent->d_inode; | ||
360 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 361 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) |
361 | goto fail; | 362 | goto fail; |
362 | root = (struct dx_root *) bh->b_data; | 363 | root = (struct dx_root *) bh->b_data; |
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
372 | } | 373 | } |
373 | hinfo->hash_version = root->info.hash_version; | 374 | hinfo->hash_version = root->info.hash_version; |
374 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 375 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
375 | if (dentry) | 376 | if (d_name) |
376 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 377 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
377 | hash = hinfo->hash; | 378 | hash = hinfo->hash; |
378 | 379 | ||
379 | if (root->info.unused_flags & 1) { | 380 | if (root->info.unused_flags & 1) { |
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
406 | goto fail; | 407 | goto fail; |
407 | } | 408 | } |
408 | 409 | ||
409 | dxtrace (printk("Look up %x", hash)); | 410 | dxtrace(printk("Look up %x", hash)); |
410 | while (1) | 411 | while (1) |
411 | { | 412 | { |
412 | count = dx_get_count(entries); | 413 | count = dx_get_count(entries); |
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
555 | 0, &err))) | 556 | 0, &err))) |
556 | return err; /* Failure */ | 557 | return err; /* Failure */ |
557 | p++; | 558 | p++; |
558 | brelse (p->bh); | 559 | brelse(p->bh); |
559 | p->bh = bh; | 560 | p->bh = bh; |
560 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; | 561 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; |
561 | } | 562 | } |
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
593 | /* On error, skip the f_pos to the next block. */ | 594 | /* On error, skip the f_pos to the next block. */ |
594 | dir_file->f_pos = (dir_file->f_pos | | 595 | dir_file->f_pos = (dir_file->f_pos | |
595 | (dir->i_sb->s_blocksize - 1)) + 1; | 596 | (dir->i_sb->s_blocksize - 1)) + 1; |
596 | brelse (bh); | 597 | brelse(bh); |
597 | return count; | 598 | return count; |
598 | } | 599 | } |
599 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 600 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
635 | int ret, err; | 636 | int ret, err; |
636 | __u32 hashval; | 637 | __u32 hashval; |
637 | 638 | ||
638 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 639 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
639 | start_minor_hash)); | 640 | start_hash, start_minor_hash)); |
640 | dir = dir_file->f_path.dentry->d_inode; | 641 | dir = dir_file->f_path.dentry->d_inode; |
641 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 642 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
642 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 643 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
648 | } | 649 | } |
649 | hinfo.hash = start_hash; | 650 | hinfo.hash = start_hash; |
650 | hinfo.minor_hash = 0; | 651 | hinfo.minor_hash = 0; |
651 | frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); | 652 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); |
652 | if (!frame) | 653 | if (!frame) |
653 | return err; | 654 | return err; |
654 | 655 | ||
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
694 | break; | 695 | break; |
695 | } | 696 | } |
696 | dx_release(frames); | 697 | dx_release(frames); |
697 | dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", | 698 | dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " |
698 | count, *next_hash)); | 699 | "next hash: %x\n", count, *next_hash)); |
699 | return count; | 700 | return count; |
700 | errout: | 701 | errout: |
701 | dx_release(frames); | 702 | dx_release(frames); |
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, | |||
802 | /* | 803 | /* |
803 | * Returns 0 if not found, -1 on failure, and 1 on success | 804 | * Returns 0 if not found, -1 on failure, and 1 on success |
804 | */ | 805 | */ |
805 | static inline int search_dirblock(struct buffer_head * bh, | 806 | static inline int search_dirblock(struct buffer_head *bh, |
806 | struct inode *dir, | 807 | struct inode *dir, |
807 | struct dentry *dentry, | 808 | const struct qstr *d_name, |
808 | unsigned long offset, | 809 | unsigned long offset, |
809 | struct ext4_dir_entry_2 ** res_dir) | 810 | struct ext4_dir_entry_2 ** res_dir) |
810 | { | 811 | { |
811 | struct ext4_dir_entry_2 * de; | 812 | struct ext4_dir_entry_2 * de; |
812 | char * dlimit; | 813 | char * dlimit; |
813 | int de_len; | 814 | int de_len; |
814 | const char *name = dentry->d_name.name; | 815 | const char *name = d_name->name; |
815 | int namelen = dentry->d_name.len; | 816 | int namelen = d_name->len; |
816 | 817 | ||
817 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 818 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
818 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 819 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
851 | * The returned buffer_head has ->b_count elevated. The caller is expected | 852 | * The returned buffer_head has ->b_count elevated. The caller is expected |
852 | * to brelse() it when appropriate. | 853 | * to brelse() it when appropriate. |
853 | */ | 854 | */ |
854 | static struct buffer_head * ext4_find_entry (struct dentry *dentry, | 855 | static struct buffer_head * ext4_find_entry (struct inode *dir, |
856 | const struct qstr *d_name, | ||
855 | struct ext4_dir_entry_2 ** res_dir) | 857 | struct ext4_dir_entry_2 ** res_dir) |
856 | { | 858 | { |
857 | struct super_block * sb; | 859 | struct super_block *sb; |
858 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 860 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
859 | struct buffer_head * bh, *ret = NULL; | 861 | struct buffer_head *bh, *ret = NULL; |
860 | ext4_lblk_t start, block, b; | 862 | ext4_lblk_t start, block, b; |
861 | int ra_max = 0; /* Number of bh's in the readahead | 863 | int ra_max = 0; /* Number of bh's in the readahead |
862 | buffer, bh_use[] */ | 864 | buffer, bh_use[] */ |
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
865 | int num = 0; | 867 | int num = 0; |
866 | ext4_lblk_t nblocks; | 868 | ext4_lblk_t nblocks; |
867 | int i, err; | 869 | int i, err; |
868 | struct inode *dir = dentry->d_parent->d_inode; | ||
869 | int namelen; | 870 | int namelen; |
870 | 871 | ||
871 | *res_dir = NULL; | 872 | *res_dir = NULL; |
872 | sb = dir->i_sb; | 873 | sb = dir->i_sb; |
873 | namelen = dentry->d_name.len; | 874 | namelen = d_name->len; |
874 | if (namelen > EXT4_NAME_LEN) | 875 | if (namelen > EXT4_NAME_LEN) |
875 | return NULL; | 876 | return NULL; |
876 | if (is_dx(dir)) { | 877 | if (is_dx(dir)) { |
877 | bh = ext4_dx_find_entry(dentry, res_dir, &err); | 878 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
878 | /* | 879 | /* |
879 | * On success, or if the error was file not found, | 880 | * On success, or if the error was file not found, |
880 | * return. Otherwise, fall back to doing a search the | 881 | * return. Otherwise, fall back to doing a search the |
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
882 | */ | 883 | */ |
883 | if (bh || (err != ERR_BAD_DX_DIR)) | 884 | if (bh || (err != ERR_BAD_DX_DIR)) |
884 | return bh; | 885 | return bh; |
885 | dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); | 886 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
887 | "falling back\n")); | ||
886 | } | 888 | } |
887 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); | 889 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); |
888 | start = EXT4_I(dir)->i_dir_start_lookup; | 890 | start = EXT4_I(dir)->i_dir_start_lookup; |
@@ -926,7 +928,7 @@ restart: | |||
926 | brelse(bh); | 928 | brelse(bh); |
927 | goto next; | 929 | goto next; |
928 | } | 930 | } |
929 | i = search_dirblock(bh, dir, dentry, | 931 | i = search_dirblock(bh, dir, d_name, |
930 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 932 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
931 | if (i == 1) { | 933 | if (i == 1) { |
932 | EXT4_I(dir)->i_dir_start_lookup = block; | 934 | EXT4_I(dir)->i_dir_start_lookup = block; |
@@ -956,11 +958,11 @@ restart: | |||
956 | cleanup_and_exit: | 958 | cleanup_and_exit: |
957 | /* Clean up the read-ahead blocks */ | 959 | /* Clean up the read-ahead blocks */ |
958 | for (; ra_ptr < ra_max; ra_ptr++) | 960 | for (; ra_ptr < ra_max; ra_ptr++) |
959 | brelse (bh_use[ra_ptr]); | 961 | brelse(bh_use[ra_ptr]); |
960 | return ret; | 962 | return ret; |
961 | } | 963 | } |
962 | 964 | ||
963 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 965 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
964 | struct ext4_dir_entry_2 **res_dir, int *err) | 966 | struct ext4_dir_entry_2 **res_dir, int *err) |
965 | { | 967 | { |
966 | struct super_block * sb; | 968 | struct super_block * sb; |
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
971 | struct buffer_head *bh; | 973 | struct buffer_head *bh; |
972 | ext4_lblk_t block; | 974 | ext4_lblk_t block; |
973 | int retval; | 975 | int retval; |
974 | int namelen = dentry->d_name.len; | 976 | int namelen = d_name->len; |
975 | const u8 *name = dentry->d_name.name; | 977 | const u8 *name = d_name->name; |
976 | struct inode *dir = dentry->d_parent->d_inode; | ||
977 | 978 | ||
978 | sb = dir->i_sb; | 979 | sb = dir->i_sb; |
979 | /* NFS may look up ".." - look at dx_root directory block */ | 980 | /* NFS may look up ".." - look at dx_root directory block */ |
980 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 981 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ |
981 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
982 | return NULL; | 983 | return NULL; |
983 | } else { | 984 | } else { |
984 | frame = frames; | 985 | frame = frames; |
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1010 | return bh; | 1011 | return bh; |
1011 | } | 1012 | } |
1012 | } | 1013 | } |
1013 | brelse (bh); | 1014 | brelse(bh); |
1014 | /* Check to see if we should continue to search */ | 1015 | /* Check to see if we should continue to search */ |
1015 | retval = ext4_htree_next_block(dir, hash, frame, | 1016 | retval = ext4_htree_next_block(dir, hash, frame, |
1016 | frames, NULL); | 1017 | frames, NULL); |
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1025 | 1026 | ||
1026 | *err = -ENOENT; | 1027 | *err = -ENOENT; |
1027 | errout: | 1028 | errout: |
1028 | dxtrace(printk("%s not found\n", name)); | 1029 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); |
1029 | dx_release (frames); | 1030 | dx_release (frames); |
1030 | return NULL; | 1031 | return NULL; |
1031 | } | 1032 | } |
1032 | 1033 | ||
1033 | static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 1034 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
1034 | { | 1035 | { |
1035 | struct inode * inode; | 1036 | struct inode *inode; |
1036 | struct ext4_dir_entry_2 * de; | 1037 | struct ext4_dir_entry_2 *de; |
1037 | struct buffer_head * bh; | 1038 | struct buffer_head *bh; |
1038 | 1039 | ||
1039 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1040 | if (dentry->d_name.len > EXT4_NAME_LEN) |
1040 | return ERR_PTR(-ENAMETOOLONG); | 1041 | return ERR_PTR(-ENAMETOOLONG); |
1041 | 1042 | ||
1042 | bh = ext4_find_entry(dentry, &de); | 1043 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
1043 | inode = NULL; | 1044 | inode = NULL; |
1044 | if (bh) { | 1045 | if (bh) { |
1045 | unsigned long ino = le32_to_cpu(de->inode); | 1046 | unsigned long ino = le32_to_cpu(de->inode); |
1046 | brelse (bh); | 1047 | brelse(bh); |
1047 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1048 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1048 | ext4_error(dir->i_sb, "ext4_lookup", | 1049 | ext4_error(dir->i_sb, "ext4_lookup", |
1049 | "bad inode number: %lu", ino); | 1050 | "bad inode number: %lu", ino); |
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1062 | unsigned long ino; | 1063 | unsigned long ino; |
1063 | struct dentry *parent; | 1064 | struct dentry *parent; |
1064 | struct inode *inode; | 1065 | struct inode *inode; |
1065 | struct dentry dotdot; | 1066 | static const struct qstr dotdot = { |
1067 | .name = "..", | ||
1068 | .len = 2, | ||
1069 | }; | ||
1066 | struct ext4_dir_entry_2 * de; | 1070 | struct ext4_dir_entry_2 * de; |
1067 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
1068 | 1072 | ||
1069 | dotdot.d_name.name = ".."; | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1070 | dotdot.d_name.len = 2; | ||
1071 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
1072 | |||
1073 | bh = ext4_find_entry(&dotdot, &de); | ||
1074 | inode = NULL; | 1074 | inode = NULL; |
1075 | if (!bh) | 1075 | if (!bh) |
1076 | return ERR_PTR(-ENOENT); | 1076 | return ERR_PTR(-ENOENT); |
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1201 | 1201 | ||
1202 | /* create map in the end of data2 block */ | 1202 | /* create map in the end of data2 block */ |
1203 | map = (struct dx_map_entry *) (data2 + blocksize); | 1203 | map = (struct dx_map_entry *) (data2 + blocksize); |
1204 | count = dx_make_map ((struct ext4_dir_entry_2 *) data1, | 1204 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, |
1205 | blocksize, hinfo, map); | 1205 | blocksize, hinfo, map); |
1206 | map -= count; | 1206 | map -= count; |
1207 | dx_sort_map (map, count); | 1207 | dx_sort_map(map, count); |
1208 | /* Split the existing block in the middle, size-wise */ | 1208 | /* Split the existing block in the middle, size-wise */ |
1209 | size = 0; | 1209 | size = 0; |
1210 | move = 0; | 1210 | move = 0; |
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1225 | 1225 | ||
1226 | /* Fancy dance to stay within two buffers */ | 1226 | /* Fancy dance to stay within two buffers */ |
1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
1228 | de = dx_pack_dirents(data1,blocksize); | 1228 | de = dx_pack_dirents(data1, blocksize); |
1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); | 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); | 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1237 | swap(*bh, bh2); | 1237 | swap(*bh, bh2); |
1238 | de = de2; | 1238 | de = de2; |
1239 | } | 1239 | } |
1240 | dx_insert_block (frame, hash2 + continued, newblock); | 1240 | dx_insert_block(frame, hash2 + continued, newblock); |
1241 | err = ext4_journal_dirty_metadata (handle, bh2); | 1241 | err = ext4_journal_dirty_metadata(handle, bh2); |
1242 | if (err) | 1242 | if (err) |
1243 | goto journal_error; | 1243 | goto journal_error; |
1244 | err = ext4_journal_dirty_metadata (handle, frame->bh); | 1244 | err = ext4_journal_dirty_metadata(handle, frame->bh); |
1245 | if (err) | 1245 | if (err) |
1246 | goto journal_error; | 1246 | goto journal_error; |
1247 | brelse (bh2); | 1247 | brelse(bh2); |
1248 | dxtrace(dx_show_index ("frame", frame->entries)); | 1248 | dxtrace(dx_show_index("frame", frame->entries)); |
1249 | return de; | 1249 | return de; |
1250 | 1250 | ||
1251 | journal_error: | 1251 | journal_error: |
@@ -1271,7 +1271,7 @@ errout: | |||
1271 | */ | 1271 | */ |
1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
1273 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, |
1274 | struct buffer_head * bh) | 1274 | struct buffer_head *bh) |
1275 | { | 1275 | { |
1276 | struct inode *dir = dentry->d_parent->d_inode; | 1276 | struct inode *dir = dentry->d_parent->d_inode; |
1277 | const char *name = dentry->d_name.name; | 1277 | const char *name = dentry->d_name.name; |
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1288 | while ((char *) de <= top) { | 1288 | while ((char *) de <= top) { |
1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
1290 | bh, offset)) { | 1290 | bh, offset)) { |
1291 | brelse (bh); | 1291 | brelse(bh); |
1292 | return -EIO; | 1292 | return -EIO; |
1293 | } | 1293 | } |
1294 | if (ext4_match (namelen, name, de)) { | 1294 | if (ext4_match(namelen, name, de)) { |
1295 | brelse (bh); | 1295 | brelse(bh); |
1296 | return -EEXIST; | 1296 | return -EEXIST; |
1297 | } | 1297 | } |
1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1329 | } else | 1329 | } else |
1330 | de->inode = 0; | 1330 | de->inode = 0; |
1331 | de->name_len = namelen; | 1331 | de->name_len = namelen; |
1332 | memcpy (de->name, name, namelen); | 1332 | memcpy(de->name, name, namelen); |
1333 | /* | 1333 | /* |
1334 | * XXX shouldn't update any times until successful | 1334 | * XXX shouldn't update any times until successful |
1335 | * completion of syscall, but too many callers depend | 1335 | * completion of syscall, but too many callers depend |
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1377 | struct fake_dirent *fde; | 1377 | struct fake_dirent *fde; |
1378 | 1378 | ||
1379 | blocksize = dir->i_sb->s_blocksize; | 1379 | blocksize = dir->i_sb->s_blocksize; |
1380 | dxtrace(printk("Creating index\n")); | 1380 | dxtrace(printk(KERN_DEBUG "Creating index\n")); |
1381 | retval = ext4_journal_get_write_access(handle, bh); | 1381 | retval = ext4_journal_get_write_access(handle, bh); |
1382 | if (retval) { | 1382 | if (retval) { |
1383 | ext4_std_error(dir->i_sb, retval); | 1383 | ext4_std_error(dir->i_sb, retval); |
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1386 | } | 1386 | } |
1387 | root = (struct dx_root *) bh->b_data; | 1387 | root = (struct dx_root *) bh->b_data; |
1388 | 1388 | ||
1389 | bh2 = ext4_append (handle, dir, &block, &retval); | 1389 | bh2 = ext4_append(handle, dir, &block, &retval); |
1390 | if (!(bh2)) { | 1390 | if (!(bh2)) { |
1391 | brelse(bh); | 1391 | brelse(bh); |
1392 | return retval; | 1392 | return retval; |
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1412 | root->info.info_length = sizeof(root->info); | 1412 | root->info.info_length = sizeof(root->info); |
1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
1414 | entries = root->entries; | 1414 | entries = root->entries; |
1415 | dx_set_block (entries, 1); | 1415 | dx_set_block(entries, 1); |
1416 | dx_set_count (entries, 1); | 1416 | dx_set_count(entries, 1); |
1417 | dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); | 1417 | dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); |
1418 | 1418 | ||
1419 | /* Initialize as for dx_probe */ | 1419 | /* Initialize as for dx_probe */ |
1420 | hinfo.hash_version = root->info.hash_version; | 1420 | hinfo.hash_version = root->info.hash_version; |
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1443 | * may not sleep between calling this and putting something into | 1443 | * may not sleep between calling this and putting something into |
1444 | * the entry, as someone else might have used it while you slept. | 1444 | * the entry, as someone else might have used it while you slept. |
1445 | */ | 1445 | */ |
1446 | static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | 1446 | static int ext4_add_entry(handle_t *handle, struct dentry *dentry, |
1447 | struct inode *inode) | 1447 | struct inode *inode) |
1448 | { | 1448 | { |
1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
1450 | unsigned long offset; | 1450 | unsigned long offset; |
1451 | struct buffer_head * bh; | 1451 | struct buffer_head *bh; |
1452 | struct ext4_dir_entry_2 *de; | 1452 | struct ext4_dir_entry_2 *de; |
1453 | struct super_block * sb; | 1453 | struct super_block *sb; |
1454 | int retval; | 1454 | int retval; |
1455 | int dx_fallback=0; | 1455 | int dx_fallback=0; |
1456 | unsigned blocksize; | 1456 | unsigned blocksize; |
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1500 | struct dx_frame frames[2], *frame; | 1500 | struct dx_frame frames[2], *frame; |
1501 | struct dx_entry *entries, *at; | 1501 | struct dx_entry *entries, *at; |
1502 | struct dx_hash_info hinfo; | 1502 | struct dx_hash_info hinfo; |
1503 | struct buffer_head * bh; | 1503 | struct buffer_head *bh; |
1504 | struct inode *dir = dentry->d_parent->d_inode; | 1504 | struct inode *dir = dentry->d_parent->d_inode; |
1505 | struct super_block * sb = dir->i_sb; | 1505 | struct super_block *sb = dir->i_sb; |
1506 | struct ext4_dir_entry_2 *de; | 1506 | struct ext4_dir_entry_2 *de; |
1507 | int err; | 1507 | int err; |
1508 | 1508 | ||
1509 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1509 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
1510 | if (!frame) | 1510 | if (!frame) |
1511 | return err; | 1511 | return err; |
1512 | entries = frame->entries; | 1512 | entries = frame->entries; |
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | /* Block full, should compress but for now just split */ | 1529 | /* Block full, should compress but for now just split */ |
1530 | dxtrace(printk("using %u of %u node entries\n", | 1530 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
1531 | dx_get_count(entries), dx_get_limit(entries))); | 1531 | dx_get_count(entries), dx_get_limit(entries))); |
1532 | /* Need to split index? */ | 1532 | /* Need to split index? */ |
1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1559 | if (levels) { | 1559 | if (levels) { |
1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; | 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; |
1561 | unsigned hash2 = dx_get_hash(entries + icount1); | 1561 | unsigned hash2 = dx_get_hash(entries + icount1); |
1562 | dxtrace(printk("Split index %i/%i\n", icount1, icount2)); | 1562 | dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", |
1563 | icount1, icount2)); | ||
1563 | 1564 | ||
1564 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ | 1565 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ |
1565 | err = ext4_journal_get_write_access(handle, | 1566 | err = ext4_journal_get_write_access(handle, |
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1567 | if (err) | 1568 | if (err) |
1568 | goto journal_error; | 1569 | goto journal_error; |
1569 | 1570 | ||
1570 | memcpy ((char *) entries2, (char *) (entries + icount1), | 1571 | memcpy((char *) entries2, (char *) (entries + icount1), |
1571 | icount2 * sizeof(struct dx_entry)); | 1572 | icount2 * sizeof(struct dx_entry)); |
1572 | dx_set_count (entries, icount1); | 1573 | dx_set_count(entries, icount1); |
1573 | dx_set_count (entries2, icount2); | 1574 | dx_set_count(entries2, icount2); |
1574 | dx_set_limit (entries2, dx_node_limit(dir)); | 1575 | dx_set_limit(entries2, dx_node_limit(dir)); |
1575 | 1576 | ||
1576 | /* Which index block gets the new entry? */ | 1577 | /* Which index block gets the new entry? */ |
1577 | if (at - entries >= icount1) { | 1578 | if (at - entries >= icount1) { |
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1579 | frame->entries = entries = entries2; | 1580 | frame->entries = entries = entries2; |
1580 | swap(frame->bh, bh2); | 1581 | swap(frame->bh, bh2); |
1581 | } | 1582 | } |
1582 | dx_insert_block (frames + 0, hash2, newblock); | 1583 | dx_insert_block(frames + 0, hash2, newblock); |
1583 | dxtrace(dx_show_index ("node", frames[1].entries)); | 1584 | dxtrace(dx_show_index("node", frames[1].entries)); |
1584 | dxtrace(dx_show_index ("node", | 1585 | dxtrace(dx_show_index("node", |
1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1586 | ((struct dx_node *) bh2->b_data)->entries)); |
1586 | err = ext4_journal_dirty_metadata(handle, bh2); | 1587 | err = ext4_journal_dirty_metadata(handle, bh2); |
1587 | if (err) | 1588 | if (err) |
1588 | goto journal_error; | 1589 | goto journal_error; |
1589 | brelse (bh2); | 1590 | brelse (bh2); |
1590 | } else { | 1591 | } else { |
1591 | dxtrace(printk("Creating second level index...\n")); | 1592 | dxtrace(printk(KERN_DEBUG |
1593 | "Creating second level index...\n")); | ||
1592 | memcpy((char *) entries2, (char *) entries, | 1594 | memcpy((char *) entries2, (char *) entries, |
1593 | icount * sizeof(struct dx_entry)); | 1595 | icount * sizeof(struct dx_entry)); |
1594 | dx_set_limit(entries2, dx_node_limit(dir)); | 1596 | dx_set_limit(entries2, dx_node_limit(dir)); |
@@ -1630,12 +1632,12 @@ cleanup: | |||
1630 | * ext4_delete_entry deletes a directory entry by merging it with the | 1632 | * ext4_delete_entry deletes a directory entry by merging it with the |
1631 | * previous entry | 1633 | * previous entry |
1632 | */ | 1634 | */ |
1633 | static int ext4_delete_entry (handle_t *handle, | 1635 | static int ext4_delete_entry(handle_t *handle, |
1634 | struct inode * dir, | 1636 | struct inode *dir, |
1635 | struct ext4_dir_entry_2 * de_del, | 1637 | struct ext4_dir_entry_2 *de_del, |
1636 | struct buffer_head * bh) | 1638 | struct buffer_head *bh) |
1637 | { | 1639 | { |
1638 | struct ext4_dir_entry_2 * de, * pde; | 1640 | struct ext4_dir_entry_2 *de, *pde; |
1639 | int i; | 1641 | int i; |
1640 | 1642 | ||
1641 | i = 0; | 1643 | i = 0; |
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, | |||
1716 | * If the create succeeds, we fill in the inode information | 1718 | * If the create succeeds, we fill in the inode information |
1717 | * with d_instantiate(). | 1719 | * with d_instantiate(). |
1718 | */ | 1720 | */ |
1719 | static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, | 1721 | static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, |
1720 | struct nameidata *nd) | 1722 | struct nameidata *nd) |
1721 | { | 1723 | { |
1722 | handle_t *handle; | 1724 | handle_t *handle; |
1723 | struct inode * inode; | 1725 | struct inode *inode; |
1724 | int err, retries = 0; | 1726 | int err, retries = 0; |
1725 | 1727 | ||
1726 | retry: | 1728 | retry: |
@@ -1747,8 +1749,8 @@ retry: | |||
1747 | return err; | 1749 | return err; |
1748 | } | 1750 | } |
1749 | 1751 | ||
1750 | static int ext4_mknod (struct inode * dir, struct dentry *dentry, | 1752 | static int ext4_mknod(struct inode *dir, struct dentry *dentry, |
1751 | int mode, dev_t rdev) | 1753 | int mode, dev_t rdev) |
1752 | { | 1754 | { |
1753 | handle_t *handle; | 1755 | handle_t *handle; |
1754 | struct inode *inode; | 1756 | struct inode *inode; |
@@ -1767,11 +1769,11 @@ retry: | |||
1767 | if (IS_DIRSYNC(dir)) | 1769 | if (IS_DIRSYNC(dir)) |
1768 | handle->h_sync = 1; | 1770 | handle->h_sync = 1; |
1769 | 1771 | ||
1770 | inode = ext4_new_inode (handle, dir, mode); | 1772 | inode = ext4_new_inode(handle, dir, mode); |
1771 | err = PTR_ERR(inode); | 1773 | err = PTR_ERR(inode); |
1772 | if (!IS_ERR(inode)) { | 1774 | if (!IS_ERR(inode)) { |
1773 | init_special_inode(inode, inode->i_mode, rdev); | 1775 | init_special_inode(inode, inode->i_mode, rdev); |
1774 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1776 | #ifdef CONFIG_EXT4_FS_XATTR |
1775 | inode->i_op = &ext4_special_inode_operations; | 1777 | inode->i_op = &ext4_special_inode_operations; |
1776 | #endif | 1778 | #endif |
1777 | err = ext4_add_nondir(handle, dentry, inode); | 1779 | err = ext4_add_nondir(handle, dentry, inode); |
@@ -1782,12 +1784,12 @@ retry: | |||
1782 | return err; | 1784 | return err; |
1783 | } | 1785 | } |
1784 | 1786 | ||
1785 | static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) | 1787 | static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
1786 | { | 1788 | { |
1787 | handle_t *handle; | 1789 | handle_t *handle; |
1788 | struct inode * inode; | 1790 | struct inode *inode; |
1789 | struct buffer_head * dir_block; | 1791 | struct buffer_head *dir_block; |
1790 | struct ext4_dir_entry_2 * de; | 1792 | struct ext4_dir_entry_2 *de; |
1791 | int err, retries = 0; | 1793 | int err, retries = 0; |
1792 | 1794 | ||
1793 | if (EXT4_DIR_LINK_MAX(dir)) | 1795 | if (EXT4_DIR_LINK_MAX(dir)) |
@@ -1803,7 +1805,7 @@ retry: | |||
1803 | if (IS_DIRSYNC(dir)) | 1805 | if (IS_DIRSYNC(dir)) |
1804 | handle->h_sync = 1; | 1806 | handle->h_sync = 1; |
1805 | 1807 | ||
1806 | inode = ext4_new_inode (handle, dir, S_IFDIR | mode); | 1808 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); |
1807 | err = PTR_ERR(inode); | 1809 | err = PTR_ERR(inode); |
1808 | if (IS_ERR(inode)) | 1810 | if (IS_ERR(inode)) |
1809 | goto out_stop; | 1811 | goto out_stop; |
@@ -1811,7 +1813,7 @@ retry: | |||
1811 | inode->i_op = &ext4_dir_inode_operations; | 1813 | inode->i_op = &ext4_dir_inode_operations; |
1812 | inode->i_fop = &ext4_dir_operations; | 1814 | inode->i_fop = &ext4_dir_operations; |
1813 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 1815 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
1814 | dir_block = ext4_bread (handle, inode, 0, 1, &err); | 1816 | dir_block = ext4_bread(handle, inode, 0, 1, &err); |
1815 | if (!dir_block) | 1817 | if (!dir_block) |
1816 | goto out_clear_inode; | 1818 | goto out_clear_inode; |
1817 | BUFFER_TRACE(dir_block, "get_write_access"); | 1819 | BUFFER_TRACE(dir_block, "get_write_access"); |
@@ -1820,26 +1822,26 @@ retry: | |||
1820 | de->inode = cpu_to_le32(inode->i_ino); | 1822 | de->inode = cpu_to_le32(inode->i_ino); |
1821 | de->name_len = 1; | 1823 | de->name_len = 1; |
1822 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); | 1824 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
1823 | strcpy (de->name, "."); | 1825 | strcpy(de->name, "."); |
1824 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1826 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1825 | de = ext4_next_entry(de); | 1827 | de = ext4_next_entry(de); |
1826 | de->inode = cpu_to_le32(dir->i_ino); | 1828 | de->inode = cpu_to_le32(dir->i_ino); |
1827 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - | 1829 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
1828 | EXT4_DIR_REC_LEN(1)); | 1830 | EXT4_DIR_REC_LEN(1)); |
1829 | de->name_len = 2; | 1831 | de->name_len = 2; |
1830 | strcpy (de->name, ".."); | 1832 | strcpy(de->name, ".."); |
1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1833 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1832 | inode->i_nlink = 2; | 1834 | inode->i_nlink = 2; |
1833 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); | 1835 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); |
1834 | ext4_journal_dirty_metadata(handle, dir_block); | 1836 | ext4_journal_dirty_metadata(handle, dir_block); |
1835 | brelse (dir_block); | 1837 | brelse(dir_block); |
1836 | ext4_mark_inode_dirty(handle, inode); | 1838 | ext4_mark_inode_dirty(handle, inode); |
1837 | err = ext4_add_entry (handle, dentry, inode); | 1839 | err = ext4_add_entry(handle, dentry, inode); |
1838 | if (err) { | 1840 | if (err) { |
1839 | out_clear_inode: | 1841 | out_clear_inode: |
1840 | clear_nlink(inode); | 1842 | clear_nlink(inode); |
1841 | ext4_mark_inode_dirty(handle, inode); | 1843 | ext4_mark_inode_dirty(handle, inode); |
1842 | iput (inode); | 1844 | iput(inode); |
1843 | goto out_stop; | 1845 | goto out_stop; |
1844 | } | 1846 | } |
1845 | ext4_inc_count(handle, dir); | 1847 | ext4_inc_count(handle, dir); |
@@ -1856,17 +1858,17 @@ out_stop: | |||
1856 | /* | 1858 | /* |
1857 | * routine to check that the specified directory is empty (for rmdir) | 1859 | * routine to check that the specified directory is empty (for rmdir) |
1858 | */ | 1860 | */ |
1859 | static int empty_dir (struct inode * inode) | 1861 | static int empty_dir(struct inode *inode) |
1860 | { | 1862 | { |
1861 | unsigned long offset; | 1863 | unsigned long offset; |
1862 | struct buffer_head * bh; | 1864 | struct buffer_head *bh; |
1863 | struct ext4_dir_entry_2 * de, * de1; | 1865 | struct ext4_dir_entry_2 *de, *de1; |
1864 | struct super_block * sb; | 1866 | struct super_block *sb; |
1865 | int err = 0; | 1867 | int err = 0; |
1866 | 1868 | ||
1867 | sb = inode->i_sb; | 1869 | sb = inode->i_sb; |
1868 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1870 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1869 | !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { | 1871 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1870 | if (err) | 1872 | if (err) |
1871 | ext4_error(inode->i_sb, __func__, | 1873 | ext4_error(inode->i_sb, __func__, |
1872 | "error %d reading directory #%lu offset 0", | 1874 | "error %d reading directory #%lu offset 0", |
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) | |||
1881 | de1 = ext4_next_entry(de); | 1883 | de1 = ext4_next_entry(de); |
1882 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1884 | if (le32_to_cpu(de->inode) != inode->i_ino || |
1883 | !le32_to_cpu(de1->inode) || | 1885 | !le32_to_cpu(de1->inode) || |
1884 | strcmp (".", de->name) || | 1886 | strcmp(".", de->name) || |
1885 | strcmp ("..", de1->name)) { | 1887 | strcmp("..", de1->name)) { |
1886 | ext4_warning (inode->i_sb, "empty_dir", | 1888 | ext4_warning(inode->i_sb, "empty_dir", |
1887 | "bad directory (dir #%lu) - no `.' or `..'", | 1889 | "bad directory (dir #%lu) - no `.' or `..'", |
1888 | inode->i_ino); | 1890 | inode->i_ino); |
1889 | brelse (bh); | 1891 | brelse(bh); |
1890 | return 1; | 1892 | return 1; |
1891 | } | 1893 | } |
1892 | offset = ext4_rec_len_from_disk(de->rec_len) + | 1894 | offset = ext4_rec_len_from_disk(de->rec_len) + |
1893 | ext4_rec_len_from_disk(de1->rec_len); | 1895 | ext4_rec_len_from_disk(de1->rec_len); |
1894 | de = ext4_next_entry(de1); | 1896 | de = ext4_next_entry(de1); |
1895 | while (offset < inode->i_size ) { | 1897 | while (offset < inode->i_size) { |
1896 | if (!bh || | 1898 | if (!bh || |
1897 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1899 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1898 | err = 0; | 1900 | err = 0; |
1899 | brelse (bh); | 1901 | brelse(bh); |
1900 | bh = ext4_bread (NULL, inode, | 1902 | bh = ext4_bread(NULL, inode, |
1901 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1903 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
1902 | if (!bh) { | 1904 | if (!bh) { |
1903 | if (err) | 1905 | if (err) |
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) | |||
1917 | continue; | 1919 | continue; |
1918 | } | 1920 | } |
1919 | if (le32_to_cpu(de->inode)) { | 1921 | if (le32_to_cpu(de->inode)) { |
1920 | brelse (bh); | 1922 | brelse(bh); |
1921 | return 0; | 1923 | return 0; |
1922 | } | 1924 | } |
1923 | offset += ext4_rec_len_from_disk(de->rec_len); | 1925 | offset += ext4_rec_len_from_disk(de->rec_len); |
1924 | de = ext4_next_entry(de); | 1926 | de = ext4_next_entry(de); |
1925 | } | 1927 | } |
1926 | brelse (bh); | 1928 | brelse(bh); |
1927 | return 1; | 1929 | return 1; |
1928 | } | 1930 | } |
1929 | 1931 | ||
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1954 | * ->i_nlink. For, say it, character device. Not a regular file, | 1956 | * ->i_nlink. For, say it, character device. Not a regular file, |
1955 | * not a directory, not a symlink and ->i_nlink > 0. | 1957 | * not a directory, not a symlink and ->i_nlink > 0. |
1956 | */ | 1958 | */ |
1957 | J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1959 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1958 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1960 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
1959 | 1961 | ||
1960 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); | 1962 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); |
1961 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 1963 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
@@ -2069,12 +2071,12 @@ out_brelse: | |||
2069 | goto out_err; | 2071 | goto out_err; |
2070 | } | 2072 | } |
2071 | 2073 | ||
2072 | static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | 2074 | static int ext4_rmdir(struct inode *dir, struct dentry *dentry) |
2073 | { | 2075 | { |
2074 | int retval; | 2076 | int retval; |
2075 | struct inode * inode; | 2077 | struct inode *inode; |
2076 | struct buffer_head * bh; | 2078 | struct buffer_head *bh; |
2077 | struct ext4_dir_entry_2 * de; | 2079 | struct ext4_dir_entry_2 *de; |
2078 | handle_t *handle; | 2080 | handle_t *handle; |
2079 | 2081 | ||
2080 | /* Initialize quotas before so that eventual writes go in | 2082 | /* Initialize quotas before so that eventual writes go in |
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2085 | return PTR_ERR(handle); | 2087 | return PTR_ERR(handle); |
2086 | 2088 | ||
2087 | retval = -ENOENT; | 2089 | retval = -ENOENT; |
2088 | bh = ext4_find_entry (dentry, &de); | 2090 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2089 | if (!bh) | 2091 | if (!bh) |
2090 | goto end_rmdir; | 2092 | goto end_rmdir; |
2091 | 2093 | ||
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2099 | goto end_rmdir; | 2101 | goto end_rmdir; |
2100 | 2102 | ||
2101 | retval = -ENOTEMPTY; | 2103 | retval = -ENOTEMPTY; |
2102 | if (!empty_dir (inode)) | 2104 | if (!empty_dir(inode)) |
2103 | goto end_rmdir; | 2105 | goto end_rmdir; |
2104 | 2106 | ||
2105 | retval = ext4_delete_entry(handle, dir, de, bh); | 2107 | retval = ext4_delete_entry(handle, dir, de, bh); |
2106 | if (retval) | 2108 | if (retval) |
2107 | goto end_rmdir; | 2109 | goto end_rmdir; |
2108 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2110 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
2109 | ext4_warning (inode->i_sb, "ext4_rmdir", | 2111 | ext4_warning(inode->i_sb, "ext4_rmdir", |
2110 | "empty directory has too many links (%d)", | 2112 | "empty directory has too many links (%d)", |
2111 | inode->i_nlink); | 2113 | inode->i_nlink); |
2112 | inode->i_version++; | 2114 | inode->i_version++; |
2113 | clear_nlink(inode); | 2115 | clear_nlink(inode); |
2114 | /* There's no need to set i_disksize: the fact that i_nlink is | 2116 | /* There's no need to set i_disksize: the fact that i_nlink is |
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2124 | 2126 | ||
2125 | end_rmdir: | 2127 | end_rmdir: |
2126 | ext4_journal_stop(handle); | 2128 | ext4_journal_stop(handle); |
2127 | brelse (bh); | 2129 | brelse(bh); |
2128 | return retval; | 2130 | return retval; |
2129 | } | 2131 | } |
2130 | 2132 | ||
2131 | static int ext4_unlink(struct inode * dir, struct dentry *dentry) | 2133 | static int ext4_unlink(struct inode *dir, struct dentry *dentry) |
2132 | { | 2134 | { |
2133 | int retval; | 2135 | int retval; |
2134 | struct inode * inode; | 2136 | struct inode *inode; |
2135 | struct buffer_head * bh; | 2137 | struct buffer_head *bh; |
2136 | struct ext4_dir_entry_2 * de; | 2138 | struct ext4_dir_entry_2 *de; |
2137 | handle_t *handle; | 2139 | handle_t *handle; |
2138 | 2140 | ||
2139 | /* Initialize quotas before so that eventual writes go | 2141 | /* Initialize quotas before so that eventual writes go |
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2147 | handle->h_sync = 1; | 2149 | handle->h_sync = 1; |
2148 | 2150 | ||
2149 | retval = -ENOENT; | 2151 | retval = -ENOENT; |
2150 | bh = ext4_find_entry (dentry, &de); | 2152 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2151 | if (!bh) | 2153 | if (!bh) |
2152 | goto end_unlink; | 2154 | goto end_unlink; |
2153 | 2155 | ||
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2158 | goto end_unlink; | 2160 | goto end_unlink; |
2159 | 2161 | ||
2160 | if (!inode->i_nlink) { | 2162 | if (!inode->i_nlink) { |
2161 | ext4_warning (inode->i_sb, "ext4_unlink", | 2163 | ext4_warning(inode->i_sb, "ext4_unlink", |
2162 | "Deleting nonexistent file (%lu), %d", | 2164 | "Deleting nonexistent file (%lu), %d", |
2163 | inode->i_ino, inode->i_nlink); | 2165 | inode->i_ino, inode->i_nlink); |
2164 | inode->i_nlink = 1; | 2166 | inode->i_nlink = 1; |
2165 | } | 2167 | } |
2166 | retval = ext4_delete_entry(handle, dir, de, bh); | 2168 | retval = ext4_delete_entry(handle, dir, de, bh); |
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2178 | 2180 | ||
2179 | end_unlink: | 2181 | end_unlink: |
2180 | ext4_journal_stop(handle); | 2182 | ext4_journal_stop(handle); |
2181 | brelse (bh); | 2183 | brelse(bh); |
2182 | return retval; | 2184 | return retval; |
2183 | } | 2185 | } |
2184 | 2186 | ||
2185 | static int ext4_symlink (struct inode * dir, | 2187 | static int ext4_symlink(struct inode *dir, |
2186 | struct dentry *dentry, const char * symname) | 2188 | struct dentry *dentry, const char *symname) |
2187 | { | 2189 | { |
2188 | handle_t *handle; | 2190 | handle_t *handle; |
2189 | struct inode * inode; | 2191 | struct inode *inode; |
2190 | int l, err, retries = 0; | 2192 | int l, err, retries = 0; |
2191 | 2193 | ||
2192 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
@@ -2203,12 +2205,12 @@ retry: | |||
2203 | if (IS_DIRSYNC(dir)) | 2205 | if (IS_DIRSYNC(dir)) |
2204 | handle->h_sync = 1; | 2206 | handle->h_sync = 1; |
2205 | 2207 | ||
2206 | inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); | 2208 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); |
2207 | err = PTR_ERR(inode); | 2209 | err = PTR_ERR(inode); |
2208 | if (IS_ERR(inode)) | 2210 | if (IS_ERR(inode)) |
2209 | goto out_stop; | 2211 | goto out_stop; |
2210 | 2212 | ||
2211 | if (l > sizeof (EXT4_I(inode)->i_data)) { | 2213 | if (l > sizeof(EXT4_I(inode)->i_data)) { |
2212 | inode->i_op = &ext4_symlink_inode_operations; | 2214 | inode->i_op = &ext4_symlink_inode_operations; |
2213 | ext4_set_aops(inode); | 2215 | ext4_set_aops(inode); |
2214 | /* | 2216 | /* |
@@ -2221,14 +2223,14 @@ retry: | |||
2221 | if (err) { | 2223 | if (err) { |
2222 | clear_nlink(inode); | 2224 | clear_nlink(inode); |
2223 | ext4_mark_inode_dirty(handle, inode); | 2225 | ext4_mark_inode_dirty(handle, inode); |
2224 | iput (inode); | 2226 | iput(inode); |
2225 | goto out_stop; | 2227 | goto out_stop; |
2226 | } | 2228 | } |
2227 | } else { | 2229 | } else { |
2228 | /* clear the extent format for fast symlink */ | 2230 | /* clear the extent format for fast symlink */ |
2229 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2231 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; |
2230 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2232 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2231 | memcpy((char*)&EXT4_I(inode)->i_data,symname,l); | 2233 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2232 | inode->i_size = l-1; | 2234 | inode->i_size = l-1; |
2233 | } | 2235 | } |
2234 | EXT4_I(inode)->i_disksize = inode->i_size; | 2236 | EXT4_I(inode)->i_disksize = inode->i_size; |
@@ -2240,8 +2242,8 @@ out_stop: | |||
2240 | return err; | 2242 | return err; |
2241 | } | 2243 | } |
2242 | 2244 | ||
2243 | static int ext4_link (struct dentry * old_dentry, | 2245 | static int ext4_link(struct dentry *old_dentry, |
2244 | struct inode * dir, struct dentry *dentry) | 2246 | struct inode *dir, struct dentry *dentry) |
2245 | { | 2247 | { |
2246 | handle_t *handle; | 2248 | handle_t *handle; |
2247 | struct inode *inode = old_dentry->d_inode; | 2249 | struct inode *inode = old_dentry->d_inode; |
@@ -2284,13 +2286,13 @@ retry: | |||
2284 | * Anybody can rename anything with this: the permission checks are left to the | 2286 | * Anybody can rename anything with this: the permission checks are left to the |
2285 | * higher-level routines. | 2287 | * higher-level routines. |
2286 | */ | 2288 | */ |
2287 | static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | 2289 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
2288 | struct inode * new_dir,struct dentry *new_dentry) | 2290 | struct inode *new_dir, struct dentry *new_dentry) |
2289 | { | 2291 | { |
2290 | handle_t *handle; | 2292 | handle_t *handle; |
2291 | struct inode * old_inode, * new_inode; | 2293 | struct inode *old_inode, *new_inode; |
2292 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2294 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
2293 | struct ext4_dir_entry_2 * old_de, * new_de; | 2295 | struct ext4_dir_entry_2 *old_de, *new_de; |
2294 | int retval; | 2296 | int retval; |
2295 | 2297 | ||
2296 | old_bh = new_bh = dir_bh = NULL; | 2298 | old_bh = new_bh = dir_bh = NULL; |
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2308 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2310 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
2309 | handle->h_sync = 1; | 2311 | handle->h_sync = 1; |
2310 | 2312 | ||
2311 | old_bh = ext4_find_entry (old_dentry, &old_de); | 2313 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); |
2312 | /* | 2314 | /* |
2313 | * Check for inode number is _not_ due to possible IO errors. | 2315 | * Check for inode number is _not_ due to possible IO errors. |
2314 | * We might rmdir the source, keep it as pwd of some process | 2316 | * We might rmdir the source, keep it as pwd of some process |
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2321 | goto end_rename; | 2323 | goto end_rename; |
2322 | 2324 | ||
2323 | new_inode = new_dentry->d_inode; | 2325 | new_inode = new_dentry->d_inode; |
2324 | new_bh = ext4_find_entry (new_dentry, &new_de); | 2326 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); |
2325 | if (new_bh) { | 2327 | if (new_bh) { |
2326 | if (!new_inode) { | 2328 | if (!new_inode) { |
2327 | brelse (new_bh); | 2329 | brelse(new_bh); |
2328 | new_bh = NULL; | 2330 | new_bh = NULL; |
2329 | } | 2331 | } |
2330 | } | 2332 | } |
2331 | if (S_ISDIR(old_inode->i_mode)) { | 2333 | if (S_ISDIR(old_inode->i_mode)) { |
2332 | if (new_inode) { | 2334 | if (new_inode) { |
2333 | retval = -ENOTEMPTY; | 2335 | retval = -ENOTEMPTY; |
2334 | if (!empty_dir (new_inode)) | 2336 | if (!empty_dir(new_inode)) |
2335 | goto end_rename; | 2337 | goto end_rename; |
2336 | } | 2338 | } |
2337 | retval = -EIO; | 2339 | retval = -EIO; |
2338 | dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); | 2340 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
2339 | if (!dir_bh) | 2341 | if (!dir_bh) |
2340 | goto end_rename; | 2342 | goto end_rename; |
2341 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) | 2343 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) |
2342 | goto end_rename; | 2344 | goto end_rename; |
2343 | retval = -EMLINK; | 2345 | retval = -EMLINK; |
2344 | if (!new_inode && new_dir!=old_dir && | 2346 | if (!new_inode && new_dir != old_dir && |
2345 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2347 | new_dir->i_nlink >= EXT4_LINK_MAX) |
2346 | goto end_rename; | 2348 | goto end_rename; |
2347 | } | 2349 | } |
2348 | if (!new_bh) { | 2350 | if (!new_bh) { |
2349 | retval = ext4_add_entry (handle, new_dentry, old_inode); | 2351 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
2350 | if (retval) | 2352 | if (retval) |
2351 | goto end_rename; | 2353 | goto end_rename; |
2352 | } else { | 2354 | } else { |
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2388 | struct buffer_head *old_bh2; | 2390 | struct buffer_head *old_bh2; |
2389 | struct ext4_dir_entry_2 *old_de2; | 2391 | struct ext4_dir_entry_2 *old_de2; |
2390 | 2392 | ||
2391 | old_bh2 = ext4_find_entry(old_dentry, &old_de2); | 2393 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); |
2392 | if (old_bh2) { | 2394 | if (old_bh2) { |
2393 | retval = ext4_delete_entry(handle, old_dir, | 2395 | retval = ext4_delete_entry(handle, old_dir, |
2394 | old_de2, old_bh2); | 2396 | old_de2, old_bh2); |
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2433 | retval = 0; | 2435 | retval = 0; |
2434 | 2436 | ||
2435 | end_rename: | 2437 | end_rename: |
2436 | brelse (dir_bh); | 2438 | brelse(dir_bh); |
2437 | brelse (old_bh); | 2439 | brelse(old_bh); |
2438 | brelse (new_bh); | 2440 | brelse(new_bh); |
2439 | ext4_journal_stop(handle); | 2441 | ext4_journal_stop(handle); |
2440 | return retval; | 2442 | return retval; |
2441 | } | 2443 | } |
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2454 | .mknod = ext4_mknod, | 2456 | .mknod = ext4_mknod, |
2455 | .rename = ext4_rename, | 2457 | .rename = ext4_rename, |
2456 | .setattr = ext4_setattr, | 2458 | .setattr = ext4_setattr, |
2457 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2459 | #ifdef CONFIG_EXT4_FS_XATTR |
2458 | .setxattr = generic_setxattr, | 2460 | .setxattr = generic_setxattr, |
2459 | .getxattr = generic_getxattr, | 2461 | .getxattr = generic_getxattr, |
2460 | .listxattr = ext4_listxattr, | 2462 | .listxattr = ext4_listxattr, |
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2465 | 2467 | ||
2466 | const struct inode_operations ext4_special_inode_operations = { | 2468 | const struct inode_operations ext4_special_inode_operations = { |
2467 | .setattr = ext4_setattr, | 2469 | .setattr = ext4_setattr, |
2468 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2470 | #ifdef CONFIG_EXT4_FS_XATTR |
2469 | .setxattr = generic_setxattr, | 2471 | .setxattr = generic_setxattr, |
2470 | .getxattr = generic_getxattr, | 2472 | .getxattr = generic_getxattr, |
2471 | .listxattr = ext4_listxattr, | 2473 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b6ec1843a015 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", | 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", |
417 | gdb_num); | 417 | gdb_num); |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * If we are not using the primary superblock/GDT copy don't resize, | 420 | * If we are not using the primary superblock/GDT copy don't resize, |
421 | * because the user tools have no way of handling this. Probably a | 421 | * because the user tools have no way of handling this. Probably a |
422 | * bad time to do it anyways. | 422 | * bad time to do it anyways. |
423 | */ | 423 | */ |
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
773 | 773 | ||
774 | if (reserved_gdb || gdb_off == 0) { | 774 | if (reserved_gdb || gdb_off == 0) { |
775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
776 | EXT4_FEATURE_COMPAT_RESIZE_INODE)){ | 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
777 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
777 | ext4_warning(sb, __func__, | 778 | ext4_warning(sb, __func__, |
778 | "No reserved GDT blocks, can't resize"); | 779 | "No reserved GDT blocks, can't resize"); |
779 | return -EPERM; | 780 | return -EPERM; |
@@ -869,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
869 | * We can allocate memory for mb_alloc based on the new group | 870 | * We can allocate memory for mb_alloc based on the new group |
870 | * descriptor | 871 | * descriptor |
871 | */ | 872 | */ |
872 | if (test_opt(sb, MBALLOC)) { | 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | 874 | if (err) |
874 | if (err) | 875 | goto exit_journal; |
875 | goto exit_journal; | 876 | |
876 | } | ||
877 | /* | 877 | /* |
878 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
879 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
@@ -928,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
928 | percpu_counter_add(&sbi->s_freeinodes_counter, | 928 | percpu_counter_add(&sbi->s_freeinodes_counter, |
929 | EXT4_INODES_PER_GROUP(sb)); | 929 | EXT4_INODES_PER_GROUP(sb)); |
930 | 930 | ||
931 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
932 | ext4_group_t flex_group; | ||
933 | flex_group = ext4_flex_group(sbi, input->group); | ||
934 | sbi->s_flex_groups[flex_group].free_blocks += | ||
935 | input->free_blocks_count; | ||
936 | sbi->s_flex_groups[flex_group].free_inodes += | ||
937 | EXT4_INODES_PER_GROUP(sb); | ||
938 | } | ||
939 | |||
931 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); | 940 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); |
932 | sb->s_dirt = 1; | 941 | sb->s_dirt = 1; |
933 | 942 | ||
@@ -963,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
963 | ext4_group_t o_groups_count; | 972 | ext4_group_t o_groups_count; |
964 | ext4_grpblk_t last; | 973 | ext4_grpblk_t last; |
965 | ext4_grpblk_t add; | 974 | ext4_grpblk_t add; |
966 | struct buffer_head * bh; | 975 | struct buffer_head *bh; |
967 | handle_t *handle; | 976 | handle_t *handle; |
968 | int err; | 977 | int err; |
969 | unsigned long freed_blocks; | 978 | unsigned long freed_blocks; |
@@ -1076,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1076 | /* | 1085 | /* |
1077 | * Mark mballoc pages as not up to date so that they will be updated | 1086 | * Mark mballoc pages as not up to date so that they will be updated |
1078 | * next time they are loaded by ext4_mb_load_buddy. | 1087 | * next time they are loaded by ext4_mb_load_buddy. |
1088 | * | ||
1089 | * XXX Bad, Bad, BAD!!! We should not be overloading the | ||
1090 | * Uptodate flag, particularly on the bitmap bh, as a way of | ||
1091 | * hinting to ext4_mb_load_buddy() that it needs to be | ||
1092 | * overloaded. A user could take a LVM snapshot, then do an | ||
1093 | * on-line fsck, and clear the uptodate flag, and this would | ||
1094 | * not be a bug in userspace, but a bug in the kernel. FIXME!!! | ||
1079 | */ | 1095 | */ |
1080 | if (test_opt(sb, MBALLOC)) { | 1096 | { |
1081 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1097 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1082 | struct inode *inode = sbi->s_buddy_cache; | 1098 | struct inode *inode = sbi->s_buddy_cache; |
1083 | int blocks_per_page; | 1099 | int blocks_per_page; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..dea8f13c2fd9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/proc_fs.h> | ||
38 | #include <linux/marker.h> | ||
37 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
38 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
@@ -45,6 +47,8 @@ | |||
45 | #include "namei.h" | 47 | #include "namei.h" |
46 | #include "group.h" | 48 | #include "group.h" |
47 | 49 | ||
50 | struct proc_dir_entry *ext4_proc_root; | ||
51 | |||
48 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 52 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
49 | unsigned long journal_devnum); | 53 | unsigned long journal_devnum); |
50 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, | 54 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, |
@@ -503,15 +507,18 @@ static void ext4_put_super(struct super_block *sb) | |||
503 | ext4_mb_release(sb); | 507 | ext4_mb_release(sb); |
504 | ext4_ext_release(sb); | 508 | ext4_ext_release(sb); |
505 | ext4_xattr_put_super(sb); | 509 | ext4_xattr_put_super(sb); |
506 | jbd2_journal_destroy(sbi->s_journal); | 510 | if (jbd2_journal_destroy(sbi->s_journal) < 0) |
511 | ext4_abort(sb, __func__, "Couldn't clean up the journal"); | ||
507 | sbi->s_journal = NULL; | 512 | sbi->s_journal = NULL; |
508 | if (!(sb->s_flags & MS_RDONLY)) { | 513 | if (!(sb->s_flags & MS_RDONLY)) { |
509 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 514 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
510 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 515 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
511 | BUFFER_TRACE(sbi->s_sbh, "marking dirty"); | ||
512 | mark_buffer_dirty(sbi->s_sbh); | ||
513 | ext4_commit_super(sb, es, 1); | 516 | ext4_commit_super(sb, es, 1); |
514 | } | 517 | } |
518 | if (sbi->s_proc) { | ||
519 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
520 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
521 | } | ||
515 | 522 | ||
516 | for (i = 0; i < sbi->s_gdb_count; i++) | 523 | for (i = 0; i < sbi->s_gdb_count; i++) |
517 | brelse(sbi->s_group_desc[i]); | 524 | brelse(sbi->s_group_desc[i]); |
@@ -520,6 +527,7 @@ static void ext4_put_super(struct super_block *sb) | |||
520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 527 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 528 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 529 | percpu_counter_destroy(&sbi->s_dirs_counter); |
530 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
523 | brelse(sbi->s_sbh); | 531 | brelse(sbi->s_sbh); |
524 | #ifdef CONFIG_QUOTA | 532 | #ifdef CONFIG_QUOTA |
525 | for (i = 0; i < MAXQUOTAS; i++) | 533 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -562,12 +570,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
562 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 570 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
563 | if (!ei) | 571 | if (!ei) |
564 | return NULL; | 572 | return NULL; |
565 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 573 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
566 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 574 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
567 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 575 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
568 | #endif | 576 | #endif |
569 | ei->i_block_alloc_info = NULL; | ||
570 | ei->vfs_inode.i_version = 1; | 577 | ei->vfs_inode.i_version = 1; |
578 | ei->vfs_inode.i_data.writeback_index = 0; | ||
571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 579 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 580 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
573 | spin_lock_init(&ei->i_prealloc_lock); | 581 | spin_lock_init(&ei->i_prealloc_lock); |
@@ -598,7 +606,7 @@ static void init_once(void *foo) | |||
598 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 606 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
599 | 607 | ||
600 | INIT_LIST_HEAD(&ei->i_orphan); | 608 | INIT_LIST_HEAD(&ei->i_orphan); |
601 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 609 | #ifdef CONFIG_EXT4_FS_XATTR |
602 | init_rwsem(&ei->xattr_sem); | 610 | init_rwsem(&ei->xattr_sem); |
603 | #endif | 611 | #endif |
604 | init_rwsem(&ei->i_data_sem); | 612 | init_rwsem(&ei->i_data_sem); |
@@ -624,8 +632,7 @@ static void destroy_inodecache(void) | |||
624 | 632 | ||
625 | static void ext4_clear_inode(struct inode *inode) | 633 | static void ext4_clear_inode(struct inode *inode) |
626 | { | 634 | { |
627 | struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; | 635 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
628 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | ||
629 | if (EXT4_I(inode)->i_acl && | 636 | if (EXT4_I(inode)->i_acl && |
630 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { | 637 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { |
631 | posix_acl_release(EXT4_I(inode)->i_acl); | 638 | posix_acl_release(EXT4_I(inode)->i_acl); |
@@ -637,10 +644,7 @@ static void ext4_clear_inode(struct inode *inode) | |||
637 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; | 644 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; |
638 | } | 645 | } |
639 | #endif | 646 | #endif |
640 | ext4_discard_reservation(inode); | 647 | ext4_discard_preallocations(inode); |
641 | EXT4_I(inode)->i_block_alloc_info = NULL; | ||
642 | if (unlikely(rsv)) | ||
643 | kfree(rsv); | ||
644 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 648 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
645 | &EXT4_I(inode)->jinode); | 649 | &EXT4_I(inode)->jinode); |
646 | } | 650 | } |
@@ -653,7 +657,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
653 | 657 | ||
654 | if (sbi->s_jquota_fmt) | 658 | if (sbi->s_jquota_fmt) |
655 | seq_printf(seq, ",jqfmt=%s", | 659 | seq_printf(seq, ",jqfmt=%s", |
656 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | 660 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); |
657 | 661 | ||
658 | if (sbi->s_qf_names[USRQUOTA]) | 662 | if (sbi->s_qf_names[USRQUOTA]) |
659 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 663 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
@@ -717,7 +721,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
717 | seq_puts(seq, ",debug"); | 721 | seq_puts(seq, ",debug"); |
718 | if (test_opt(sb, OLDALLOC)) | 722 | if (test_opt(sb, OLDALLOC)) |
719 | seq_puts(seq, ",oldalloc"); | 723 | seq_puts(seq, ",oldalloc"); |
720 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 724 | #ifdef CONFIG_EXT4_FS_XATTR |
721 | if (test_opt(sb, XATTR_USER) && | 725 | if (test_opt(sb, XATTR_USER) && |
722 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | 726 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) |
723 | seq_puts(seq, ",user_xattr"); | 727 | seq_puts(seq, ",user_xattr"); |
@@ -726,7 +730,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
726 | seq_puts(seq, ",nouser_xattr"); | 730 | seq_puts(seq, ",nouser_xattr"); |
727 | } | 731 | } |
728 | #endif | 732 | #endif |
729 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 733 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
730 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 734 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
731 | seq_puts(seq, ",acl"); | 735 | seq_puts(seq, ",acl"); |
732 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 736 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
@@ -751,8 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
751 | seq_puts(seq, ",nobh"); | 755 | seq_puts(seq, ",nobh"); |
752 | if (!test_opt(sb, EXTENTS)) | 756 | if (!test_opt(sb, EXTENTS)) |
753 | seq_puts(seq, ",noextents"); | 757 | seq_puts(seq, ",noextents"); |
754 | if (!test_opt(sb, MBALLOC)) | ||
755 | seq_puts(seq, ",nomballoc"); | ||
756 | if (test_opt(sb, I_VERSION)) | 758 | if (test_opt(sb, I_VERSION)) |
757 | seq_puts(seq, ",i_version"); | 759 | seq_puts(seq, ",i_version"); |
758 | if (!test_opt(sb, DELALLOC)) | 760 | if (!test_opt(sb, DELALLOC)) |
@@ -772,6 +774,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
772 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 774 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
773 | seq_puts(seq, ",data=writeback"); | 775 | seq_puts(seq, ",data=writeback"); |
774 | 776 | ||
777 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
778 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
779 | sbi->s_inode_readahead_blks); | ||
780 | |||
781 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
782 | seq_puts(seq, ",data_err=abort"); | ||
783 | |||
775 | ext4_show_quota_options(seq, sb); | 784 | ext4_show_quota_options(seq, sb); |
776 | return 0; | 785 | return 0; |
777 | } | 786 | } |
@@ -821,7 +830,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
821 | } | 830 | } |
822 | 831 | ||
823 | #ifdef CONFIG_QUOTA | 832 | #ifdef CONFIG_QUOTA |
824 | #define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") | 833 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
825 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 834 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
826 | 835 | ||
827 | static int ext4_dquot_initialize(struct inode *inode, int type); | 836 | static int ext4_dquot_initialize(struct inode *inode, int type); |
@@ -901,14 +910,16 @@ enum { | |||
901 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 910 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
902 | Opt_journal_checksum, Opt_journal_async_commit, | 911 | Opt_journal_checksum, Opt_journal_async_commit, |
903 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 912 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
913 | Opt_data_err_abort, Opt_data_err_ignore, | ||
904 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 914 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
905 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 915 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
906 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 916 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
907 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 917 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
908 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 918 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
919 | Opt_inode_readahead_blks | ||
909 | }; | 920 | }; |
910 | 921 | ||
911 | static match_table_t tokens = { | 922 | static const match_table_t tokens = { |
912 | {Opt_bsd_df, "bsddf"}, | 923 | {Opt_bsd_df, "bsddf"}, |
913 | {Opt_minix_df, "minixdf"}, | 924 | {Opt_minix_df, "minixdf"}, |
914 | {Opt_grpid, "grpid"}, | 925 | {Opt_grpid, "grpid"}, |
@@ -946,6 +957,8 @@ static match_table_t tokens = { | |||
946 | {Opt_data_journal, "data=journal"}, | 957 | {Opt_data_journal, "data=journal"}, |
947 | {Opt_data_ordered, "data=ordered"}, | 958 | {Opt_data_ordered, "data=ordered"}, |
948 | {Opt_data_writeback, "data=writeback"}, | 959 | {Opt_data_writeback, "data=writeback"}, |
960 | {Opt_data_err_abort, "data_err=abort"}, | ||
961 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
949 | {Opt_offusrjquota, "usrjquota="}, | 962 | {Opt_offusrjquota, "usrjquota="}, |
950 | {Opt_usrjquota, "usrjquota=%s"}, | 963 | {Opt_usrjquota, "usrjquota=%s"}, |
951 | {Opt_offgrpjquota, "grpjquota="}, | 964 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -966,6 +979,7 @@ static match_table_t tokens = { | |||
966 | {Opt_resize, "resize"}, | 979 | {Opt_resize, "resize"}, |
967 | {Opt_delalloc, "delalloc"}, | 980 | {Opt_delalloc, "delalloc"}, |
968 | {Opt_nodelalloc, "nodelalloc"}, | 981 | {Opt_nodelalloc, "nodelalloc"}, |
982 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | ||
969 | {Opt_err, NULL}, | 983 | {Opt_err, NULL}, |
970 | }; | 984 | }; |
971 | 985 | ||
@@ -980,7 +994,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
980 | /*todo: use simple_strtoll with >32bit ext4 */ | 994 | /*todo: use simple_strtoll with >32bit ext4 */ |
981 | sb_block = simple_strtoul(options, &options, 0); | 995 | sb_block = simple_strtoul(options, &options, 0); |
982 | if (*options && *options != ',') { | 996 | if (*options && *options != ',') { |
983 | printk("EXT4-fs: Invalid sb specification: %s\n", | 997 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
984 | (char *) *data); | 998 | (char *) *data); |
985 | return 1; | 999 | return 1; |
986 | } | 1000 | } |
@@ -1071,7 +1085,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1071 | case Opt_orlov: | 1085 | case Opt_orlov: |
1072 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1086 | clear_opt(sbi->s_mount_opt, OLDALLOC); |
1073 | break; | 1087 | break; |
1074 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1088 | #ifdef CONFIG_EXT4_FS_XATTR |
1075 | case Opt_user_xattr: | 1089 | case Opt_user_xattr: |
1076 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1090 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1077 | break; | 1091 | break; |
@@ -1081,10 +1095,11 @@ static int parse_options(char *options, struct super_block *sb, | |||
1081 | #else | 1095 | #else |
1082 | case Opt_user_xattr: | 1096 | case Opt_user_xattr: |
1083 | case Opt_nouser_xattr: | 1097 | case Opt_nouser_xattr: |
1084 | printk("EXT4 (no)user_xattr options not supported\n"); | 1098 | printk(KERN_ERR "EXT4 (no)user_xattr options " |
1099 | "not supported\n"); | ||
1085 | break; | 1100 | break; |
1086 | #endif | 1101 | #endif |
1087 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1088 | case Opt_acl: | 1103 | case Opt_acl: |
1089 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1104 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1090 | break; | 1105 | break; |
@@ -1094,7 +1109,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1094 | #else | 1109 | #else |
1095 | case Opt_acl: | 1110 | case Opt_acl: |
1096 | case Opt_noacl: | 1111 | case Opt_noacl: |
1097 | printk("EXT4 (no)acl options not supported\n"); | 1112 | printk(KERN_ERR "EXT4 (no)acl options " |
1113 | "not supported\n"); | ||
1098 | break; | 1114 | break; |
1099 | #endif | 1115 | #endif |
1100 | case Opt_reservation: | 1116 | case Opt_reservation: |
@@ -1177,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1177 | sbi->s_mount_opt |= data_opt; | 1193 | sbi->s_mount_opt |= data_opt; |
1178 | } | 1194 | } |
1179 | break; | 1195 | break; |
1196 | case Opt_data_err_abort: | ||
1197 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1198 | break; | ||
1199 | case Opt_data_err_ignore: | ||
1200 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1201 | break; | ||
1180 | #ifdef CONFIG_QUOTA | 1202 | #ifdef CONFIG_QUOTA |
1181 | case Opt_usrjquota: | 1203 | case Opt_usrjquota: |
1182 | qtype = USRQUOTA; | 1204 | qtype = USRQUOTA; |
@@ -1188,8 +1210,8 @@ set_qf_name: | |||
1188 | sb_any_quota_suspended(sb)) && | 1210 | sb_any_quota_suspended(sb)) && |
1189 | !sbi->s_qf_names[qtype]) { | 1211 | !sbi->s_qf_names[qtype]) { |
1190 | printk(KERN_ERR | 1212 | printk(KERN_ERR |
1191 | "EXT4-fs: Cannot change journaled " | 1213 | "EXT4-fs: Cannot change journaled " |
1192 | "quota options when quota turned on.\n"); | 1214 | "quota options when quota turned on.\n"); |
1193 | return 0; | 1215 | return 0; |
1194 | } | 1216 | } |
1195 | qname = match_strdup(&args[0]); | 1217 | qname = match_strdup(&args[0]); |
@@ -1356,12 +1378,6 @@ set_qf_format: | |||
1356 | case Opt_nodelalloc: | 1378 | case Opt_nodelalloc: |
1357 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1379 | clear_opt(sbi->s_mount_opt, DELALLOC); |
1358 | break; | 1380 | break; |
1359 | case Opt_mballoc: | ||
1360 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1361 | break; | ||
1362 | case Opt_nomballoc: | ||
1363 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
1364 | break; | ||
1365 | case Opt_stripe: | 1381 | case Opt_stripe: |
1366 | if (match_int(&args[0], &option)) | 1382 | if (match_int(&args[0], &option)) |
1367 | return 0; | 1383 | return 0; |
@@ -1372,6 +1388,13 @@ set_qf_format: | |||
1372 | case Opt_delalloc: | 1388 | case Opt_delalloc: |
1373 | set_opt(sbi->s_mount_opt, DELALLOC); | 1389 | set_opt(sbi->s_mount_opt, DELALLOC); |
1374 | break; | 1390 | break; |
1391 | case Opt_inode_readahead_blks: | ||
1392 | if (match_int(&args[0], &option)) | ||
1393 | return 0; | ||
1394 | if (option < 0 || option > (1 << 30)) | ||
1395 | return 0; | ||
1396 | sbi->s_inode_readahead_blks = option; | ||
1397 | break; | ||
1375 | default: | 1398 | default: |
1376 | printk(KERN_ERR | 1399 | printk(KERN_ERR |
1377 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1400 | "EXT4-fs: Unrecognized mount option \"%s\" " |
@@ -1472,15 +1495,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1472 | EXT4_INODES_PER_GROUP(sb), | 1495 | EXT4_INODES_PER_GROUP(sb), |
1473 | sbi->s_mount_opt); | 1496 | sbi->s_mount_opt); |
1474 | 1497 | ||
1475 | printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); | 1498 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", |
1476 | if (EXT4_SB(sb)->s_journal->j_inode == NULL) { | 1499 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1477 | char b[BDEVNAME_SIZE]; | 1500 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1478 | |||
1479 | printk("external journal on %s\n", | ||
1480 | bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); | ||
1481 | } else { | ||
1482 | printk("internal journal\n"); | ||
1483 | } | ||
1484 | return res; | 1501 | return res; |
1485 | } | 1502 | } |
1486 | 1503 | ||
@@ -1503,8 +1520,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1503 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1520 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1504 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1521 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1505 | 1522 | ||
1506 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | 1523 | /* We allocate both existing and potentially added groups */ |
1507 | groups_per_flex; | 1524 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1525 | ((sbi->s_es->s_reserved_gdt_blocks +1 ) << | ||
1526 | EXT4_DESC_PER_BLOCK_BITS(sb))) / | ||
1527 | groups_per_flex; | ||
1508 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1528 | sbi->s_flex_groups = kzalloc(flex_group_count * |
1509 | sizeof(struct flex_groups), GFP_KERNEL); | 1529 | sizeof(struct flex_groups), GFP_KERNEL); |
1510 | if (sbi->s_flex_groups == NULL) { | 1530 | if (sbi->s_flex_groups == NULL) { |
@@ -1583,7 +1603,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1583 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1603 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1584 | flexbg_flag = 1; | 1604 | flexbg_flag = 1; |
1585 | 1605 | ||
1586 | ext4_debug ("Checking group descriptors"); | 1606 | ext4_debug("Checking group descriptors"); |
1587 | 1607 | ||
1588 | for (i = 0; i < sbi->s_groups_count; i++) { | 1608 | for (i = 0; i < sbi->s_groups_count; i++) { |
1589 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1609 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -1622,8 +1642,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1622 | "Checksum for group %lu failed (%u!=%u)\n", | 1642 | "Checksum for group %lu failed (%u!=%u)\n", |
1623 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1643 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1624 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1644 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1625 | if (!(sb->s_flags & MS_RDONLY)) | 1645 | if (!(sb->s_flags & MS_RDONLY)) { |
1646 | spin_unlock(sb_bgl_lock(sbi, i)); | ||
1626 | return 0; | 1647 | return 0; |
1648 | } | ||
1627 | } | 1649 | } |
1628 | spin_unlock(sb_bgl_lock(sbi, i)); | 1650 | spin_unlock(sb_bgl_lock(sbi, i)); |
1629 | if (!flexbg_flag) | 1651 | if (!flexbg_flag) |
@@ -1713,9 +1735,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1713 | DQUOT_INIT(inode); | 1735 | DQUOT_INIT(inode); |
1714 | if (inode->i_nlink) { | 1736 | if (inode->i_nlink) { |
1715 | printk(KERN_DEBUG | 1737 | printk(KERN_DEBUG |
1716 | "%s: truncating inode %lu to %Ld bytes\n", | 1738 | "%s: truncating inode %lu to %lld bytes\n", |
1717 | __func__, inode->i_ino, inode->i_size); | 1739 | __func__, inode->i_ino, inode->i_size); |
1718 | jbd_debug(2, "truncating inode %lu to %Ld bytes\n", | 1740 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1719 | inode->i_ino, inode->i_size); | 1741 | inode->i_ino, inode->i_size); |
1720 | ext4_truncate(inode); | 1742 | ext4_truncate(inode); |
1721 | nr_truncates++; | 1743 | nr_truncates++; |
@@ -1913,6 +1935,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1913 | unsigned long journal_devnum = 0; | 1935 | unsigned long journal_devnum = 0; |
1914 | unsigned long def_mount_opts; | 1936 | unsigned long def_mount_opts; |
1915 | struct inode *root; | 1937 | struct inode *root; |
1938 | char *cp; | ||
1916 | int ret = -EINVAL; | 1939 | int ret = -EINVAL; |
1917 | int blocksize; | 1940 | int blocksize; |
1918 | int db_count; | 1941 | int db_count; |
@@ -1929,10 +1952,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1929 | sbi->s_mount_opt = 0; | 1952 | sbi->s_mount_opt = 0; |
1930 | sbi->s_resuid = EXT4_DEF_RESUID; | 1953 | sbi->s_resuid = EXT4_DEF_RESUID; |
1931 | sbi->s_resgid = EXT4_DEF_RESGID; | 1954 | sbi->s_resgid = EXT4_DEF_RESGID; |
1955 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | ||
1932 | sbi->s_sb_block = sb_block; | 1956 | sbi->s_sb_block = sb_block; |
1933 | 1957 | ||
1934 | unlock_kernel(); | 1958 | unlock_kernel(); |
1935 | 1959 | ||
1960 | /* Cleanup superblock name */ | ||
1961 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | ||
1962 | *cp = '!'; | ||
1963 | |||
1936 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 1964 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
1937 | if (!blocksize) { | 1965 | if (!blocksize) { |
1938 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 1966 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); |
@@ -1972,11 +2000,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1972 | set_opt(sbi->s_mount_opt, GRPID); | 2000 | set_opt(sbi->s_mount_opt, GRPID); |
1973 | if (def_mount_opts & EXT4_DEFM_UID16) | 2001 | if (def_mount_opts & EXT4_DEFM_UID16) |
1974 | set_opt(sbi->s_mount_opt, NO_UID32); | 2002 | set_opt(sbi->s_mount_opt, NO_UID32); |
1975 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2003 | #ifdef CONFIG_EXT4_FS_XATTR |
1976 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 2004 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) |
1977 | set_opt(sbi->s_mount_opt, XATTR_USER); | 2005 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1978 | #endif | 2006 | #endif |
1979 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 2007 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1980 | if (def_mount_opts & EXT4_DEFM_ACL) | 2008 | if (def_mount_opts & EXT4_DEFM_ACL) |
1981 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 2009 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1982 | #endif | 2010 | #endif |
@@ -2011,11 +2039,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2011 | ext4_warning(sb, __func__, | 2039 | ext4_warning(sb, __func__, |
2012 | "extents feature not enabled on this filesystem, " | 2040 | "extents feature not enabled on this filesystem, " |
2013 | "use tune2fs.\n"); | 2041 | "use tune2fs.\n"); |
2014 | /* | ||
2015 | * turn on mballoc code by default in ext4 filesystem | ||
2016 | * Use -o nomballoc to turn it off | ||
2017 | */ | ||
2018 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
2019 | 2042 | ||
2020 | /* | 2043 | /* |
2021 | * enable delayed allocation by default | 2044 | * enable delayed allocation by default |
@@ -2040,16 +2063,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2040 | "running e2fsck is recommended\n"); | 2063 | "running e2fsck is recommended\n"); |
2041 | 2064 | ||
2042 | /* | 2065 | /* |
2043 | * Since ext4 is still considered development code, we require | ||
2044 | * that the TEST_FILESYS flag in s->flags be set. | ||
2045 | */ | ||
2046 | if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { | ||
2047 | printk(KERN_WARNING "EXT4-fs: %s: not marked " | ||
2048 | "OK to use with test code.\n", sb->s_id); | ||
2049 | goto failed_mount; | ||
2050 | } | ||
2051 | |||
2052 | /* | ||
2053 | * Check feature flags regardless of the revision level, since we | 2066 | * Check feature flags regardless of the revision level, since we |
2054 | * previously didn't change the revision level when setting the flags, | 2067 | * previously didn't change the revision level when setting the flags, |
2055 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2068 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
@@ -2218,6 +2231,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2218 | goto failed_mount; | 2231 | goto failed_mount; |
2219 | } | 2232 | } |
2220 | 2233 | ||
2234 | #ifdef CONFIG_PROC_FS | ||
2235 | if (ext4_proc_root) | ||
2236 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | ||
2237 | |||
2238 | if (sbi->s_proc) | ||
2239 | proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, | ||
2240 | &ext4_ui_proc_fops, | ||
2241 | &sbi->s_inode_readahead_blks); | ||
2242 | #endif | ||
2243 | |||
2221 | bgl_lock_init(&sbi->s_blockgroup_lock); | 2244 | bgl_lock_init(&sbi->s_blockgroup_lock); |
2222 | 2245 | ||
2223 | for (i = 0; i < db_count; i++) { | 2246 | for (i = 0; i < db_count; i++) { |
@@ -2256,24 +2279,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2256 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2279 | err = percpu_counter_init(&sbi->s_dirs_counter, |
2257 | ext4_count_dirs(sb)); | 2280 | ext4_count_dirs(sb)); |
2258 | } | 2281 | } |
2282 | if (!err) { | ||
2283 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2284 | } | ||
2259 | if (err) { | 2285 | if (err) { |
2260 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2286 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); |
2261 | goto failed_mount3; | 2287 | goto failed_mount3; |
2262 | } | 2288 | } |
2263 | 2289 | ||
2264 | /* per fileystem reservation list head & lock */ | ||
2265 | spin_lock_init(&sbi->s_rsv_window_lock); | ||
2266 | sbi->s_rsv_window_root = RB_ROOT; | ||
2267 | /* Add a single, static dummy reservation to the start of the | ||
2268 | * reservation window list --- it gives us a placeholder for | ||
2269 | * append-at-start-of-list which makes the allocation logic | ||
2270 | * _much_ simpler. */ | ||
2271 | sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2272 | sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2273 | sbi->s_rsv_window_head.rsv_alloc_hit = 0; | ||
2274 | sbi->s_rsv_window_head.rsv_goal_size = 0; | ||
2275 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||
2276 | |||
2277 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2290 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2278 | 2291 | ||
2279 | /* | 2292 | /* |
@@ -2470,7 +2483,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2470 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | 2483 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); |
2471 | 2484 | ||
2472 | ext4_ext_init(sb); | 2485 | ext4_ext_init(sb); |
2473 | ext4_mb_init(sb, needs_recovery); | 2486 | err = ext4_mb_init(sb, needs_recovery); |
2487 | if (err) { | ||
2488 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2489 | err); | ||
2490 | goto failed_mount4; | ||
2491 | } | ||
2474 | 2492 | ||
2475 | lock_kernel(); | 2493 | lock_kernel(); |
2476 | return 0; | 2494 | return 0; |
@@ -2488,11 +2506,16 @@ failed_mount3: | |||
2488 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2506 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2489 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2507 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2490 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2508 | percpu_counter_destroy(&sbi->s_dirs_counter); |
2509 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
2491 | failed_mount2: | 2510 | failed_mount2: |
2492 | for (i = 0; i < db_count; i++) | 2511 | for (i = 0; i < db_count; i++) |
2493 | brelse(sbi->s_group_desc[i]); | 2512 | brelse(sbi->s_group_desc[i]); |
2494 | kfree(sbi->s_group_desc); | 2513 | kfree(sbi->s_group_desc); |
2495 | failed_mount: | 2514 | failed_mount: |
2515 | if (sbi->s_proc) { | ||
2516 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
2517 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
2518 | } | ||
2496 | #ifdef CONFIG_QUOTA | 2519 | #ifdef CONFIG_QUOTA |
2497 | for (i = 0; i < MAXQUOTAS; i++) | 2520 | for (i = 0; i < MAXQUOTAS; i++) |
2498 | kfree(sbi->s_qf_names[i]); | 2521 | kfree(sbi->s_qf_names[i]); |
@@ -2526,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
2526 | journal->j_flags |= JBD2_BARRIER; | 2549 | journal->j_flags |= JBD2_BARRIER; |
2527 | else | 2550 | else |
2528 | journal->j_flags &= ~JBD2_BARRIER; | 2551 | journal->j_flags &= ~JBD2_BARRIER; |
2552 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
2553 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2554 | else | ||
2555 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2529 | spin_unlock(&journal->j_state_lock); | 2556 | spin_unlock(&journal->j_state_lock); |
2530 | } | 2557 | } |
2531 | 2558 | ||
@@ -2551,7 +2578,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2551 | return NULL; | 2578 | return NULL; |
2552 | } | 2579 | } |
2553 | 2580 | ||
2554 | jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", | 2581 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2555 | journal_inode, journal_inode->i_size); | 2582 | journal_inode, journal_inode->i_size); |
2556 | if (!S_ISREG(journal_inode->i_mode)) { | 2583 | if (!S_ISREG(journal_inode->i_mode)) { |
2557 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2584 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); |
@@ -2714,6 +2741,11 @@ static int ext4_load_journal(struct super_block *sb, | |||
2714 | return -EINVAL; | 2741 | return -EINVAL; |
2715 | } | 2742 | } |
2716 | 2743 | ||
2744 | if (journal->j_flags & JBD2_BARRIER) | ||
2745 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | ||
2746 | else | ||
2747 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | ||
2748 | |||
2717 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2749 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
2718 | err = jbd2_journal_update_format(journal); | 2750 | err = jbd2_journal_update_format(journal); |
2719 | if (err) { | 2751 | if (err) { |
@@ -2798,13 +2830,34 @@ static void ext4_commit_super(struct super_block *sb, | |||
2798 | 2830 | ||
2799 | if (!sbh) | 2831 | if (!sbh) |
2800 | return; | 2832 | return; |
2833 | if (buffer_write_io_error(sbh)) { | ||
2834 | /* | ||
2835 | * Oh, dear. A previous attempt to write the | ||
2836 | * superblock failed. This could happen because the | ||
2837 | * USB device was yanked out. Or it could happen to | ||
2838 | * be a transient write error and maybe the block will | ||
2839 | * be remapped. Nothing we can do but to retry the | ||
2840 | * write and hope for the best. | ||
2841 | */ | ||
2842 | printk(KERN_ERR "ext4: previous I/O error to " | ||
2843 | "superblock detected for %s.\n", sb->s_id); | ||
2844 | clear_buffer_write_io_error(sbh); | ||
2845 | set_buffer_uptodate(sbh); | ||
2846 | } | ||
2801 | es->s_wtime = cpu_to_le32(get_seconds()); | 2847 | es->s_wtime = cpu_to_le32(get_seconds()); |
2802 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2848 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); |
2803 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2849 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); |
2804 | BUFFER_TRACE(sbh, "marking dirty"); | 2850 | BUFFER_TRACE(sbh, "marking dirty"); |
2805 | mark_buffer_dirty(sbh); | 2851 | mark_buffer_dirty(sbh); |
2806 | if (sync) | 2852 | if (sync) { |
2807 | sync_dirty_buffer(sbh); | 2853 | sync_dirty_buffer(sbh); |
2854 | if (buffer_write_io_error(sbh)) { | ||
2855 | printk(KERN_ERR "ext4: I/O error while writing " | ||
2856 | "superblock for %s.\n", sb->s_id); | ||
2857 | clear_buffer_write_io_error(sbh); | ||
2858 | set_buffer_uptodate(sbh); | ||
2859 | } | ||
2860 | } | ||
2808 | } | 2861 | } |
2809 | 2862 | ||
2810 | 2863 | ||
@@ -2819,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2819 | journal_t *journal = EXT4_SB(sb)->s_journal; | 2872 | journal_t *journal = EXT4_SB(sb)->s_journal; |
2820 | 2873 | ||
2821 | jbd2_journal_lock_updates(journal); | 2874 | jbd2_journal_lock_updates(journal); |
2822 | jbd2_journal_flush(journal); | 2875 | if (jbd2_journal_flush(journal) < 0) |
2876 | goto out; | ||
2877 | |||
2823 | lock_super(sb); | 2878 | lock_super(sb); |
2824 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2879 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
2825 | sb->s_flags & MS_RDONLY) { | 2880 | sb->s_flags & MS_RDONLY) { |
@@ -2828,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2828 | ext4_commit_super(sb, es, 1); | 2883 | ext4_commit_super(sb, es, 1); |
2829 | } | 2884 | } |
2830 | unlock_super(sb); | 2885 | unlock_super(sb); |
2886 | |||
2887 | out: | ||
2831 | jbd2_journal_unlock_updates(journal); | 2888 | jbd2_journal_unlock_updates(journal); |
2832 | } | 2889 | } |
2833 | 2890 | ||
@@ -2906,6 +2963,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
2906 | { | 2963 | { |
2907 | tid_t target; | 2964 | tid_t target; |
2908 | 2965 | ||
2966 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | ||
2909 | sb->s_dirt = 0; | 2967 | sb->s_dirt = 0; |
2910 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 2968 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
2911 | if (wait) | 2969 | if (wait) |
@@ -2927,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb) | |||
2927 | 2985 | ||
2928 | /* Now we set up the journal barrier. */ | 2986 | /* Now we set up the journal barrier. */ |
2929 | jbd2_journal_lock_updates(journal); | 2987 | jbd2_journal_lock_updates(journal); |
2930 | jbd2_journal_flush(journal); | 2988 | |
2989 | /* | ||
2990 | * We don't want to clear needs_recovery flag when we failed | ||
2991 | * to flush the journal. | ||
2992 | */ | ||
2993 | if (jbd2_journal_flush(journal) < 0) | ||
2994 | return; | ||
2931 | 2995 | ||
2932 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 2996 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
2933 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2997 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
@@ -3161,7 +3225,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3161 | buf->f_type = EXT4_SUPER_MAGIC; | 3225 | buf->f_type = EXT4_SUPER_MAGIC; |
3162 | buf->f_bsize = sb->s_blocksize; | 3226 | buf->f_bsize = sb->s_blocksize; |
3163 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3227 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3164 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); | 3228 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3229 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | ||
3165 | ext4_free_blocks_count_set(es, buf->f_bfree); | 3230 | ext4_free_blocks_count_set(es, buf->f_bfree); |
3166 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3231 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3167 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3232 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
@@ -3366,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3366 | * otherwise be livelocked... | 3431 | * otherwise be livelocked... |
3367 | */ | 3432 | */ |
3368 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 3433 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
3369 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 3434 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
3370 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3435 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3436 | if (err) { | ||
3437 | path_put(&nd.path); | ||
3438 | return err; | ||
3439 | } | ||
3371 | } | 3440 | } |
3372 | 3441 | ||
3373 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 3442 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); |
@@ -3431,7 +3500,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3431 | handle_t *handle = journal_current_handle(); | 3500 | handle_t *handle = journal_current_handle(); |
3432 | 3501 | ||
3433 | if (!handle) { | 3502 | if (!handle) { |
3434 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" | 3503 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" |
3435 | " cancelled because transaction is not started.\n", | 3504 | " cancelled because transaction is not started.\n", |
3436 | (unsigned long long)off, (unsigned long long)len); | 3505 | (unsigned long long)off, (unsigned long long)len); |
3437 | return -EIO; | 3506 | return -EIO; |
@@ -3492,18 +3561,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, | |||
3492 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3561 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); |
3493 | } | 3562 | } |
3494 | 3563 | ||
3564 | #ifdef CONFIG_PROC_FS | ||
3565 | static int ext4_ui_proc_show(struct seq_file *m, void *v) | ||
3566 | { | ||
3567 | unsigned int *p = m->private; | ||
3568 | |||
3569 | seq_printf(m, "%u\n", *p); | ||
3570 | return 0; | ||
3571 | } | ||
3572 | |||
3573 | static int ext4_ui_proc_open(struct inode *inode, struct file *file) | ||
3574 | { | ||
3575 | return single_open(file, ext4_ui_proc_show, PDE(inode)->data); | ||
3576 | } | ||
3577 | |||
3578 | static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, | ||
3579 | size_t cnt, loff_t *ppos) | ||
3580 | { | ||
3581 | unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; | ||
3582 | char str[32]; | ||
3583 | unsigned long value; | ||
3584 | |||
3585 | if (cnt >= sizeof(str)) | ||
3586 | return -EINVAL; | ||
3587 | if (copy_from_user(str, buf, cnt)) | ||
3588 | return -EFAULT; | ||
3589 | value = simple_strtol(str, NULL, 0); | ||
3590 | if (value < 0) | ||
3591 | return -ERANGE; | ||
3592 | *p = value; | ||
3593 | return cnt; | ||
3594 | } | ||
3595 | |||
3596 | const struct file_operations ext4_ui_proc_fops = { | ||
3597 | .owner = THIS_MODULE, | ||
3598 | .open = ext4_ui_proc_open, | ||
3599 | .read = seq_read, | ||
3600 | .llseek = seq_lseek, | ||
3601 | .release = single_release, | ||
3602 | .write = ext4_ui_proc_write, | ||
3603 | }; | ||
3604 | #endif | ||
3605 | |||
3606 | static struct file_system_type ext4_fs_type = { | ||
3607 | .owner = THIS_MODULE, | ||
3608 | .name = "ext4", | ||
3609 | .get_sb = ext4_get_sb, | ||
3610 | .kill_sb = kill_block_super, | ||
3611 | .fs_flags = FS_REQUIRES_DEV, | ||
3612 | }; | ||
3613 | |||
3614 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3615 | static int ext4dev_get_sb(struct file_system_type *fs_type, | ||
3616 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
3617 | { | ||
3618 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | ||
3619 | "to mount using ext4\n"); | ||
3620 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | ||
3621 | "will go away by 2.6.31\n"); | ||
3622 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | ||
3623 | } | ||
3624 | |||
3495 | static struct file_system_type ext4dev_fs_type = { | 3625 | static struct file_system_type ext4dev_fs_type = { |
3496 | .owner = THIS_MODULE, | 3626 | .owner = THIS_MODULE, |
3497 | .name = "ext4dev", | 3627 | .name = "ext4dev", |
3498 | .get_sb = ext4_get_sb, | 3628 | .get_sb = ext4dev_get_sb, |
3499 | .kill_sb = kill_block_super, | 3629 | .kill_sb = kill_block_super, |
3500 | .fs_flags = FS_REQUIRES_DEV, | 3630 | .fs_flags = FS_REQUIRES_DEV, |
3501 | }; | 3631 | }; |
3632 | MODULE_ALIAS("ext4dev"); | ||
3633 | #endif | ||
3502 | 3634 | ||
3503 | static int __init init_ext4_fs(void) | 3635 | static int __init init_ext4_fs(void) |
3504 | { | 3636 | { |
3505 | int err; | 3637 | int err; |
3506 | 3638 | ||
3639 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
3507 | err = init_ext4_mballoc(); | 3640 | err = init_ext4_mballoc(); |
3508 | if (err) | 3641 | if (err) |
3509 | return err; | 3642 | return err; |
@@ -3514,9 +3647,16 @@ static int __init init_ext4_fs(void) | |||
3514 | err = init_inodecache(); | 3647 | err = init_inodecache(); |
3515 | if (err) | 3648 | if (err) |
3516 | goto out1; | 3649 | goto out1; |
3517 | err = register_filesystem(&ext4dev_fs_type); | 3650 | err = register_filesystem(&ext4_fs_type); |
3518 | if (err) | 3651 | if (err) |
3519 | goto out; | 3652 | goto out; |
3653 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3654 | err = register_filesystem(&ext4dev_fs_type); | ||
3655 | if (err) { | ||
3656 | unregister_filesystem(&ext4_fs_type); | ||
3657 | goto out; | ||
3658 | } | ||
3659 | #endif | ||
3520 | return 0; | 3660 | return 0; |
3521 | out: | 3661 | out: |
3522 | destroy_inodecache(); | 3662 | destroy_inodecache(); |
@@ -3529,10 +3669,14 @@ out2: | |||
3529 | 3669 | ||
3530 | static void __exit exit_ext4_fs(void) | 3670 | static void __exit exit_ext4_fs(void) |
3531 | { | 3671 | { |
3672 | unregister_filesystem(&ext4_fs_type); | ||
3673 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3532 | unregister_filesystem(&ext4dev_fs_type); | 3674 | unregister_filesystem(&ext4dev_fs_type); |
3675 | #endif | ||
3533 | destroy_inodecache(); | 3676 | destroy_inodecache(); |
3534 | exit_ext4_xattr(); | 3677 | exit_ext4_xattr(); |
3535 | exit_ext4_mballoc(); | 3678 | exit_ext4_mballoc(); |
3679 | remove_proc_entry("fs/ext4", NULL); | ||
3536 | } | 3680 | } |
3537 | 3681 | ||
3538 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3682 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc01..00740cb32be3 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -23,10 +23,10 @@ | |||
23 | #include "ext4.h" | 23 | #include "ext4.h" |
24 | #include "xattr.h" | 24 | #include "xattr.h" |
25 | 25 | ||
26 | static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
27 | { | 27 | { |
28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
29 | nd_set_link(nd, (char*)ei->i_data); | 29 | nd_set_link(nd, (char *) ei->i_data); |
30 | return NULL; | 30 | return NULL; |
31 | } | 31 | } |
32 | 32 | ||
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 37 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 38 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 39 | .getxattr = generic_getxattr, |
40 | .listxattr = ext4_listxattr, | 40 | .listxattr = ext4_listxattr, |
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 46 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 47 | .follow_link = ext4_follow_link, |
48 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 48 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 49 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 50 | .getxattr = generic_getxattr, |
51 | .listxattr = ext4_listxattr, | 51 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b4893..80626d516fee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; | |||
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, | 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, |
105 | #endif | 105 | #endif |
106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, | 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, |
107 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 107 | #ifdef CONFIG_EXT4_FS_SECURITY |
108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, | 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, |
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
116 | &ext4_xattr_acl_access_handler, | 116 | &ext4_xattr_acl_access_handler, |
117 | &ext4_xattr_acl_default_handler, | 117 | &ext4_xattr_acl_default_handler, |
118 | #endif | 118 | #endif |
119 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 119 | #ifdef CONFIG_EXT4_FS_SECURITY |
120 | &ext4_xattr_security_handler, | 120 | &ext4_xattr_security_handler, |
121 | #endif | 121 | #endif |
122 | NULL | 122 | NULL |
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
959 | struct ext4_xattr_block_find bs = { | 959 | struct ext4_xattr_block_find bs = { |
960 | .s = { .not_found = -ENODATA, }, | 960 | .s = { .not_found = -ENODATA, }, |
961 | }; | 961 | }; |
962 | unsigned long no_expand; | ||
962 | int error; | 963 | int error; |
963 | 964 | ||
964 | if (!name) | 965 | if (!name) |
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
966 | if (strlen(name) > 255) | 967 | if (strlen(name) > 255) |
967 | return -ERANGE; | 968 | return -ERANGE; |
968 | down_write(&EXT4_I(inode)->xattr_sem); | 969 | down_write(&EXT4_I(inode)->xattr_sem); |
970 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | ||
971 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | ||
972 | |||
969 | error = ext4_get_inode_loc(inode, &is.iloc); | 973 | error = ext4_get_inode_loc(inode, &is.iloc); |
970 | if (error) | 974 | if (error) |
971 | goto cleanup; | 975 | goto cleanup; |
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1042 | cleanup: | 1046 | cleanup: |
1043 | brelse(is.iloc.bh); | 1047 | brelse(is.iloc.bh); |
1044 | brelse(bs.bh); | 1048 | brelse(bs.bh); |
1049 | if (no_expand == 0) | ||
1050 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | ||
1045 | up_write(&EXT4_I(inode)->xattr_sem); | 1051 | up_write(&EXT4_I(inode)->xattr_sem); |
1046 | return error; | 1052 | return error; |
1047 | } | 1053 | } |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb9..8ede88b18c29 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -51,8 +51,8 @@ struct ext4_xattr_entry { | |||
51 | (((name_len) + EXT4_XATTR_ROUND + \ | 51 | (((name_len) + EXT4_XATTR_ROUND + \ |
52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) | 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) |
53 | #define EXT4_XATTR_NEXT(entry) \ | 53 | #define EXT4_XATTR_NEXT(entry) \ |
54 | ( (struct ext4_xattr_entry *)( \ | 54 | ((struct ext4_xattr_entry *)( \ |
55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) | 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) |
56 | #define EXT4_XATTR_SIZE(size) \ | 56 | #define EXT4_XATTR_SIZE(size) \ |
57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) | 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) |
58 | 58 | ||
@@ -63,7 +63,7 @@ struct ext4_xattr_entry { | |||
63 | EXT4_I(inode)->i_extra_isize)) | 63 | EXT4_I(inode)->i_extra_isize)) |
64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) | 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) |
65 | 65 | ||
66 | # ifdef CONFIG_EXT4DEV_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern struct xattr_handler ext4_xattr_trusted_handler; |
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); | |||
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4DEV_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
93 | static inline int | 93 | static inline int |
94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, | 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, |
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
141 | 141 | ||
142 | #define ext4_xattr_handlers NULL | 142 | #define ext4_xattr_handlers NULL |
143 | 143 | ||
144 | # endif /* CONFIG_EXT4DEV_FS_XATTR */ | 144 | # endif /* CONFIG_EXT4_FS_XATTR */ |
145 | 145 | ||
146 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
148 | struct inode *dir); | 148 | struct inode *dir); |
149 | #else | 149 | #else |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c4af7e..fb98b3d847ed 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/msdos_fs.h> | 8 | #include <linux/msdos_fs.h> |
9 | #include <linux/blkdev.h> | ||
9 | 10 | ||
10 | struct fatent_operations { | 11 | struct fatent_operations { |
11 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); | 12 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); |
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
535 | struct fat_entry fatent; | 536 | struct fat_entry fatent; |
536 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; | 537 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; |
537 | int i, err, nr_bhs; | 538 | int i, err, nr_bhs; |
539 | int first_cl = cluster; | ||
538 | 540 | ||
539 | nr_bhs = 0; | 541 | nr_bhs = 0; |
540 | fatent_init(&fatent); | 542 | fatent_init(&fatent); |
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
551 | goto error; | 553 | goto error; |
552 | } | 554 | } |
553 | 555 | ||
556 | /* | ||
557 | * Issue discard for the sectors we no longer care about, | ||
558 | * batching contiguous clusters into one request | ||
559 | */ | ||
560 | if (cluster != fatent.entry + 1) { | ||
561 | int nr_clus = fatent.entry - first_cl + 1; | ||
562 | |||
563 | sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), | ||
564 | nr_clus * sbi->sec_per_clus); | ||
565 | first_cl = cluster; | ||
566 | } | ||
567 | |||
554 | ops->ent_put(&fatent, FAT_ENT_FREE); | 568 | ops->ent_put(&fatent, FAT_ENT_FREE); |
555 | if (sbi->free_clusters != -1) { | 569 | if (sbi->free_clusters != -1) { |
556 | sbi->free_clusters++; | 570 | sbi->free_clusters++; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6d266d793e2c..d12cdf2a0406 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) | |||
562 | struct buffer_head *bh; | 562 | struct buffer_head *bh; |
563 | struct msdos_dir_entry *raw_entry; | 563 | struct msdos_dir_entry *raw_entry; |
564 | loff_t i_pos; | 564 | loff_t i_pos; |
565 | int err = 0; | 565 | int err; |
566 | 566 | ||
567 | retry: | 567 | retry: |
568 | i_pos = MSDOS_I(inode)->i_pos; | 568 | i_pos = MSDOS_I(inode)->i_pos; |
569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
570 | return 0; | 570 | return 0; |
571 | 571 | ||
572 | lock_super(sb); | ||
573 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 572 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
574 | if (!bh) { | 573 | if (!bh) { |
575 | printk(KERN_ERR "FAT: unable to read inode block " | 574 | printk(KERN_ERR "FAT: unable to read inode block " |
576 | "for updating (i_pos %lld)\n", i_pos); | 575 | "for updating (i_pos %lld)\n", i_pos); |
577 | err = -EIO; | 576 | return -EIO; |
578 | goto out; | ||
579 | } | 577 | } |
580 | spin_lock(&sbi->inode_hash_lock); | 578 | spin_lock(&sbi->inode_hash_lock); |
581 | if (i_pos != MSDOS_I(inode)->i_pos) { | 579 | if (i_pos != MSDOS_I(inode)->i_pos) { |
582 | spin_unlock(&sbi->inode_hash_lock); | 580 | spin_unlock(&sbi->inode_hash_lock); |
583 | brelse(bh); | 581 | brelse(bh); |
584 | unlock_super(sb); | ||
585 | goto retry; | 582 | goto retry; |
586 | } | 583 | } |
587 | 584 | ||
@@ -607,11 +604,10 @@ retry: | |||
607 | } | 604 | } |
608 | spin_unlock(&sbi->inode_hash_lock); | 605 | spin_unlock(&sbi->inode_hash_lock); |
609 | mark_buffer_dirty(bh); | 606 | mark_buffer_dirty(bh); |
607 | err = 0; | ||
610 | if (wait) | 608 | if (wait) |
611 | err = sync_dirty_buffer(bh); | 609 | err = sync_dirty_buffer(bh); |
612 | brelse(bh); | 610 | brelse(bh); |
613 | out: | ||
614 | unlock_super(sb); | ||
615 | return err; | 611 | return err; |
616 | } | 612 | } |
617 | 613 | ||
@@ -859,7 +855,7 @@ enum { | |||
859 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, | 855 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, |
860 | }; | 856 | }; |
861 | 857 | ||
862 | static match_table_t fat_tokens = { | 858 | static const match_table_t fat_tokens = { |
863 | {Opt_check_r, "check=relaxed"}, | 859 | {Opt_check_r, "check=relaxed"}, |
864 | {Opt_check_s, "check=strict"}, | 860 | {Opt_check_s, "check=strict"}, |
865 | {Opt_check_n, "check=normal"}, | 861 | {Opt_check_n, "check=normal"}, |
@@ -894,14 +890,14 @@ static match_table_t fat_tokens = { | |||
894 | {Opt_tz_utc, "tz=UTC"}, | 890 | {Opt_tz_utc, "tz=UTC"}, |
895 | {Opt_err, NULL}, | 891 | {Opt_err, NULL}, |
896 | }; | 892 | }; |
897 | static match_table_t msdos_tokens = { | 893 | static const match_table_t msdos_tokens = { |
898 | {Opt_nodots, "nodots"}, | 894 | {Opt_nodots, "nodots"}, |
899 | {Opt_nodots, "dotsOK=no"}, | 895 | {Opt_nodots, "dotsOK=no"}, |
900 | {Opt_dots, "dots"}, | 896 | {Opt_dots, "dots"}, |
901 | {Opt_dots, "dotsOK=yes"}, | 897 | {Opt_dots, "dotsOK=yes"}, |
902 | {Opt_err, NULL} | 898 | {Opt_err, NULL} |
903 | }; | 899 | }; |
904 | static match_table_t vfat_tokens = { | 900 | static const match_table_t vfat_tokens = { |
905 | {Opt_charset, "iocharset=%s"}, | 901 | {Opt_charset, "iocharset=%s"}, |
906 | {Opt_shortname_lower, "shortname=lower"}, | 902 | {Opt_shortname_lower, "shortname=lower"}, |
907 | {Opt_shortname_win95, "shortname=win95"}, | 903 | {Opt_shortname_win95, "shortname=win95"}, |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d2249f174e20..6a84388cacff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -354,7 +354,7 @@ enum { | |||
354 | OPT_ERR | 354 | OPT_ERR |
355 | }; | 355 | }; |
356 | 356 | ||
357 | static match_table_t tokens = { | 357 | static const match_table_t tokens = { |
358 | {OPT_FD, "fd=%u"}, | 358 | {OPT_FD, "fd=%u"}, |
359 | {OPT_ROOTMODE, "rootmode=%o"}, | 359 | {OPT_ROOTMODE, "rootmode=%o"}, |
360 | {OPT_USER_ID, "user_id=%u"}, | 360 | {OPT_USER_ID, "user_id=%u"}, |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e546616..c962283d4e7f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1266 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
1267 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
1268 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
1269 | delay = gl->gl_ops->go_min_hold_time; | ||
1268 | 1270 | ||
1269 | spin_lock(&gl->gl_spin); | 1271 | spin_lock(&gl->gl_spin); |
1270 | handle_callback(gl, state, 1, delay); | 1272 | handle_callback(gl, state, 1, delay); |
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1578 | *p++ = 'a'; | 1580 | *p++ = 'a'; |
1579 | if (flags & GL_EXACT) | 1581 | if (flags & GL_EXACT) |
1580 | *p++ = 'E'; | 1582 | *p++ = 'E'; |
1581 | if (flags & GL_ATIME) | ||
1582 | *p++ = 'a'; | ||
1583 | if (flags & GL_NOCACHE) | 1583 | if (flags & GL_NOCACHE) |
1584 | *p++ = 'c'; | 1584 | *p++ = 'c'; |
1585 | if (test_bit(HIF_HOLDER, &iflags)) | 1585 | if (test_bit(HIF_HOLDER, &iflags)) |
@@ -1816,15 +1816,17 @@ restart: | |||
1816 | if (gl) { | 1816 | if (gl) { |
1817 | gi->gl = hlist_entry(gl->gl_list.next, | 1817 | gi->gl = hlist_entry(gl->gl_list.next, |
1818 | struct gfs2_glock, gl_list); | 1818 | struct gfs2_glock, gl_list); |
1819 | if (gi->gl) | 1819 | } else { |
1820 | gfs2_glock_hold(gi->gl); | 1820 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, |
1821 | struct gfs2_glock, gl_list); | ||
1821 | } | 1822 | } |
1823 | if (gi->gl) | ||
1824 | gfs2_glock_hold(gi->gl); | ||
1822 | read_unlock(gl_lock_addr(gi->hash)); | 1825 | read_unlock(gl_lock_addr(gi->hash)); |
1823 | if (gl) | 1826 | if (gl) |
1824 | gfs2_glock_put(gl); | 1827 | gfs2_glock_put(gl); |
1825 | if (gl && gi->gl == NULL) | ||
1826 | gi->hash++; | ||
1827 | while (gi->gl == NULL) { | 1828 | while (gi->gl == NULL) { |
1829 | gi->hash++; | ||
1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1830 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
1829 | return 1; | 1831 | return 1; |
1830 | read_lock(gl_lock_addr(gi->hash)); | 1832 | read_lock(gl_lock_addr(gi->hash)); |
@@ -1833,7 +1835,6 @@ restart: | |||
1833 | if (gi->gl) | 1835 | if (gi->gl) |
1834 | gfs2_glock_hold(gi->gl); | 1836 | gfs2_glock_hold(gi->gl); |
1835 | read_unlock(gl_lock_addr(gi->hash)); | 1837 | read_unlock(gl_lock_addr(gi->hash)); |
1836 | gi->hash++; | ||
1837 | } | 1838 | } |
1838 | 1839 | ||
1839 | if (gi->sdp != gi->gl->gl_sbd) | 1840 | if (gi->sdp != gi->gl->gl_sbd) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 971d92af70fc..695c6b193611 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -24,7 +24,6 @@ | |||
24 | #define GL_ASYNC 0x00000040 | 24 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 25 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
27 | #define GL_ATIME 0x00000200 | ||
28 | #define GL_NOCACHE 0x00000400 | 27 | #define GL_NOCACHE 0x00000400 |
29 | 28 | ||
30 | #define GLR_TRYFAILED 13 | 29 | #define GLR_TRYFAILED 13 |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a5c462..f566ec1b4e8e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host { | |||
386 | #define GFS2_DATA_ORDERED 2 | 386 | #define GFS2_DATA_ORDERED 2 |
387 | 387 | ||
388 | struct gfs2_args { | 388 | struct gfs2_args { |
389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
392 | int ar_spectator; /* Don't get a journal because we're always RO */ | 392 | unsigned int ar_spectator:1; /* Don't get a journal */ |
393 | int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ | 393 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ |
394 | int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ | 394 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
395 | int ar_localcaching; /* Local-style caching (dangerous on multihost) */ | 395 | unsigned int ar_localcaching:1; /* Local caching */ |
396 | int ar_debug; /* Oops on errors instead of trying to be graceful */ | 396 | unsigned int ar_debug:1; /* Oops on errors */ |
397 | int ar_upgrade; /* Upgrade ondisk/multihost format */ | 397 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ |
398 | unsigned int ar_num_glockd; /* Number of glockd threads */ | 398 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
399 | int ar_posix_acl; /* Enable posix acls */ | 399 | unsigned int ar_quota:2; /* off/account/on */ |
400 | int ar_quota; /* off/account/on */ | 400 | unsigned int ar_suiddir:1; /* suiddir support */ |
401 | int ar_suiddir; /* suiddir support */ | 401 | unsigned int ar_data:2; /* ordered/writeback */ |
402 | int ar_data; /* ordered/writeback */ | 402 | unsigned int ar_meta:1; /* mount metafs */ |
403 | unsigned int ar_num_glockd; /* Number of glockd threads */ | ||
403 | }; | 404 | }; |
404 | 405 | ||
405 | struct gfs2_tune { | 406 | struct gfs2_tune { |
@@ -419,7 +420,6 @@ struct gfs2_tune { | |||
419 | unsigned int gt_quota_scale_den; /* Denominator */ | 420 | unsigned int gt_quota_scale_den; /* Denominator */ |
420 | unsigned int gt_quota_cache_secs; | 421 | unsigned int gt_quota_cache_secs; |
421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 422 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | ||
423 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
425 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
@@ -432,7 +432,7 @@ enum { | |||
432 | SDF_JOURNAL_CHECKED = 0, | 432 | SDF_JOURNAL_CHECKED = 0, |
433 | SDF_JOURNAL_LIVE = 1, | 433 | SDF_JOURNAL_LIVE = 1, |
434 | SDF_SHUTDOWN = 2, | 434 | SDF_SHUTDOWN = 2, |
435 | SDF_NOATIME = 3, | 435 | SDF_NOBARRIERS = 3, |
436 | }; | 436 | }; |
437 | 437 | ||
438 | #define GFS2_FSNAME_LEN 256 | 438 | #define GFS2_FSNAME_LEN 256 |
@@ -461,7 +461,6 @@ struct gfs2_sb_host { | |||
461 | 461 | ||
462 | struct gfs2_sbd { | 462 | struct gfs2_sbd { |
463 | struct super_block *sd_vfs; | 463 | struct super_block *sd_vfs; |
464 | struct super_block *sd_vfs_meta; | ||
465 | struct kobject sd_kobj; | 464 | struct kobject sd_kobj; |
466 | unsigned long sd_flags; /* SDF_... */ | 465 | unsigned long sd_flags; /* SDF_... */ |
467 | struct gfs2_sb_host sd_sb; | 466 | struct gfs2_sb_host sd_sb; |
@@ -499,7 +498,9 @@ struct gfs2_sbd { | |||
499 | 498 | ||
500 | /* Inode Stuff */ | 499 | /* Inode Stuff */ |
501 | 500 | ||
502 | struct inode *sd_master_dir; | 501 | struct dentry *sd_master_dir; |
502 | struct dentry *sd_root_dir; | ||
503 | |||
503 | struct inode *sd_jindex; | 504 | struct inode *sd_jindex; |
504 | struct inode *sd_inum_inode; | 505 | struct inode *sd_inum_inode; |
505 | struct inode *sd_statfs_inode; | 506 | struct inode *sd_statfs_inode; |
@@ -634,7 +635,6 @@ struct gfs2_sbd { | |||
634 | /* Debugging crud */ | 635 | /* Debugging crud */ |
635 | 636 | ||
636 | unsigned long sd_last_warning; | 637 | unsigned long sd_last_warning; |
637 | struct vfsmount *sd_gfs2mnt; | ||
638 | struct dentry *debugfs_dir; /* debugfs directory */ | 638 | struct dentry *debugfs_dir; /* debugfs directory */ |
639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ |
640 | }; | 640 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8b0806a32948..7cee695fa441 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/crc32.h> | 18 | #include <linux/crc32.h> |
19 | #include <linux/lm_interface.h> | 19 | #include <linux/lm_interface.h> |
20 | #include <linux/security.h> | 20 | #include <linux/security.h> |
21 | #include <linux/time.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
249 | { | 250 | { |
250 | struct gfs2_dinode_host *di = &ip->i_di; | 251 | struct gfs2_dinode_host *di = &ip->i_di; |
251 | const struct gfs2_dinode *str = buf; | 252 | const struct gfs2_dinode *str = buf; |
253 | struct timespec atime; | ||
252 | u16 height, depth; | 254 | u16 height, depth; |
253 | 255 | ||
254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) | 256 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
275 | di->di_size = be64_to_cpu(str->di_size); | 277 | di->di_size = be64_to_cpu(str->di_size); |
276 | i_size_write(&ip->i_inode, di->di_size); | 278 | i_size_write(&ip->i_inode, di->di_size); |
277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 279 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 280 | atime.tv_sec = be64_to_cpu(str->di_atime); |
279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 281 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
282 | if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) | ||
283 | ip->i_inode.i_atime = atime; | ||
280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 284 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
281 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); | 285 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); |
282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 286 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1033 | 1037 | ||
1034 | if (bh) | 1038 | if (bh) |
1035 | brelse(bh); | 1039 | brelse(bh); |
1036 | if (!inode) | ||
1037 | return ERR_PTR(-ENOMEM); | ||
1038 | return inode; | 1040 | return inode; |
1039 | 1041 | ||
1040 | fail_gunlock2: | 1042 | fail_gunlock2: |
1041 | gfs2_glock_dq_uninit(ghs + 1); | 1043 | gfs2_glock_dq_uninit(ghs + 1); |
1042 | if (inode) | 1044 | if (inode && !IS_ERR(inode)) |
1043 | iput(inode); | 1045 | iput(inode); |
1044 | fail_gunlock: | 1046 | fail_gunlock: |
1045 | gfs2_glock_dq(ghs); | 1047 | gfs2_glock_dq(ghs); |
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
1140 | return 0; | 1142 | return 0; |
1141 | } | 1143 | } |
1142 | 1144 | ||
1143 | /* | ||
1144 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
1145 | * @this: move this | ||
1146 | * @to: to here | ||
1147 | * | ||
1148 | * Follow @to back to the root and make sure we don't encounter @this | ||
1149 | * Assumes we already hold the rename lock. | ||
1150 | * | ||
1151 | * Returns: errno | ||
1152 | */ | ||
1153 | |||
1154 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
1155 | { | ||
1156 | struct inode *dir = &to->i_inode; | ||
1157 | struct super_block *sb = dir->i_sb; | ||
1158 | struct inode *tmp; | ||
1159 | struct qstr dotdot; | ||
1160 | int error = 0; | ||
1161 | |||
1162 | gfs2_str2qstr(&dotdot, ".."); | ||
1163 | |||
1164 | igrab(dir); | ||
1165 | |||
1166 | for (;;) { | ||
1167 | if (dir == &this->i_inode) { | ||
1168 | error = -EINVAL; | ||
1169 | break; | ||
1170 | } | ||
1171 | if (dir == sb->s_root->d_inode) { | ||
1172 | error = 0; | ||
1173 | break; | ||
1174 | } | ||
1175 | |||
1176 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
1177 | if (IS_ERR(tmp)) { | ||
1178 | error = PTR_ERR(tmp); | ||
1179 | break; | ||
1180 | } | ||
1181 | |||
1182 | iput(dir); | ||
1183 | dir = tmp; | ||
1184 | } | ||
1185 | |||
1186 | iput(dir); | ||
1187 | |||
1188 | return error; | ||
1189 | } | ||
1190 | |||
1191 | /** | 1145 | /** |
1192 | * gfs2_readlinki - return the contents of a symlink | 1146 | * gfs2_readlinki - return the contents of a symlink |
1193 | * @ip: the symlink's inode | 1147 | * @ip: the symlink's inode |
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
1207 | unsigned int x; | 1161 | unsigned int x; |
1208 | int error; | 1162 | int error; |
1209 | 1163 | ||
1210 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 1164 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
1211 | error = gfs2_glock_nq_atime(&i_gh); | 1165 | error = gfs2_glock_nq(&i_gh); |
1212 | if (error) { | 1166 | if (error) { |
1213 | gfs2_holder_uninit(&i_gh); | 1167 | gfs2_holder_uninit(&i_gh); |
1214 | return error; | 1168 | return error; |
@@ -1243,101 +1197,6 @@ out: | |||
1243 | return error; | 1197 | return error; |
1244 | } | 1198 | } |
1245 | 1199 | ||
1246 | /** | ||
1247 | * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and | ||
1248 | * conditionally update the inode's atime | ||
1249 | * @gh: the holder to acquire | ||
1250 | * | ||
1251 | * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap | ||
1252 | * Update if the difference between the current time and the inode's current | ||
1253 | * atime is greater than an interval specified at mount. | ||
1254 | * | ||
1255 | * Returns: errno | ||
1256 | */ | ||
1257 | |||
1258 | int gfs2_glock_nq_atime(struct gfs2_holder *gh) | ||
1259 | { | ||
1260 | struct gfs2_glock *gl = gh->gh_gl; | ||
1261 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1262 | struct gfs2_inode *ip = gl->gl_object; | ||
1263 | s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); | ||
1264 | unsigned int state; | ||
1265 | int flags; | ||
1266 | int error; | ||
1267 | struct timespec tv = CURRENT_TIME; | ||
1268 | |||
1269 | if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || | ||
1270 | gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || | ||
1271 | gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) | ||
1272 | return -EINVAL; | ||
1273 | |||
1274 | state = gh->gh_state; | ||
1275 | flags = gh->gh_flags; | ||
1276 | |||
1277 | error = gfs2_glock_nq(gh); | ||
1278 | if (error) | ||
1279 | return error; | ||
1280 | |||
1281 | if (test_bit(SDF_NOATIME, &sdp->sd_flags) || | ||
1282 | (sdp->sd_vfs->s_flags & MS_RDONLY)) | ||
1283 | return 0; | ||
1284 | |||
1285 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1286 | gfs2_glock_dq(gh); | ||
1287 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, | ||
1288 | gh); | ||
1289 | error = gfs2_glock_nq(gh); | ||
1290 | if (error) | ||
1291 | return error; | ||
1292 | |||
1293 | /* Verify that atime hasn't been updated while we were | ||
1294 | trying to get exclusive lock. */ | ||
1295 | |||
1296 | tv = CURRENT_TIME; | ||
1297 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1298 | struct buffer_head *dibh; | ||
1299 | struct gfs2_dinode *di; | ||
1300 | |||
1301 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
1302 | if (error == -EROFS) | ||
1303 | return 0; | ||
1304 | if (error) | ||
1305 | goto fail; | ||
1306 | |||
1307 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1308 | if (error) | ||
1309 | goto fail_end_trans; | ||
1310 | |||
1311 | ip->i_inode.i_atime = tv; | ||
1312 | |||
1313 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1314 | di = (struct gfs2_dinode *)dibh->b_data; | ||
1315 | di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | ||
1316 | di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); | ||
1317 | brelse(dibh); | ||
1318 | |||
1319 | gfs2_trans_end(sdp); | ||
1320 | } | ||
1321 | |||
1322 | /* If someone else has asked for the glock, | ||
1323 | unlock and let them have it. Then reacquire | ||
1324 | in the original state. */ | ||
1325 | if (gfs2_glock_is_blocking(gl)) { | ||
1326 | gfs2_glock_dq(gh); | ||
1327 | gfs2_holder_reinit(state, flags, gh); | ||
1328 | return gfs2_glock_nq(gh); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | return 0; | ||
1333 | |||
1334 | fail_end_trans: | ||
1335 | gfs2_trans_end(sdp); | ||
1336 | fail: | ||
1337 | gfs2_glock_dq(gh); | ||
1338 | return error; | ||
1339 | } | ||
1340 | |||
1341 | static int | 1200 | static int |
1342 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1201 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
1343 | { | 1202 | { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 58f9607d6a86..2d43f69610a0 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
92 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
93 | int gfs2_permission(struct inode *inode, int mask); | 93 | int gfs2_permission(struct inode *inode, int mask); |
94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 94 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
97 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 95 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
98 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 96 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
99 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 97 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 09d78c216f48..0c4cbe6c8285 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data, | |||
144 | 144 | ||
145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), | 145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), |
146 | &ls->dlm_lockspace, | 146 | &ls->dlm_lockspace, |
147 | DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), | 147 | DLM_LSFL_FS | DLM_LSFL_NEWEXCL | |
148 | (nodir ? DLM_LSFL_NODIR : 0), | ||
148 | GDLM_LVB_SIZE); | 149 | GDLM_LVB_SIZE); |
149 | if (error) { | 150 | if (error) { |
150 | log_error("dlm_new_lockspace error %d", error); | 151 | log_error("dlm_new_lockspace error %d", error); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6c6af9f5e3ab..ad305854bdc6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <linux/freezer.h> | 20 | #include <linux/freezer.h> |
21 | #include <linux/bio.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
584 | memset(bh->b_data, 0, bh->b_size); | 585 | memset(bh->b_data, 0, bh->b_size); |
585 | set_buffer_uptodate(bh); | 586 | set_buffer_uptodate(bh); |
586 | clear_buffer_dirty(bh); | 587 | clear_buffer_dirty(bh); |
587 | unlock_buffer(bh); | ||
588 | 588 | ||
589 | gfs2_ail1_empty(sdp, 0); | 589 | gfs2_ail1_empty(sdp, 0); |
590 | tail = current_tail(sdp); | 590 | tail = current_tail(sdp); |
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); | 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); |
602 | lh->lh_hash = cpu_to_be32(hash); | 602 | lh->lh_hash = cpu_to_be32(hash); |
603 | 603 | ||
604 | set_buffer_dirty(bh); | 604 | bh->b_end_io = end_buffer_write_sync; |
605 | if (sync_dirty_buffer(bh)) | 605 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
606 | goto skip_barrier; | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | ||
609 | wait_on_buffer(bh); | ||
610 | if (buffer_eopnotsupp(bh)) { | ||
611 | clear_buffer_eopnotsupp(bh); | ||
612 | set_buffer_uptodate(bh); | ||
613 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
614 | lock_buffer(bh); | ||
615 | skip_barrier: | ||
616 | get_bh(bh); | ||
617 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | ||
618 | wait_on_buffer(bh); | ||
619 | } | ||
620 | if (!buffer_uptodate(bh)) | ||
606 | gfs2_io_error_bh(sdp, bh); | 621 | gfs2_io_error_bh(sdp, bh); |
607 | brelse(bh); | 622 | brelse(bh); |
608 | 623 | ||
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f958..f96eb90a2cfa 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
@@ -42,10 +42,11 @@ enum { | |||
42 | Opt_nosuiddir, | 42 | Opt_nosuiddir, |
43 | Opt_data_writeback, | 43 | Opt_data_writeback, |
44 | Opt_data_ordered, | 44 | Opt_data_ordered, |
45 | Opt_meta, | ||
45 | Opt_err, | 46 | Opt_err, |
46 | }; | 47 | }; |
47 | 48 | ||
48 | static match_table_t tokens = { | 49 | static const match_table_t tokens = { |
49 | {Opt_lockproto, "lockproto=%s"}, | 50 | {Opt_lockproto, "lockproto=%s"}, |
50 | {Opt_locktable, "locktable=%s"}, | 51 | {Opt_locktable, "locktable=%s"}, |
51 | {Opt_hostdata, "hostdata=%s"}, | 52 | {Opt_hostdata, "hostdata=%s"}, |
@@ -66,6 +67,7 @@ static match_table_t tokens = { | |||
66 | {Opt_nosuiddir, "nosuiddir"}, | 67 | {Opt_nosuiddir, "nosuiddir"}, |
67 | {Opt_data_writeback, "data=writeback"}, | 68 | {Opt_data_writeback, "data=writeback"}, |
68 | {Opt_data_ordered, "data=ordered"}, | 69 | {Opt_data_ordered, "data=ordered"}, |
70 | {Opt_meta, "meta"}, | ||
69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
70 | }; | 72 | }; |
71 | 73 | ||
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
239 | case Opt_data_ordered: | 241 | case Opt_data_ordered: |
240 | args->ar_data = GFS2_DATA_ORDERED; | 242 | args->ar_data = GFS2_DATA_ORDERED; |
241 | break; | 243 | break; |
244 | case Opt_meta: | ||
245 | if (remount && args->ar_meta != 1) | ||
246 | goto cant_remount; | ||
247 | args->ar_meta = 1; | ||
248 | break; | ||
242 | case Opt_err: | 249 | case Opt_err: |
243 | default: | 250 | default: |
244 | fs_info(sdp, "unknown option: %s\n", o); | 251 | fs_info(sdp, "unknown option: %s\n", o); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117a..27563816e1c5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page) | |||
512 | int error; | 512 | int error; |
513 | 513 | ||
514 | unlock_page(page); | 514 | unlock_page(page); |
515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
516 | error = gfs2_glock_nq_atime(&gh); | 516 | error = gfs2_glock_nq(&gh); |
517 | if (unlikely(error)) | 517 | if (unlikely(error)) |
518 | goto out; | 518 | goto out; |
519 | error = AOP_TRUNCATED_PAGE; | 519 | error = AOP_TRUNCATED_PAGE; |
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
594 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
595 | int ret; | 595 | int ret; |
596 | 596 | ||
597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
598 | ret = gfs2_glock_nq_atime(&gh); | 598 | ret = gfs2_glock_nq(&gh); |
599 | if (unlikely(ret)) | 599 | if (unlikely(ret)) |
600 | goto out_uninit; | 600 | goto out_uninit; |
601 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
636 | unsigned to = from + len; | 636 | unsigned to = from + len; |
637 | struct page *page; | 637 | struct page *page; |
638 | 638 | ||
639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); | 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
640 | error = gfs2_glock_nq_atime(&ip->i_gh); | 640 | error = gfs2_glock_nq(&ip->i_gh); |
641 | if (unlikely(error)) | 641 | if (unlikely(error)) |
642 | goto out_uninit; | 642 | goto out_uninit; |
643 | 643 | ||
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
975 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
976 | return 0; | 976 | return 0; |
977 | 977 | ||
978 | if (offset > i_size_read(&ip->i_inode)) | 978 | if (offset >= i_size_read(&ip->i_inode)) |
979 | return 0; | 979 | return 0; |
980 | return 1; | 980 | return 1; |
981 | } | 981 | } |
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1000 | * unfortunately have the option of only flushing a range like | 1000 | * unfortunately have the option of only flushing a range like |
1001 | * the VFS does. | 1001 | * the VFS does. |
1002 | */ | 1002 | */ |
1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); | 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); |
1004 | rv = gfs2_glock_nq_atime(&gh); | 1004 | rv = gfs2_glock_nq(&gh); |
1005 | if (rv) | 1005 | if (rv) |
1006 | return rv; | 1006 | return rv; |
1007 | rv = gfs2_ok_for_dio(ip, rw, offset); | 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e9a366d4411c..3a747f8e2188 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
89 | u64 offset = file->f_pos; | 89 | u64 offset = file->f_pos; |
90 | int error; | 90 | int error; |
91 | 91 | ||
92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
93 | error = gfs2_glock_nq_atime(&d_gh); | 93 | error = gfs2_glock_nq(&d_gh); |
94 | if (error) { | 94 | if (error) { |
95 | gfs2_holder_uninit(&d_gh); | 95 | gfs2_holder_uninit(&d_gh); |
96 | return error; | 96 | return error; |
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
153 | int error; | 153 | int error; |
154 | u32 fsflags; | 154 | u32 fsflags; |
155 | 155 | ||
156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
157 | error = gfs2_glock_nq_atime(&gh); | 157 | error = gfs2_glock_nq(&gh); |
158 | if (error) | 158 | if (error) |
159 | return error; | 159 | return error; |
160 | 160 | ||
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
351 | struct gfs2_alloc *al; | 351 | struct gfs2_alloc *al; |
352 | int ret; | 352 | int ret; |
353 | 353 | ||
354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
355 | ret = gfs2_glock_nq_atime(&gh); | 355 | ret = gfs2_glock_nq(&gh); |
356 | if (ret) | 356 | if (ret) |
357 | goto out; | 357 | goto out; |
358 | 358 | ||
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
434 | struct gfs2_holder i_gh; | 434 | struct gfs2_holder i_gh; |
435 | int error; | 435 | int error; |
436 | 436 | ||
437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
438 | error = gfs2_glock_nq_atime(&i_gh); | 438 | error = gfs2_glock_nq(&i_gh); |
439 | if (error) { | 439 | if (error) { |
440 | gfs2_holder_uninit(&i_gh); | 440 | gfs2_holder_uninit(&i_gh); |
441 | return error; | 441 | return error; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d6490633..b117fcf2c4f5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -40,6 +40,44 @@ | |||
40 | #define DO 0 | 40 | #define DO 0 |
41 | #define UNDO 1 | 41 | #define UNDO 1 |
42 | 42 | ||
43 | static const u32 gfs2_old_fs_formats[] = { | ||
44 | 0 | ||
45 | }; | ||
46 | |||
47 | static const u32 gfs2_old_multihost_formats[] = { | ||
48 | 0 | ||
49 | }; | ||
50 | |||
51 | /** | ||
52 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
53 | * @gt: tune | ||
54 | * | ||
55 | */ | ||
56 | |||
57 | static void gfs2_tune_init(struct gfs2_tune *gt) | ||
58 | { | ||
59 | spin_lock_init(>->gt_spin); | ||
60 | |||
61 | gt->gt_demote_secs = 300; | ||
62 | gt->gt_incore_log_blocks = 1024; | ||
63 | gt->gt_log_flush_secs = 60; | ||
64 | gt->gt_recoverd_secs = 60; | ||
65 | gt->gt_logd_secs = 1; | ||
66 | gt->gt_quotad_secs = 5; | ||
67 | gt->gt_quota_simul_sync = 64; | ||
68 | gt->gt_quota_warn_period = 10; | ||
69 | gt->gt_quota_scale_num = 1; | ||
70 | gt->gt_quota_scale_den = 1; | ||
71 | gt->gt_quota_cache_secs = 300; | ||
72 | gt->gt_quota_quantum = 60; | ||
73 | gt->gt_new_files_jdata = 0; | ||
74 | gt->gt_max_readahead = 1 << 18; | ||
75 | gt->gt_stall_secs = 600; | ||
76 | gt->gt_complain_secs = 10; | ||
77 | gt->gt_statfs_quantum = 30; | ||
78 | gt->gt_statfs_slow = 0; | ||
79 | } | ||
80 | |||
43 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 81 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
44 | { | 82 | { |
45 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
96 | return sdp; | 134 | return sdp; |
97 | } | 135 | } |
98 | 136 | ||
99 | static void init_vfs(struct super_block *sb, unsigned noatime) | 137 | |
138 | /** | ||
139 | * gfs2_check_sb - Check superblock | ||
140 | * @sdp: the filesystem | ||
141 | * @sb: The superblock | ||
142 | * @silent: Don't print a message if the check fails | ||
143 | * | ||
144 | * Checks the version code of the FS is one that we understand how to | ||
145 | * read and that the sizes of the various on-disk structures have not | ||
146 | * changed. | ||
147 | */ | ||
148 | |||
149 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
100 | { | 150 | { |
101 | struct gfs2_sbd *sdp = sb->s_fs_info; | 151 | unsigned int x; |
102 | 152 | ||
103 | sb->s_magic = GFS2_MAGIC; | 153 | if (sb->sb_magic != GFS2_MAGIC || |
104 | sb->s_op = &gfs2_super_ops; | 154 | sb->sb_type != GFS2_METATYPE_SB) { |
105 | sb->s_export_op = &gfs2_export_ops; | 155 | if (!silent) |
106 | sb->s_time_gran = 1; | 156 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); |
107 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 157 | return -EINVAL; |
158 | } | ||
159 | |||
160 | /* If format numbers match exactly, we're done. */ | ||
161 | |||
162 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
163 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
164 | return 0; | ||
165 | |||
166 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
167 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
168 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
169 | break; | ||
170 | |||
171 | if (!gfs2_old_fs_formats[x]) { | ||
172 | printk(KERN_WARNING | ||
173 | "GFS2: code version (%u, %u) is incompatible " | ||
174 | "with ondisk format (%u, %u)\n", | ||
175 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
176 | sb->sb_fs_format, sb->sb_multihost_format); | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: I don't know how to upgrade this FS\n"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
184 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
185 | if (gfs2_old_multihost_formats[x] == | ||
186 | sb->sb_multihost_format) | ||
187 | break; | ||
188 | |||
189 | if (!gfs2_old_multihost_formats[x]) { | ||
190 | printk(KERN_WARNING | ||
191 | "GFS2: code version (%u, %u) is incompatible " | ||
192 | "with ondisk format (%u, %u)\n", | ||
193 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
194 | sb->sb_fs_format, sb->sb_multihost_format); | ||
195 | printk(KERN_WARNING | ||
196 | "GFS2: I don't know how to upgrade this FS\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | if (!sdp->sd_args.ar_upgrade) { | ||
202 | printk(KERN_WARNING | ||
203 | "GFS2: code version (%u, %u) is incompatible " | ||
204 | "with ondisk format (%u, %u)\n", | ||
205 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
206 | sb->sb_fs_format, sb->sb_multihost_format); | ||
207 | printk(KERN_INFO | ||
208 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
209 | "the FS\n"); | ||
210 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static void end_bio_io_page(struct bio *bio, int error) | ||
218 | { | ||
219 | struct page *page = bio->bi_private; | ||
108 | 220 | ||
109 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | 221 | if (!error) |
110 | set_bit(noatime, &sdp->sd_flags); | 222 | SetPageUptodate(page); |
223 | else | ||
224 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
225 | unlock_page(page); | ||
226 | } | ||
227 | |||
228 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
229 | { | ||
230 | const struct gfs2_sb *str = buf; | ||
231 | |||
232 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
233 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
234 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
235 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
236 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
237 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
238 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
239 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
240 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
241 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
242 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
243 | |||
244 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
245 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * gfs2_read_super - Read the gfs2 super block from disk | ||
250 | * @sdp: The GFS2 super block | ||
251 | * @sector: The location of the super block | ||
252 | * @error: The error code to return | ||
253 | * | ||
254 | * This uses the bio functions to read the super block from disk | ||
255 | * because we want to be 100% sure that we never read cached data. | ||
256 | * A super block is read twice only during each GFS2 mount and is | ||
257 | * never written to by the filesystem. The first time its read no | ||
258 | * locks are held, and the only details which are looked at are those | ||
259 | * relating to the locking protocol. Once locking is up and working, | ||
260 | * the sb is read again under the lock to establish the location of | ||
261 | * the master directory (contains pointers to journals etc) and the | ||
262 | * root directory. | ||
263 | * | ||
264 | * Returns: 0 on success or error | ||
265 | */ | ||
266 | |||
267 | static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
268 | { | ||
269 | struct super_block *sb = sdp->sd_vfs; | ||
270 | struct gfs2_sb *p; | ||
271 | struct page *page; | ||
272 | struct bio *bio; | ||
273 | |||
274 | page = alloc_page(GFP_NOFS); | ||
275 | if (unlikely(!page)) | ||
276 | return -ENOBUFS; | ||
277 | |||
278 | ClearPageUptodate(page); | ||
279 | ClearPageDirty(page); | ||
280 | lock_page(page); | ||
281 | |||
282 | bio = bio_alloc(GFP_NOFS, 1); | ||
283 | if (unlikely(!bio)) { | ||
284 | __free_page(page); | ||
285 | return -ENOBUFS; | ||
286 | } | ||
111 | 287 | ||
112 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | 288 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
113 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | 289 | bio->bi_bdev = sb->s_bdev; |
290 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
291 | |||
292 | bio->bi_end_io = end_bio_io_page; | ||
293 | bio->bi_private = page; | ||
294 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
295 | wait_on_page_locked(page); | ||
296 | bio_put(bio); | ||
297 | if (!PageUptodate(page)) { | ||
298 | __free_page(page); | ||
299 | return -EIO; | ||
300 | } | ||
301 | p = kmap(page); | ||
302 | gfs2_sb_in(&sdp->sd_sb, p); | ||
303 | kunmap(page); | ||
304 | __free_page(page); | ||
305 | return 0; | ||
306 | } | ||
307 | /** | ||
308 | * gfs2_read_sb - Read super block | ||
309 | * @sdp: The GFS2 superblock | ||
310 | * @gl: the glock for the superblock (assumed to be held) | ||
311 | * @silent: Don't print message if mount fails | ||
312 | * | ||
313 | */ | ||
314 | |||
315 | static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
316 | { | ||
317 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
318 | u32 tmp_blocks; | ||
319 | unsigned int x; | ||
320 | int error; | ||
321 | |||
322 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
323 | if (error) { | ||
324 | if (!silent) | ||
325 | fs_err(sdp, "can't read superblock\n"); | ||
326 | return error; | ||
327 | } | ||
328 | |||
329 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
330 | if (error) | ||
331 | return error; | ||
332 | |||
333 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
334 | GFS2_BASIC_BLOCK_SHIFT; | ||
335 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
336 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
337 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
338 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
339 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
340 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
341 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
342 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
343 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
344 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
345 | sizeof(struct gfs2_meta_header)) / | ||
346 | sizeof(struct gfs2_quota_change); | ||
347 | |||
348 | /* Compute maximum reservation required to add a entry to a directory */ | ||
349 | |||
350 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
351 | sdp->sd_jbsize); | ||
352 | |||
353 | ind_blocks = 0; | ||
354 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
355 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
356 | ind_blocks += tmp_blocks; | ||
357 | } | ||
358 | |||
359 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
360 | |||
361 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
362 | |||
363 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
364 | sizeof(struct gfs2_dinode); | ||
365 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
366 | for (x = 2;; x++) { | ||
367 | u64 space, d; | ||
368 | u32 m; | ||
369 | |||
370 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
371 | d = space; | ||
372 | m = do_div(d, sdp->sd_inptrs); | ||
373 | |||
374 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
375 | break; | ||
376 | sdp->sd_heightsize[x] = space; | ||
377 | } | ||
378 | sdp->sd_max_height = x; | ||
379 | sdp->sd_heightsize[x] = ~0; | ||
380 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
381 | |||
382 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
383 | sizeof(struct gfs2_dinode); | ||
384 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
385 | for (x = 2;; x++) { | ||
386 | u64 space, d; | ||
387 | u32 m; | ||
388 | |||
389 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
390 | d = space; | ||
391 | m = do_div(d, sdp->sd_inptrs); | ||
392 | |||
393 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
394 | break; | ||
395 | sdp->sd_jheightsize[x] = space; | ||
396 | } | ||
397 | sdp->sd_max_jheight = x; | ||
398 | sdp->sd_jheightsize[x] = ~0; | ||
399 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
400 | |||
401 | return 0; | ||
114 | } | 402 | } |
115 | 403 | ||
116 | static int init_names(struct gfs2_sbd *sdp, int silent) | 404 | static int init_names(struct gfs2_sbd *sdp, int silent) |
@@ -224,51 +512,59 @@ fail: | |||
224 | return error; | 512 | return error; |
225 | } | 513 | } |
226 | 514 | ||
227 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 515 | static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, |
228 | u64 no_addr) | 516 | u64 no_addr, const char *name) |
229 | { | 517 | { |
230 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | 518 | struct gfs2_sbd *sdp = sb->s_fs_info; |
519 | struct dentry *dentry; | ||
520 | struct inode *inode; | ||
521 | |||
522 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | ||
523 | if (IS_ERR(inode)) { | ||
524 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | ||
525 | return PTR_ERR(inode); | ||
526 | } | ||
527 | dentry = d_alloc_root(inode); | ||
528 | if (!dentry) { | ||
529 | fs_err(sdp, "can't alloc %s dentry\n", name); | ||
530 | iput(inode); | ||
531 | return -ENOMEM; | ||
532 | } | ||
533 | dentry->d_op = &gfs2_dops; | ||
534 | *dptr = dentry; | ||
535 | return 0; | ||
231 | } | 536 | } |
232 | 537 | ||
233 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 538 | static int init_sb(struct gfs2_sbd *sdp, int silent) |
234 | { | 539 | { |
235 | struct super_block *sb = sdp->sd_vfs; | 540 | struct super_block *sb = sdp->sd_vfs; |
236 | struct gfs2_holder sb_gh; | 541 | struct gfs2_holder sb_gh; |
237 | u64 no_addr; | 542 | u64 no_addr; |
238 | struct inode *inode; | 543 | int ret; |
239 | int error = 0; | ||
240 | 544 | ||
241 | if (undo) { | 545 | ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, |
242 | if (sb->s_root) { | 546 | LM_ST_SHARED, 0, &sb_gh); |
243 | dput(sb->s_root); | 547 | if (ret) { |
244 | sb->s_root = NULL; | 548 | fs_err(sdp, "can't acquire superblock glock: %d\n", ret); |
245 | } | 549 | return ret; |
246 | return 0; | ||
247 | } | 550 | } |
248 | 551 | ||
249 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | 552 | ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); |
250 | LM_ST_SHARED, 0, &sb_gh); | 553 | if (ret) { |
251 | if (error) { | 554 | fs_err(sdp, "can't read superblock: %d\n", ret); |
252 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
257 | if (error) { | ||
258 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
259 | goto out; | 555 | goto out; |
260 | } | 556 | } |
261 | 557 | ||
262 | /* Set up the buffer cache and SB for real */ | 558 | /* Set up the buffer cache and SB for real */ |
263 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 559 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { |
264 | error = -EINVAL; | 560 | ret = -EINVAL; |
265 | fs_err(sdp, "FS block size (%u) is too small for device " | 561 | fs_err(sdp, "FS block size (%u) is too small for device " |
266 | "block size (%u)\n", | 562 | "block size (%u)\n", |
267 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 563 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); |
268 | goto out; | 564 | goto out; |
269 | } | 565 | } |
270 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 566 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
271 | error = -EINVAL; | 567 | ret = -EINVAL; |
272 | fs_err(sdp, "FS block size (%u) is too big for machine " | 568 | fs_err(sdp, "FS block size (%u) is too big for machine " |
273 | "page size (%u)\n", | 569 | "page size (%u)\n", |
274 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | 570 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); |
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | |||
278 | 574 | ||
279 | /* Get the root inode */ | 575 | /* Get the root inode */ |
280 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; | 576 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; |
281 | if (sb->s_type == &gfs2meta_fs_type) | 577 | ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); |
282 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; | 578 | if (ret) |
283 | inode = gfs2_lookup_root(sb, no_addr); | ||
284 | if (IS_ERR(inode)) { | ||
285 | error = PTR_ERR(inode); | ||
286 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
287 | goto out; | 579 | goto out; |
288 | } | ||
289 | 580 | ||
290 | sb->s_root = d_alloc_root(inode); | 581 | /* Get the master inode */ |
291 | if (!sb->s_root) { | 582 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; |
292 | fs_err(sdp, "can't get root dentry\n"); | 583 | ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); |
293 | error = -ENOMEM; | 584 | if (ret) { |
294 | iput(inode); | 585 | dput(sdp->sd_root_dir); |
295 | } else | 586 | goto out; |
296 | sb->s_root->d_op = &gfs2_dops; | 587 | } |
297 | 588 | sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); | |
298 | out: | 589 | out: |
299 | gfs2_glock_dq_uninit(&sb_gh); | 590 | gfs2_glock_dq_uninit(&sb_gh); |
300 | return error; | 591 | return ret; |
301 | } | 592 | } |
302 | 593 | ||
303 | /** | 594 | /** |
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | |||
372 | 663 | ||
373 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 664 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
374 | { | 665 | { |
666 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
375 | struct gfs2_holder ji_gh; | 667 | struct gfs2_holder ji_gh; |
376 | struct task_struct *p; | 668 | struct task_struct *p; |
377 | struct gfs2_inode *ip; | 669 | struct gfs2_inode *ip; |
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
383 | goto fail_recoverd; | 675 | goto fail_recoverd; |
384 | } | 676 | } |
385 | 677 | ||
386 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | 678 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
387 | if (IS_ERR(sdp->sd_jindex)) { | 679 | if (IS_ERR(sdp->sd_jindex)) { |
388 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 680 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
389 | return PTR_ERR(sdp->sd_jindex); | 681 | return PTR_ERR(sdp->sd_jindex); |
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
506 | { | 798 | { |
507 | int error = 0; | 799 | int error = 0; |
508 | struct gfs2_inode *ip; | 800 | struct gfs2_inode *ip; |
509 | struct inode *inode; | 801 | struct inode *master = sdp->sd_master_dir->d_inode; |
510 | 802 | ||
511 | if (undo) | 803 | if (undo) |
512 | goto fail_qinode; | 804 | goto fail_qinode; |
513 | 805 | ||
514 | inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); | ||
515 | if (IS_ERR(inode)) { | ||
516 | error = PTR_ERR(inode); | ||
517 | fs_err(sdp, "can't read in master directory: %d\n", error); | ||
518 | goto fail; | ||
519 | } | ||
520 | sdp->sd_master_dir = inode; | ||
521 | |||
522 | error = init_journal(sdp, undo); | 806 | error = init_journal(sdp, undo); |
523 | if (error) | 807 | if (error) |
524 | goto fail_master; | 808 | goto fail; |
525 | 809 | ||
526 | /* Read in the master inode number inode */ | 810 | /* Read in the master inode number inode */ |
527 | sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); | 811 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); |
528 | if (IS_ERR(sdp->sd_inum_inode)) { | 812 | if (IS_ERR(sdp->sd_inum_inode)) { |
529 | error = PTR_ERR(sdp->sd_inum_inode); | 813 | error = PTR_ERR(sdp->sd_inum_inode); |
530 | fs_err(sdp, "can't read in inum inode: %d\n", error); | 814 | fs_err(sdp, "can't read in inum inode: %d\n", error); |
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
533 | 817 | ||
534 | 818 | ||
535 | /* Read in the master statfs inode */ | 819 | /* Read in the master statfs inode */ |
536 | sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); | 820 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
537 | if (IS_ERR(sdp->sd_statfs_inode)) { | 821 | if (IS_ERR(sdp->sd_statfs_inode)) { |
538 | error = PTR_ERR(sdp->sd_statfs_inode); | 822 | error = PTR_ERR(sdp->sd_statfs_inode); |
539 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 823 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
541 | } | 825 | } |
542 | 826 | ||
543 | /* Read in the resource index inode */ | 827 | /* Read in the resource index inode */ |
544 | sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); | 828 | sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); |
545 | if (IS_ERR(sdp->sd_rindex)) { | 829 | if (IS_ERR(sdp->sd_rindex)) { |
546 | error = PTR_ERR(sdp->sd_rindex); | 830 | error = PTR_ERR(sdp->sd_rindex); |
547 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 831 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
552 | sdp->sd_rindex_uptodate = 0; | 836 | sdp->sd_rindex_uptodate = 0; |
553 | 837 | ||
554 | /* Read in the quota inode */ | 838 | /* Read in the quota inode */ |
555 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 839 | sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); |
556 | if (IS_ERR(sdp->sd_quota_inode)) { | 840 | if (IS_ERR(sdp->sd_quota_inode)) { |
557 | error = PTR_ERR(sdp->sd_quota_inode); | 841 | error = PTR_ERR(sdp->sd_quota_inode); |
558 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 842 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
@@ -571,8 +855,6 @@ fail_inum: | |||
571 | iput(sdp->sd_inum_inode); | 855 | iput(sdp->sd_inum_inode); |
572 | fail_journal: | 856 | fail_journal: |
573 | init_journal(sdp, UNDO); | 857 | init_journal(sdp, UNDO); |
574 | fail_master: | ||
575 | iput(sdp->sd_master_dir); | ||
576 | fail: | 858 | fail: |
577 | return error; | 859 | return error; |
578 | } | 860 | } |
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
583 | char buf[30]; | 865 | char buf[30]; |
584 | int error = 0; | 866 | int error = 0; |
585 | struct gfs2_inode *ip; | 867 | struct gfs2_inode *ip; |
868 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
586 | 869 | ||
587 | if (sdp->sd_args.ar_spectator) | 870 | if (sdp->sd_args.ar_spectator) |
588 | return 0; | 871 | return 0; |
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
590 | if (undo) | 873 | if (undo) |
591 | goto fail_qc_gh; | 874 | goto fail_qc_gh; |
592 | 875 | ||
593 | pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); | 876 | pn = gfs2_lookup_simple(master, "per_node"); |
594 | if (IS_ERR(pn)) { | 877 | if (IS_ERR(pn)) { |
595 | error = PTR_ERR(pn); | 878 | error = PTR_ERR(pn); |
596 | fs_err(sdp, "can't find per_node directory: %d\n", error); | 879 | fs_err(sdp, "can't find per_node directory: %d\n", error); |
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
800 | goto fail; | 1083 | goto fail; |
801 | } | 1084 | } |
802 | 1085 | ||
803 | init_vfs(sb, SDF_NOATIME); | 1086 | sb->s_magic = GFS2_MAGIC; |
1087 | sb->s_op = &gfs2_super_ops; | ||
1088 | sb->s_export_op = &gfs2_export_ops; | ||
1089 | sb->s_time_gran = 1; | ||
1090 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
804 | 1091 | ||
805 | /* Set up the buffer cache and fill in some fake block size values | 1092 | /* Set up the buffer cache and fill in some fake block size values |
806 | to allow us to read-in the on-disk superblock. */ | 1093 | to allow us to read-in the on-disk superblock. */ |
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
828 | if (error) | 1115 | if (error) |
829 | goto fail_lm; | 1116 | goto fail_lm; |
830 | 1117 | ||
831 | error = init_sb(sdp, silent, DO); | 1118 | error = init_sb(sdp, silent); |
832 | if (error) | 1119 | if (error) |
833 | goto fail_locking; | 1120 | goto fail_locking; |
834 | 1121 | ||
@@ -869,7 +1156,11 @@ fail_per_node: | |||
869 | fail_inodes: | 1156 | fail_inodes: |
870 | init_inodes(sdp, UNDO); | 1157 | init_inodes(sdp, UNDO); |
871 | fail_sb: | 1158 | fail_sb: |
872 | init_sb(sdp, 0, UNDO); | 1159 | if (sdp->sd_root_dir) |
1160 | dput(sdp->sd_root_dir); | ||
1161 | if (sdp->sd_master_dir) | ||
1162 | dput(sdp->sd_master_dir); | ||
1163 | sb->s_root = NULL; | ||
873 | fail_locking: | 1164 | fail_locking: |
874 | init_locking(sdp, &mount_gh, UNDO); | 1165 | init_locking(sdp, &mount_gh, UNDO); |
875 | fail_lm: | 1166 | fail_lm: |
@@ -887,151 +1178,63 @@ fail: | |||
887 | } | 1178 | } |
888 | 1179 | ||
889 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | 1180 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, |
890 | const char *dev_name, void *data, struct vfsmount *mnt) | 1181 | const char *dev_name, void *data, struct vfsmount *mnt) |
891 | { | 1182 | { |
892 | struct super_block *sb; | 1183 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
893 | struct gfs2_sbd *sdp; | ||
894 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
895 | if (error) | ||
896 | goto out; | ||
897 | sb = mnt->mnt_sb; | ||
898 | sdp = sb->s_fs_info; | ||
899 | sdp->sd_gfs2mnt = mnt; | ||
900 | out: | ||
901 | return error; | ||
902 | } | 1184 | } |
903 | 1185 | ||
904 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | 1186 | static struct super_block *get_gfs2_sb(const char *dev_name) |
905 | void *data, int silent) | ||
906 | { | 1187 | { |
907 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1188 | struct super_block *sb; |
908 | struct inode *inode; | ||
909 | int error = 0; | ||
910 | |||
911 | new->s_fs_info = sdp; | ||
912 | sdp->sd_vfs_meta = sb; | ||
913 | |||
914 | init_vfs(new, SDF_NOATIME); | ||
915 | |||
916 | /* Get the master inode */ | ||
917 | inode = igrab(sdp->sd_master_dir); | ||
918 | |||
919 | new->s_root = d_alloc_root(inode); | ||
920 | if (!new->s_root) { | ||
921 | fs_err(sdp, "can't get root dentry\n"); | ||
922 | error = -ENOMEM; | ||
923 | iput(inode); | ||
924 | } else | ||
925 | new->s_root->d_op = &gfs2_dops; | ||
926 | |||
927 | return error; | ||
928 | } | ||
929 | |||
930 | static int set_bdev_super(struct super_block *s, void *data) | ||
931 | { | ||
932 | s->s_bdev = data; | ||
933 | s->s_dev = s->s_bdev->bd_dev; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int test_bdev_super(struct super_block *s, void *data) | ||
938 | { | ||
939 | return s->s_bdev == data; | ||
940 | } | ||
941 | |||
942 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
943 | { | ||
944 | struct kstat stat; | ||
945 | struct nameidata nd; | 1189 | struct nameidata nd; |
946 | struct super_block *sb = NULL, *s; | ||
947 | int error; | 1190 | int error; |
948 | 1191 | ||
949 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | 1192 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); |
950 | if (error) { | 1193 | if (error) { |
951 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | 1194 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
952 | dev_name); | 1195 | dev_name, error); |
953 | goto out; | 1196 | return NULL; |
954 | } | ||
955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | ||
956 | |||
957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { | ||
958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
959 | (S_ISDIR(stat.mode) && | ||
960 | s == nd.path.dentry->d_inode->i_sb)) { | ||
961 | sb = s; | ||
962 | goto free_nd; | ||
963 | } | ||
964 | } | 1197 | } |
965 | 1198 | sb = nd.path.dentry->d_inode->i_sb; | |
966 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | 1199 | if (sb && (sb->s_type == &gfs2_fs_type)) |
967 | "mount point %s\n", dev_name); | 1200 | atomic_inc(&sb->s_active); |
968 | 1201 | else | |
969 | free_nd: | 1202 | sb = NULL; |
970 | path_put(&nd.path); | 1203 | path_put(&nd.path); |
971 | out: | ||
972 | return sb; | 1204 | return sb; |
973 | } | 1205 | } |
974 | 1206 | ||
975 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | 1207 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, |
976 | const char *dev_name, void *data, struct vfsmount *mnt) | 1208 | const char *dev_name, void *data, struct vfsmount *mnt) |
977 | { | 1209 | { |
978 | int error = 0; | 1210 | struct super_block *sb = NULL; |
979 | struct super_block *sb = NULL, *new; | ||
980 | struct gfs2_sbd *sdp; | 1211 | struct gfs2_sbd *sdp; |
981 | 1212 | ||
982 | sb = get_gfs2_sb(dev_name); | 1213 | sb = get_gfs2_sb(dev_name); |
983 | if (!sb) { | 1214 | if (!sb) { |
984 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1215 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
985 | error = -ENOENT; | 1216 | return -ENOENT; |
986 | goto error; | ||
987 | } | 1217 | } |
988 | sdp = sb->s_fs_info; | 1218 | sdp = sb->s_fs_info; |
989 | if (sdp->sd_vfs_meta) { | 1219 | mnt->mnt_sb = sb; |
990 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | 1220 | mnt->mnt_root = dget(sdp->sd_master_dir); |
991 | error = -EBUSY; | 1221 | return 0; |
992 | goto error; | ||
993 | } | ||
994 | down(&sb->s_bdev->bd_mount_sem); | ||
995 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
996 | up(&sb->s_bdev->bd_mount_sem); | ||
997 | if (IS_ERR(new)) { | ||
998 | error = PTR_ERR(new); | ||
999 | goto error; | ||
1000 | } | ||
1001 | new->s_flags = flags; | ||
1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
1003 | sb_set_blocksize(new, sb->s_blocksize); | ||
1004 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
1005 | if (error) { | ||
1006 | up_write(&new->s_umount); | ||
1007 | deactivate_super(new); | ||
1008 | goto error; | ||
1009 | } | ||
1010 | |||
1011 | new->s_flags |= MS_ACTIVE; | ||
1012 | |||
1013 | /* Grab a reference to the gfs2 mount point */ | ||
1014 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
1015 | return simple_set_mnt(mnt, new); | ||
1016 | error: | ||
1017 | return error; | ||
1018 | } | 1222 | } |
1019 | 1223 | ||
1020 | static void gfs2_kill_sb(struct super_block *sb) | 1224 | static void gfs2_kill_sb(struct super_block *sb) |
1021 | { | 1225 | { |
1022 | if (sb->s_fs_info) { | 1226 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1023 | gfs2_delete_debugfs_file(sb->s_fs_info); | 1227 | if (sdp) { |
1024 | gfs2_meta_syncfs(sb->s_fs_info); | 1228 | gfs2_meta_syncfs(sdp); |
1229 | dput(sdp->sd_root_dir); | ||
1230 | dput(sdp->sd_master_dir); | ||
1231 | sdp->sd_root_dir = NULL; | ||
1232 | sdp->sd_master_dir = NULL; | ||
1025 | } | 1233 | } |
1234 | shrink_dcache_sb(sb); | ||
1026 | kill_block_super(sb); | 1235 | kill_block_super(sb); |
1027 | } | 1236 | if (sdp) |
1028 | 1237 | gfs2_delete_debugfs_file(sdp); | |
1029 | static void gfs2_kill_sb_meta(struct super_block *sb) | ||
1030 | { | ||
1031 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
1032 | generic_shutdown_super(sb); | ||
1033 | sdp->sd_vfs_meta = NULL; | ||
1034 | atomic_dec(&sdp->sd_gfs2mnt->mnt_count); | ||
1035 | } | 1238 | } |
1036 | 1239 | ||
1037 | struct file_system_type gfs2_fs_type = { | 1240 | struct file_system_type gfs2_fs_type = { |
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { | |||
1046 | .name = "gfs2meta", | 1249 | .name = "gfs2meta", |
1047 | .fs_flags = FS_REQUIRES_DEV, | 1250 | .fs_flags = FS_REQUIRES_DEV, |
1048 | .get_sb = gfs2_get_sb_meta, | 1251 | .get_sb = gfs2_get_sb_meta, |
1049 | .kill_sb = gfs2_kill_sb_meta, | ||
1050 | .owner = THIS_MODULE, | 1252 | .owner = THIS_MODULE, |
1051 | }; | 1253 | }; |
1052 | 1254 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f73a778..534e1e2c65ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
161 | 161 | ||
162 | error = gfs2_glock_nq_m(2, ghs); | 162 | error = gfs2_glock_nq(ghs); /* parent */ |
163 | if (error) | 163 | if (error) |
164 | goto out; | 164 | goto out_parent; |
165 | |||
166 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
167 | if (error) | ||
168 | goto out_child; | ||
165 | 169 | ||
166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); | 170 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
167 | if (error) | 171 | if (error) |
@@ -245,8 +249,10 @@ out_alloc: | |||
245 | if (alloc_required) | 249 | if (alloc_required) |
246 | gfs2_alloc_put(dip); | 250 | gfs2_alloc_put(dip); |
247 | out_gunlock: | 251 | out_gunlock: |
248 | gfs2_glock_dq_m(2, ghs); | 252 | gfs2_glock_dq(ghs + 1); |
249 | out: | 253 | out_child: |
254 | gfs2_glock_dq(ghs); | ||
255 | out_parent: | ||
250 | gfs2_holder_uninit(ghs); | 256 | gfs2_holder_uninit(ghs); |
251 | gfs2_holder_uninit(ghs + 1); | 257 | gfs2_holder_uninit(ghs + 1); |
252 | if (!error) { | 258 | if (!error) { |
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
302 | 308 | ||
303 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 309 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
304 | if (error) | 310 | if (error) |
305 | goto out_rgrp; | 311 | goto out_gunlock; |
306 | 312 | ||
307 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 313 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
308 | if (error) | 314 | if (error) |
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
316 | 322 | ||
317 | out_end_trans: | 323 | out_end_trans: |
318 | gfs2_trans_end(sdp); | 324 | gfs2_trans_end(sdp); |
325 | out_gunlock: | ||
319 | gfs2_glock_dq(ghs + 2); | 326 | gfs2_glock_dq(ghs + 2); |
320 | out_rgrp: | 327 | out_rgrp: |
321 | gfs2_holder_uninit(ghs + 2); | 328 | gfs2_holder_uninit(ghs + 2); |
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
485 | struct gfs2_holder ri_gh; | 492 | struct gfs2_holder ri_gh; |
486 | int error; | 493 | int error; |
487 | 494 | ||
488 | |||
489 | error = gfs2_rindex_hold(sdp, &ri_gh); | 495 | error = gfs2_rindex_hold(sdp, &ri_gh); |
490 | if (error) | 496 | if (error) |
491 | return error; | 497 | return error; |
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
495 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 501 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
496 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | 502 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); |
497 | 503 | ||
498 | error = gfs2_glock_nq_m(3, ghs); | 504 | error = gfs2_glock_nq(ghs); /* parent */ |
499 | if (error) | 505 | if (error) |
500 | goto out; | 506 | goto out_parent; |
507 | |||
508 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
509 | if (error) | ||
510 | goto out_child; | ||
511 | |||
512 | error = gfs2_glock_nq(ghs + 2); /* rgrp */ | ||
513 | if (error) | ||
514 | goto out_rgrp; | ||
501 | 515 | ||
502 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 516 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
503 | if (error) | 517 | if (error) |
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
523 | gfs2_trans_end(sdp); | 537 | gfs2_trans_end(sdp); |
524 | 538 | ||
525 | out_gunlock: | 539 | out_gunlock: |
526 | gfs2_glock_dq_m(3, ghs); | 540 | gfs2_glock_dq(ghs + 2); |
527 | out: | 541 | out_rgrp: |
528 | gfs2_holder_uninit(ghs); | ||
529 | gfs2_holder_uninit(ghs + 1); | ||
530 | gfs2_holder_uninit(ghs + 2); | 542 | gfs2_holder_uninit(ghs + 2); |
543 | gfs2_glock_dq(ghs + 1); | ||
544 | out_child: | ||
545 | gfs2_holder_uninit(ghs + 1); | ||
546 | gfs2_glock_dq(ghs); | ||
547 | out_parent: | ||
548 | gfs2_holder_uninit(ghs); | ||
531 | gfs2_glock_dq_uninit(&ri_gh); | 549 | gfs2_glock_dq_uninit(&ri_gh); |
532 | return error; | 550 | return error; |
533 | } | 551 | } |
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
571 | return 0; | 589 | return 0; |
572 | } | 590 | } |
573 | 591 | ||
592 | /* | ||
593 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
594 | * @this: move this | ||
595 | * @to: to here | ||
596 | * | ||
597 | * Follow @to back to the root and make sure we don't encounter @this | ||
598 | * Assumes we already hold the rename lock. | ||
599 | * | ||
600 | * Returns: errno | ||
601 | */ | ||
602 | |||
603 | static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
604 | { | ||
605 | struct inode *dir = &to->i_inode; | ||
606 | struct super_block *sb = dir->i_sb; | ||
607 | struct inode *tmp; | ||
608 | struct qstr dotdot; | ||
609 | int error = 0; | ||
610 | |||
611 | gfs2_str2qstr(&dotdot, ".."); | ||
612 | |||
613 | igrab(dir); | ||
614 | |||
615 | for (;;) { | ||
616 | if (dir == &this->i_inode) { | ||
617 | error = -EINVAL; | ||
618 | break; | ||
619 | } | ||
620 | if (dir == sb->s_root->d_inode) { | ||
621 | error = 0; | ||
622 | break; | ||
623 | } | ||
624 | |||
625 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
626 | if (IS_ERR(tmp)) { | ||
627 | error = PTR_ERR(tmp); | ||
628 | break; | ||
629 | } | ||
630 | |||
631 | iput(dir); | ||
632 | dir = tmp; | ||
633 | } | ||
634 | |||
635 | iput(dir); | ||
636 | |||
637 | return error; | ||
638 | } | ||
639 | |||
574 | /** | 640 | /** |
575 | * gfs2_rename - Rename a file | 641 | * gfs2_rename - Rename a file |
576 | * @odir: Parent directory of old file name | 642 | * @odir: Parent directory of old file name |
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
589 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 655 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
590 | struct gfs2_inode *nip = NULL; | 656 | struct gfs2_inode *nip = NULL; |
591 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 657 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
592 | struct gfs2_holder ghs[5], r_gh; | 658 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
593 | struct gfs2_rgrpd *nrgd; | 659 | struct gfs2_rgrpd *nrgd; |
594 | unsigned int num_gh; | 660 | unsigned int num_gh; |
595 | int dir_rename = 0; | 661 | int dir_rename = 0; |
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
603 | return 0; | 669 | return 0; |
604 | } | 670 | } |
605 | 671 | ||
606 | /* Make sure we aren't trying to move a dirctory into it's subdir */ | ||
607 | |||
608 | if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { | ||
609 | dir_rename = 1; | ||
610 | 672 | ||
611 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, | 673 | if (odip != ndip) { |
612 | &r_gh); | 674 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
675 | 0, &r_gh); | ||
613 | if (error) | 676 | if (error) |
614 | goto out; | 677 | goto out; |
615 | 678 | ||
616 | error = gfs2_ok_to_move(ip, ndip); | 679 | if (S_ISDIR(ip->i_inode.i_mode)) { |
617 | if (error) | 680 | dir_rename = 1; |
618 | goto out_gunlock_r; | 681 | /* don't move a dirctory into it's subdir */ |
682 | error = gfs2_ok_to_move(ip, ndip); | ||
683 | if (error) | ||
684 | goto out_gunlock_r; | ||
685 | } | ||
619 | } | 686 | } |
620 | 687 | ||
621 | num_gh = 1; | 688 | num_gh = 1; |
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
639 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | 706 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); |
640 | } | 707 | } |
641 | 708 | ||
642 | error = gfs2_glock_nq_m(num_gh, ghs); | 709 | for (x = 0; x < num_gh; x++) { |
643 | if (error) | 710 | error = gfs2_glock_nq(ghs + x); |
644 | goto out_uninit; | 711 | if (error) |
712 | goto out_gunlock; | ||
713 | } | ||
645 | 714 | ||
646 | /* Check out the old directory */ | 715 | /* Check out the old directory */ |
647 | 716 | ||
@@ -804,12 +873,12 @@ out_alloc: | |||
804 | if (alloc_required) | 873 | if (alloc_required) |
805 | gfs2_alloc_put(ndip); | 874 | gfs2_alloc_put(ndip); |
806 | out_gunlock: | 875 | out_gunlock: |
807 | gfs2_glock_dq_m(num_gh, ghs); | 876 | while (x--) { |
808 | out_uninit: | 877 | gfs2_glock_dq(ghs + x); |
809 | for (x = 0; x < num_gh; x++) | ||
810 | gfs2_holder_uninit(ghs + x); | 878 | gfs2_holder_uninit(ghs + x); |
879 | } | ||
811 | out_gunlock_r: | 880 | out_gunlock_r: |
812 | if (dir_rename) | 881 | if (r_gh.gh_gl) |
813 | gfs2_glock_dq_uninit(&r_gh); | 882 | gfs2_glock_dq_uninit(&r_gh); |
814 | out: | 883 | out: |
815 | return error; | 884 | return error; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f7a356..d5355d9b5926 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/time.h> | ||
23 | 24 | ||
24 | #include "gfs2.h" | 25 | #include "gfs2.h" |
25 | #include "incore.h" | 26 | #include "incore.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "dir.h" | 39 | #include "dir.h" |
39 | #include "eattr.h" | 40 | #include "eattr.h" |
40 | #include "bmap.h" | 41 | #include "bmap.h" |
42 | #include "meta_io.h" | ||
41 | 43 | ||
42 | /** | 44 | /** |
43 | * gfs2_write_inode - Make sure the inode is stable on the disk | 45 | * gfs2_write_inode - Make sure the inode is stable on the disk |
@@ -50,16 +52,74 @@ | |||
50 | static int gfs2_write_inode(struct inode *inode, int sync) | 52 | static int gfs2_write_inode(struct inode *inode, int sync) |
51 | { | 53 | { |
52 | struct gfs2_inode *ip = GFS2_I(inode); | 54 | struct gfs2_inode *ip = GFS2_I(inode); |
53 | 55 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
54 | /* Check this is a "normal" inode */ | 56 | struct gfs2_holder gh; |
55 | if (test_bit(GIF_USER, &ip->i_flags)) { | 57 | struct buffer_head *bh; |
56 | if (current->flags & PF_MEMALLOC) | 58 | struct timespec atime; |
57 | return 0; | 59 | struct gfs2_dinode *di; |
58 | if (sync) | 60 | int ret = 0; |
59 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 61 | |
62 | /* Check this is a "normal" inode, etc */ | ||
63 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
64 | (current->flags & PF_MEMALLOC)) | ||
65 | return 0; | ||
66 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
67 | if (ret) | ||
68 | goto do_flush; | ||
69 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
70 | if (ret) | ||
71 | goto do_unlock; | ||
72 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
73 | if (ret == 0) { | ||
74 | di = (struct gfs2_dinode *)bh->b_data; | ||
75 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
76 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
77 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
78 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
79 | gfs2_dinode_out(ip, bh->b_data); | ||
80 | } | ||
81 | brelse(bh); | ||
60 | } | 82 | } |
83 | gfs2_trans_end(sdp); | ||
84 | do_unlock: | ||
85 | gfs2_glock_dq_uninit(&gh); | ||
86 | do_flush: | ||
87 | if (sync != 0) | ||
88 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
89 | return ret; | ||
90 | } | ||
61 | 91 | ||
62 | return 0; | 92 | /** |
93 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
94 | * @sdp: the filesystem | ||
95 | * | ||
96 | * Returns: errno | ||
97 | */ | ||
98 | |||
99 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
100 | { | ||
101 | struct gfs2_holder t_gh; | ||
102 | int error; | ||
103 | |||
104 | gfs2_quota_sync(sdp); | ||
105 | gfs2_statfs_sync(sdp); | ||
106 | |||
107 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
108 | &t_gh); | ||
109 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
110 | return error; | ||
111 | |||
112 | gfs2_meta_syncfs(sdp); | ||
113 | gfs2_log_shutdown(sdp); | ||
114 | |||
115 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
116 | |||
117 | if (t_gh.gh_gl) | ||
118 | gfs2_glock_dq_uninit(&t_gh); | ||
119 | |||
120 | gfs2_quota_cleanup(sdp); | ||
121 | |||
122 | return error; | ||
63 | } | 123 | } |
64 | 124 | ||
65 | /** | 125 | /** |
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
73 | struct gfs2_sbd *sdp = sb->s_fs_info; | 133 | struct gfs2_sbd *sdp = sb->s_fs_info; |
74 | int error; | 134 | int error; |
75 | 135 | ||
76 | if (!sdp) | ||
77 | return; | ||
78 | |||
79 | if (!strncmp(sb->s_type->name, "gfs2meta", 8)) | ||
80 | return; /* Nothing to do */ | ||
81 | |||
82 | /* Unfreeze the filesystem, if we need to */ | 136 | /* Unfreeze the filesystem, if we need to */ |
83 | 137 | ||
84 | mutex_lock(&sdp->sd_freeze_lock); | 138 | mutex_lock(&sdp->sd_freeze_lock); |
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
101 | 155 | ||
102 | /* Release stuff */ | 156 | /* Release stuff */ |
103 | 157 | ||
104 | iput(sdp->sd_master_dir); | ||
105 | iput(sdp->sd_jindex); | 158 | iput(sdp->sd_jindex); |
106 | iput(sdp->sd_inum_inode); | 159 | iput(sdp->sd_inum_inode); |
107 | iput(sdp->sd_statfs_inode); | 160 | iput(sdp->sd_statfs_inode); |
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
152 | * | 205 | * |
153 | * Flushes the log to disk. | 206 | * Flushes the log to disk. |
154 | */ | 207 | */ |
208 | |||
155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 209 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
156 | { | 210 | { |
157 | sb->s_dirt = 0; | 211 | sb->s_dirt = 0; |
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
270 | } | 324 | } |
271 | } | 325 | } |
272 | 326 | ||
273 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
274 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
275 | else | ||
276 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
277 | |||
278 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
279 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
280 | |||
281 | return error; | 327 | return error; |
282 | } | 328 | } |
283 | 329 | ||
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
295 | * inode's blocks, or alternatively pass the baton on to another | 341 | * inode's blocks, or alternatively pass the baton on to another |
296 | * node for later deallocation. | 342 | * node for later deallocation. |
297 | */ | 343 | */ |
344 | |||
298 | static void gfs2_drop_inode(struct inode *inode) | 345 | static void gfs2_drop_inode(struct inode *inode) |
299 | { | 346 | { |
300 | struct gfs2_inode *ip = GFS2_I(inode); | 347 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) | |||
333 | } | 380 | } |
334 | } | 381 | } |
335 | 382 | ||
383 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
384 | { | ||
385 | do { | ||
386 | if (d1 == d2) | ||
387 | return 1; | ||
388 | d1 = d1->d_parent; | ||
389 | } while (!IS_ROOT(d1)); | ||
390 | return 0; | ||
391 | } | ||
392 | |||
336 | /** | 393 | /** |
337 | * gfs2_show_options - Show mount options for /proc/mounts | 394 | * gfs2_show_options - Show mount options for /proc/mounts |
338 | * @s: seq_file structure | 395 | * @s: seq_file structure |
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
346 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | 403 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; |
347 | struct gfs2_args *args = &sdp->sd_args; | 404 | struct gfs2_args *args = &sdp->sd_args; |
348 | 405 | ||
406 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
407 | seq_printf(s, ",meta"); | ||
349 | if (args->ar_lockproto[0]) | 408 | if (args->ar_lockproto[0]) |
350 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 409 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
351 | if (args->ar_locktable[0]) | 410 | if (args->ar_locktable[0]) |
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
414 | * conversion on the iopen lock, but we can change that later. This | 473 | * conversion on the iopen lock, but we can change that later. This |
415 | * is safe, just less efficient. | 474 | * is safe, just less efficient. |
416 | */ | 475 | */ |
476 | |||
417 | static void gfs2_delete_inode(struct inode *inode) | 477 | static void gfs2_delete_inode(struct inode *inode) |
418 | { | 478 | { |
419 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 479 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
@@ -478,8 +538,6 @@ out: | |||
478 | clear_inode(inode); | 538 | clear_inode(inode); |
479 | } | 539 | } |
480 | 540 | ||
481 | |||
482 | |||
483 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 541 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
484 | { | 542 | { |
485 | struct gfs2_inode *ip; | 543 | struct gfs2_inode *ip; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca831991cbc2..c3ba3d9d0aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -33,313 +33,6 @@ | |||
33 | #include "trans.h" | 33 | #include "trans.h" |
34 | #include "util.h" | 34 | #include "util.h" |
35 | 35 | ||
36 | static const u32 gfs2_old_fs_formats[] = { | ||
37 | 0 | ||
38 | }; | ||
39 | |||
40 | static const u32 gfs2_old_multihost_formats[] = { | ||
41 | 0 | ||
42 | }; | ||
43 | |||
44 | /** | ||
45 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
46 | * @gt: tune | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | void gfs2_tune_init(struct gfs2_tune *gt) | ||
51 | { | ||
52 | spin_lock_init(>->gt_spin); | ||
53 | |||
54 | gt->gt_demote_secs = 300; | ||
55 | gt->gt_incore_log_blocks = 1024; | ||
56 | gt->gt_log_flush_secs = 60; | ||
57 | gt->gt_recoverd_secs = 60; | ||
58 | gt->gt_logd_secs = 1; | ||
59 | gt->gt_quotad_secs = 5; | ||
60 | gt->gt_quota_simul_sync = 64; | ||
61 | gt->gt_quota_warn_period = 10; | ||
62 | gt->gt_quota_scale_num = 1; | ||
63 | gt->gt_quota_scale_den = 1; | ||
64 | gt->gt_quota_cache_secs = 300; | ||
65 | gt->gt_quota_quantum = 60; | ||
66 | gt->gt_atime_quantum = 3600; | ||
67 | gt->gt_new_files_jdata = 0; | ||
68 | gt->gt_max_readahead = 1 << 18; | ||
69 | gt->gt_stall_secs = 600; | ||
70 | gt->gt_complain_secs = 10; | ||
71 | gt->gt_statfs_quantum = 30; | ||
72 | gt->gt_statfs_slow = 0; | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * gfs2_check_sb - Check superblock | ||
77 | * @sdp: the filesystem | ||
78 | * @sb: The superblock | ||
79 | * @silent: Don't print a message if the check fails | ||
80 | * | ||
81 | * Checks the version code of the FS is one that we understand how to | ||
82 | * read and that the sizes of the various on-disk structures have not | ||
83 | * changed. | ||
84 | */ | ||
85 | |||
86 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
87 | { | ||
88 | unsigned int x; | ||
89 | |||
90 | if (sb->sb_magic != GFS2_MAGIC || | ||
91 | sb->sb_type != GFS2_METATYPE_SB) { | ||
92 | if (!silent) | ||
93 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
94 | return -EINVAL; | ||
95 | } | ||
96 | |||
97 | /* If format numbers match exactly, we're done. */ | ||
98 | |||
99 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
100 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
101 | return 0; | ||
102 | |||
103 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
104 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
105 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
106 | break; | ||
107 | |||
108 | if (!gfs2_old_fs_formats[x]) { | ||
109 | printk(KERN_WARNING | ||
110 | "GFS2: code version (%u, %u) is incompatible " | ||
111 | "with ondisk format (%u, %u)\n", | ||
112 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
113 | sb->sb_fs_format, sb->sb_multihost_format); | ||
114 | printk(KERN_WARNING | ||
115 | "GFS2: I don't know how to upgrade this FS\n"); | ||
116 | return -EINVAL; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
121 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
122 | if (gfs2_old_multihost_formats[x] == | ||
123 | sb->sb_multihost_format) | ||
124 | break; | ||
125 | |||
126 | if (!gfs2_old_multihost_formats[x]) { | ||
127 | printk(KERN_WARNING | ||
128 | "GFS2: code version (%u, %u) is incompatible " | ||
129 | "with ondisk format (%u, %u)\n", | ||
130 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
131 | sb->sb_fs_format, sb->sb_multihost_format); | ||
132 | printk(KERN_WARNING | ||
133 | "GFS2: I don't know how to upgrade this FS\n"); | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (!sdp->sd_args.ar_upgrade) { | ||
139 | printk(KERN_WARNING | ||
140 | "GFS2: code version (%u, %u) is incompatible " | ||
141 | "with ondisk format (%u, %u)\n", | ||
142 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
143 | sb->sb_fs_format, sb->sb_multihost_format); | ||
144 | printk(KERN_INFO | ||
145 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
146 | "the FS\n"); | ||
147 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | |||
155 | static void end_bio_io_page(struct bio *bio, int error) | ||
156 | { | ||
157 | struct page *page = bio->bi_private; | ||
158 | |||
159 | if (!error) | ||
160 | SetPageUptodate(page); | ||
161 | else | ||
162 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
163 | unlock_page(page); | ||
164 | } | ||
165 | |||
166 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
167 | { | ||
168 | const struct gfs2_sb *str = buf; | ||
169 | |||
170 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
171 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
172 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
173 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
174 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
175 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
176 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
177 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
178 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
179 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
180 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
181 | |||
182 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
183 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * gfs2_read_super - Read the gfs2 super block from disk | ||
188 | * @sdp: The GFS2 super block | ||
189 | * @sector: The location of the super block | ||
190 | * @error: The error code to return | ||
191 | * | ||
192 | * This uses the bio functions to read the super block from disk | ||
193 | * because we want to be 100% sure that we never read cached data. | ||
194 | * A super block is read twice only during each GFS2 mount and is | ||
195 | * never written to by the filesystem. The first time its read no | ||
196 | * locks are held, and the only details which are looked at are those | ||
197 | * relating to the locking protocol. Once locking is up and working, | ||
198 | * the sb is read again under the lock to establish the location of | ||
199 | * the master directory (contains pointers to journals etc) and the | ||
200 | * root directory. | ||
201 | * | ||
202 | * Returns: 0 on success or error | ||
203 | */ | ||
204 | |||
205 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
206 | { | ||
207 | struct super_block *sb = sdp->sd_vfs; | ||
208 | struct gfs2_sb *p; | ||
209 | struct page *page; | ||
210 | struct bio *bio; | ||
211 | |||
212 | page = alloc_page(GFP_NOFS); | ||
213 | if (unlikely(!page)) | ||
214 | return -ENOBUFS; | ||
215 | |||
216 | ClearPageUptodate(page); | ||
217 | ClearPageDirty(page); | ||
218 | lock_page(page); | ||
219 | |||
220 | bio = bio_alloc(GFP_NOFS, 1); | ||
221 | if (unlikely(!bio)) { | ||
222 | __free_page(page); | ||
223 | return -ENOBUFS; | ||
224 | } | ||
225 | |||
226 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | ||
227 | bio->bi_bdev = sb->s_bdev; | ||
228 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
229 | |||
230 | bio->bi_end_io = end_bio_io_page; | ||
231 | bio->bi_private = page; | ||
232 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
233 | wait_on_page_locked(page); | ||
234 | bio_put(bio); | ||
235 | if (!PageUptodate(page)) { | ||
236 | __free_page(page); | ||
237 | return -EIO; | ||
238 | } | ||
239 | p = kmap(page); | ||
240 | gfs2_sb_in(&sdp->sd_sb, p); | ||
241 | kunmap(page); | ||
242 | __free_page(page); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * gfs2_read_sb - Read super block | ||
248 | * @sdp: The GFS2 superblock | ||
249 | * @gl: the glock for the superblock (assumed to be held) | ||
250 | * @silent: Don't print message if mount fails | ||
251 | * | ||
252 | */ | ||
253 | |||
254 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
255 | { | ||
256 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
257 | u32 tmp_blocks; | ||
258 | unsigned int x; | ||
259 | int error; | ||
260 | |||
261 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
262 | if (error) { | ||
263 | if (!silent) | ||
264 | fs_err(sdp, "can't read superblock\n"); | ||
265 | return error; | ||
266 | } | ||
267 | |||
268 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
269 | if (error) | ||
270 | return error; | ||
271 | |||
272 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
273 | GFS2_BASIC_BLOCK_SHIFT; | ||
274 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
275 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
276 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
277 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
278 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
279 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
280 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
281 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
282 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
283 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
284 | sizeof(struct gfs2_meta_header)) / | ||
285 | sizeof(struct gfs2_quota_change); | ||
286 | |||
287 | /* Compute maximum reservation required to add a entry to a directory */ | ||
288 | |||
289 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
290 | sdp->sd_jbsize); | ||
291 | |||
292 | ind_blocks = 0; | ||
293 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
294 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
295 | ind_blocks += tmp_blocks; | ||
296 | } | ||
297 | |||
298 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
299 | |||
300 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
301 | |||
302 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
303 | sizeof(struct gfs2_dinode); | ||
304 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
305 | for (x = 2;; x++) { | ||
306 | u64 space, d; | ||
307 | u32 m; | ||
308 | |||
309 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
310 | d = space; | ||
311 | m = do_div(d, sdp->sd_inptrs); | ||
312 | |||
313 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
314 | break; | ||
315 | sdp->sd_heightsize[x] = space; | ||
316 | } | ||
317 | sdp->sd_max_height = x; | ||
318 | sdp->sd_heightsize[x] = ~0; | ||
319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
320 | |||
321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
322 | sizeof(struct gfs2_dinode); | ||
323 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
324 | for (x = 2;; x++) { | ||
325 | u64 space, d; | ||
326 | u32 m; | ||
327 | |||
328 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
329 | d = space; | ||
330 | m = do_div(d, sdp->sd_inptrs); | ||
331 | |||
332 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
333 | break; | ||
334 | sdp->sd_jheightsize[x] = space; | ||
335 | } | ||
336 | sdp->sd_max_jheight = x; | ||
337 | sdp->sd_jheightsize[x] = ~0; | ||
338 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /** | 36 | /** |
344 | * gfs2_jindex_hold - Grab a lock on the jindex | 37 | * gfs2_jindex_hold - Grab a lock on the jindex |
345 | * @sdp: The GFS2 superblock | 38 | * @sdp: The GFS2 superblock |
@@ -581,39 +274,6 @@ fail: | |||
581 | return error; | 274 | return error; |
582 | } | 275 | } |
583 | 276 | ||
584 | /** | ||
585 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
586 | * @sdp: the filesystem | ||
587 | * | ||
588 | * Returns: errno | ||
589 | */ | ||
590 | |||
591 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
592 | { | ||
593 | struct gfs2_holder t_gh; | ||
594 | int error; | ||
595 | |||
596 | gfs2_quota_sync(sdp); | ||
597 | gfs2_statfs_sync(sdp); | ||
598 | |||
599 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
600 | &t_gh); | ||
601 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
602 | return error; | ||
603 | |||
604 | gfs2_meta_syncfs(sdp); | ||
605 | gfs2_log_shutdown(sdp); | ||
606 | |||
607 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
608 | |||
609 | if (t_gh.gh_gl) | ||
610 | gfs2_glock_dq_uninit(&t_gh); | ||
611 | |||
612 | gfs2_quota_cleanup(sdp); | ||
613 | |||
614 | return error; | ||
615 | } | ||
616 | |||
617 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 277 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
618 | { | 278 | { |
619 | const struct gfs2_statfs_change *str = buf; | 279 | const struct gfs2_statfs_change *str = buf; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f7..50a4c9b1215e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -12,11 +12,6 @@ | |||
12 | 12 | ||
13 | #include "incore.h" | 13 | #include "incore.h" |
14 | 14 | ||
15 | void gfs2_tune_init(struct gfs2_tune *gt); | ||
16 | |||
17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | ||
18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | ||
19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | ||
20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | 15 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); |
21 | 16 | ||
22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 17 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
40 | struct gfs2_inode **ipp); | 35 | struct gfs2_inode **ipp); |
41 | 36 | ||
42 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 37 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
43 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp); | ||
44 | 38 | ||
45 | int gfs2_statfs_init(struct gfs2_sbd *sdp); | 39 | int gfs2_statfs_init(struct gfs2_sbd *sdp); |
46 | void gfs2_statfs_change(struct gfs2_sbd *sdp, | 40 | void gfs2_statfs_change(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 74846559fc3f..7e1879f1a02c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); | |||
269 | ARGS_ATTR(suiddir, "%d\n"); | 269 | ARGS_ATTR(suiddir, "%d\n"); |
270 | ARGS_ATTR(data, "%d\n"); | 270 | ARGS_ATTR(data, "%d\n"); |
271 | 271 | ||
272 | /* one oddball doesn't fit the macro mold */ | ||
273 | static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) | ||
274 | { | ||
275 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
276 | !!test_bit(SDF_NOATIME, &sdp->sd_flags)); | ||
277 | } | ||
278 | static struct args_attr args_attr_noatime = __ATTR_RO(noatime); | ||
279 | |||
280 | static struct attribute *args_attrs[] = { | 272 | static struct attribute *args_attrs[] = { |
281 | &args_attr_lockproto.attr, | 273 | &args_attr_lockproto.attr, |
282 | &args_attr_locktable.attr, | 274 | &args_attr_locktable.attr, |
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = { | |||
292 | &args_attr_quota.attr, | 284 | &args_attr_quota.attr, |
293 | &args_attr_suiddir.attr, | 285 | &args_attr_suiddir.attr, |
294 | &args_attr_data.attr, | 286 | &args_attr_data.attr, |
295 | &args_attr_noatime.attr, | ||
296 | NULL, | 287 | NULL, |
297 | }; | 288 | }; |
298 | 289 | ||
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0); | |||
407 | TUNE_ATTR(log_flush_secs, 0); | 398 | TUNE_ATTR(log_flush_secs, 0); |
408 | TUNE_ATTR(quota_warn_period, 0); | 399 | TUNE_ATTR(quota_warn_period, 0); |
409 | TUNE_ATTR(quota_quantum, 0); | 400 | TUNE_ATTR(quota_quantum, 0); |
410 | TUNE_ATTR(atime_quantum, 0); | ||
411 | TUNE_ATTR(max_readahead, 0); | 401 | TUNE_ATTR(max_readahead, 0); |
412 | TUNE_ATTR(complain_secs, 0); | 402 | TUNE_ATTR(complain_secs, 0); |
413 | TUNE_ATTR(statfs_slow, 0); | 403 | TUNE_ATTR(statfs_slow, 0); |
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = { | |||
427 | &tune_attr_log_flush_secs.attr, | 417 | &tune_attr_log_flush_secs.attr, |
428 | &tune_attr_quota_warn_period.attr, | 418 | &tune_attr_quota_warn_period.attr, |
429 | &tune_attr_quota_quantum.attr, | 419 | &tune_attr_quota_quantum.attr, |
430 | &tune_attr_atime_quantum.attr, | ||
431 | &tune_attr_max_readahead.attr, | 420 | &tune_attr_max_readahead.attr, |
432 | &tune_attr_complain_secs.attr, | 421 | &tune_attr_complain_secs.attr, |
433 | &tune_attr_statfs_slow.attr, | 422 | &tune_attr_statfs_slow.attr, |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4abb1047c689..3c7c7637719c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -173,7 +173,7 @@ enum { | |||
173 | opt_err | 173 | opt_err |
174 | }; | 174 | }; |
175 | 175 | ||
176 | static match_table_t tokens = { | 176 | static const match_table_t tokens = { |
177 | { opt_uid, "uid=%u" }, | 177 | { opt_uid, "uid=%u" }, |
178 | { opt_gid, "gid=%u" }, | 178 | { opt_gid, "gid=%u" }, |
179 | { opt_umask, "umask=%o" }, | 179 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 9997cbf8beb5..9699c56d323f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -25,7 +25,7 @@ enum { | |||
25 | opt_force, opt_err | 25 | opt_force, opt_err |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static match_table_t tokens = { | 28 | static const match_table_t tokens = { |
29 | { opt_creator, "creator=%s" }, | 29 | { opt_creator, "creator=%s" }, |
30 | { opt_type, "type=%s" }, | 30 | { opt_type, "type=%s" }, |
31 | { opt_umask, "umask=%o" }, | 31 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8ae9c90ada0..29ad461d568f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -215,7 +215,7 @@ enum { | |||
215 | Opt_timeshift, Opt_err, | 215 | Opt_timeshift, Opt_err, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | static match_table_t tokens = { | 218 | static const match_table_t tokens = { |
219 | {Opt_help, "help"}, | 219 | {Opt_help, "help"}, |
220 | {Opt_uid, "uid=%u"}, | 220 | {Opt_uid, "uid=%u"}, |
221 | {Opt_gid, "gid=%u"}, | 221 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3f58923fb39b..61edc701b0e6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -57,7 +57,7 @@ enum { | |||
57 | Opt_err, | 57 | Opt_err, |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static match_table_t tokens = { | 60 | static const match_table_t tokens = { |
61 | {Opt_size, "size=%s"}, | 61 | {Opt_size, "size=%s"}, |
62 | {Opt_nr_inodes, "nr_inodes=%s"}, | 62 | {Opt_nr_inodes, "nr_inodes=%s"}, |
63 | {Opt_mode, "mode=%o"}, | 63 | {Opt_mode, "mode=%o"}, |
diff --git a/fs/inode.c b/fs/inode.c index b6726f644530..0487ddba1397 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb) | |||
166 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); | 166 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); |
167 | mapping->assoc_mapping = NULL; | 167 | mapping->assoc_mapping = NULL; |
168 | mapping->backing_dev_info = &default_backing_dev_info; | 168 | mapping->backing_dev_info = &default_backing_dev_info; |
169 | mapping->writeback_index = 0; | ||
169 | 170 | ||
170 | /* | 171 | /* |
171 | * If the block_device provides a backing_dev_info for client | 172 | * If the block_device provides a backing_dev_info for client |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
@@ -323,7 +323,7 @@ out: | |||
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * remove_kevent - cleans up and ultimately frees the given kevent | 326 | * remove_kevent - cleans up the given kevent |
327 | * | 327 | * |
328 | * Caller must hold dev->ev_mutex. | 328 | * Caller must hold dev->ev_mutex. |
329 | */ | 329 | */ |
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, | |||
334 | 334 | ||
335 | dev->event_count--; | 335 | dev->event_count--; |
336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; |
337 | } | ||
337 | 338 | ||
339 | /* | ||
340 | * free_kevent - frees the given kevent. | ||
341 | */ | ||
342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
343 | { | ||
338 | kfree(kevent->name); | 344 | kfree(kevent->name); |
339 | kmem_cache_free(event_cachep, kevent); | 345 | kmem_cache_free(event_cachep, kevent); |
340 | } | 346 | } |
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) | |||
350 | struct inotify_kernel_event *kevent; | 356 | struct inotify_kernel_event *kevent; |
351 | kevent = inotify_dev_get_event(dev); | 357 | kevent = inotify_dev_get_event(dev); |
352 | remove_kevent(dev, kevent); | 358 | remove_kevent(dev, kevent); |
359 | free_kevent(kevent); | ||
353 | } | 360 | } |
354 | } | 361 | } |
355 | 362 | ||
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
433 | dev = file->private_data; | 440 | dev = file->private_data; |
434 | 441 | ||
435 | while (1) { | 442 | while (1) { |
436 | int events; | ||
437 | 443 | ||
438 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 444 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); |
439 | 445 | ||
440 | mutex_lock(&dev->ev_mutex); | 446 | mutex_lock(&dev->ev_mutex); |
441 | events = !list_empty(&dev->events); | 447 | if (!list_empty(&dev->events)) { |
442 | mutex_unlock(&dev->ev_mutex); | ||
443 | if (events) { | ||
444 | ret = 0; | 448 | ret = 0; |
445 | break; | 449 | break; |
446 | } | 450 | } |
451 | mutex_unlock(&dev->ev_mutex); | ||
447 | 452 | ||
448 | if (file->f_flags & O_NONBLOCK) { | 453 | if (file->f_flags & O_NONBLOCK) { |
449 | ret = -EAGAIN; | 454 | ret = -EAGAIN; |
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
462 | if (ret) | 467 | if (ret) |
463 | return ret; | 468 | return ret; |
464 | 469 | ||
465 | mutex_lock(&dev->ev_mutex); | ||
466 | while (1) { | 470 | while (1) { |
467 | struct inotify_kernel_event *kevent; | 471 | struct inotify_kernel_event *kevent; |
468 | 472 | ||
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
481 | } | 485 | } |
482 | break; | 486 | break; |
483 | } | 487 | } |
488 | remove_kevent(dev, kevent); | ||
489 | |||
490 | /* | ||
491 | * Must perform the copy_to_user outside the mutex in order | ||
492 | * to avoid a lock order reversal with mmap_sem. | ||
493 | */ | ||
494 | mutex_unlock(&dev->ev_mutex); | ||
484 | 495 | ||
485 | if (copy_to_user(buf, &kevent->event, event_size)) { | 496 | if (copy_to_user(buf, &kevent->event, event_size)) { |
486 | ret = -EFAULT; | 497 | ret = -EFAULT; |
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
498 | count -= kevent->event.len; | 509 | count -= kevent->event.len; |
499 | } | 510 | } |
500 | 511 | ||
501 | remove_kevent(dev, kevent); | 512 | free_kevent(kevent); |
513 | |||
514 | mutex_lock(&dev->ev_mutex); | ||
502 | } | 515 | } |
503 | mutex_unlock(&dev->ev_mutex); | 516 | mutex_unlock(&dev->ev_mutex); |
504 | 517 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d3..d152856c371b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -13,9 +13,14 @@ | |||
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/writeback.h> | ||
17 | #include <linux/buffer_head.h> | ||
16 | 18 | ||
17 | #include <asm/ioctls.h> | 19 | #include <asm/ioctls.h> |
18 | 20 | ||
21 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
22 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
23 | |||
19 | /** | 24 | /** |
20 | * vfs_ioctl - call filesystem specific ioctl methods | 25 | * vfs_ioctl - call filesystem specific ioctl methods |
21 | * @filp: open file to invoke ioctl method on | 26 | * @filp: open file to invoke ioctl method on |
@@ -71,6 +76,276 @@ static int ioctl_fibmap(struct file *filp, int __user *p) | |||
71 | return put_user(res, p); | 76 | return put_user(res, p); |
72 | } | 77 | } |
73 | 78 | ||
79 | /** | ||
80 | * fiemap_fill_next_extent - Fiemap helper function | ||
81 | * @fieinfo: Fiemap context passed into ->fiemap | ||
82 | * @logical: Extent logical start offset, in bytes | ||
83 | * @phys: Extent physical start offset, in bytes | ||
84 | * @len: Extent length, in bytes | ||
85 | * @flags: FIEMAP_EXTENT flags that describe this extent | ||
86 | * | ||
87 | * Called from file system ->fiemap callback. Will populate extent | ||
88 | * info as passed in via arguments and copy to user memory. On | ||
89 | * success, extent count on fieinfo is incremented. | ||
90 | * | ||
91 | * Returns 0 on success, -errno on error, 1 if this was the last | ||
92 | * extent that will fit in user array. | ||
93 | */ | ||
94 | #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) | ||
95 | #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) | ||
96 | #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) | ||
97 | int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | ||
98 | u64 phys, u64 len, u32 flags) | ||
99 | { | ||
100 | struct fiemap_extent extent; | ||
101 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | ||
102 | |||
103 | /* only count the extents */ | ||
104 | if (fieinfo->fi_extents_max == 0) { | ||
105 | fieinfo->fi_extents_mapped++; | ||
106 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
107 | } | ||
108 | |||
109 | if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) | ||
110 | return 1; | ||
111 | |||
112 | if (flags & SET_UNKNOWN_FLAGS) | ||
113 | flags |= FIEMAP_EXTENT_UNKNOWN; | ||
114 | if (flags & SET_NO_UNMOUNTED_IO_FLAGS) | ||
115 | flags |= FIEMAP_EXTENT_ENCODED; | ||
116 | if (flags & SET_NOT_ALIGNED_FLAGS) | ||
117 | flags |= FIEMAP_EXTENT_NOT_ALIGNED; | ||
118 | |||
119 | memset(&extent, 0, sizeof(extent)); | ||
120 | extent.fe_logical = logical; | ||
121 | extent.fe_physical = phys; | ||
122 | extent.fe_length = len; | ||
123 | extent.fe_flags = flags; | ||
124 | |||
125 | dest += fieinfo->fi_extents_mapped; | ||
126 | if (copy_to_user(dest, &extent, sizeof(extent))) | ||
127 | return -EFAULT; | ||
128 | |||
129 | fieinfo->fi_extents_mapped++; | ||
130 | if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) | ||
131 | return 1; | ||
132 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
133 | } | ||
134 | EXPORT_SYMBOL(fiemap_fill_next_extent); | ||
135 | |||
136 | /** | ||
137 | * fiemap_check_flags - check validity of requested flags for fiemap | ||
138 | * @fieinfo: Fiemap context passed into ->fiemap | ||
139 | * @fs_flags: Set of fiemap flags that the file system understands | ||
140 | * | ||
141 | * Called from file system ->fiemap callback. This will compute the | ||
142 | * intersection of valid fiemap flags and those that the fs supports. That | ||
143 | * value is then compared against the user supplied flags. In case of bad user | ||
144 | * flags, the invalid values will be written into the fieinfo structure, and | ||
145 | * -EBADR is returned, which tells ioctl_fiemap() to return those values to | ||
146 | * userspace. For this reason, a return code of -EBADR should be preserved. | ||
147 | * | ||
148 | * Returns 0 on success, -EBADR on bad flags. | ||
149 | */ | ||
150 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) | ||
151 | { | ||
152 | u32 incompat_flags; | ||
153 | |||
154 | incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); | ||
155 | if (incompat_flags) { | ||
156 | fieinfo->fi_flags = incompat_flags; | ||
157 | return -EBADR; | ||
158 | } | ||
159 | return 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(fiemap_check_flags); | ||
162 | |||
163 | static int fiemap_check_ranges(struct super_block *sb, | ||
164 | u64 start, u64 len, u64 *new_len) | ||
165 | { | ||
166 | *new_len = len; | ||
167 | |||
168 | if (len == 0) | ||
169 | return -EINVAL; | ||
170 | |||
171 | if (start > sb->s_maxbytes) | ||
172 | return -EFBIG; | ||
173 | |||
174 | /* | ||
175 | * Shrink request scope to what the fs can actually handle. | ||
176 | */ | ||
177 | if ((len > sb->s_maxbytes) || | ||
178 | (sb->s_maxbytes - len) < start) | ||
179 | *new_len = sb->s_maxbytes - start; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | ||
185 | { | ||
186 | struct fiemap fiemap; | ||
187 | struct fiemap_extent_info fieinfo = { 0, }; | ||
188 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
189 | struct super_block *sb = inode->i_sb; | ||
190 | u64 len; | ||
191 | int error; | ||
192 | |||
193 | if (!inode->i_op->fiemap) | ||
194 | return -EOPNOTSUPP; | ||
195 | |||
196 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | ||
197 | sizeof(struct fiemap))) | ||
198 | return -EFAULT; | ||
199 | |||
200 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
201 | return -EINVAL; | ||
202 | |||
203 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
204 | &len); | ||
205 | if (error) | ||
206 | return error; | ||
207 | |||
208 | fieinfo.fi_flags = fiemap.fm_flags; | ||
209 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
210 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | ||
211 | |||
212 | if (fiemap.fm_extent_count != 0 && | ||
213 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | ||
214 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
215 | return -EFAULT; | ||
216 | |||
217 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
218 | filemap_write_and_wait(inode->i_mapping); | ||
219 | |||
220 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | ||
221 | fiemap.fm_flags = fieinfo.fi_flags; | ||
222 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
223 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | ||
224 | error = -EFAULT; | ||
225 | |||
226 | return error; | ||
227 | } | ||
228 | |||
229 | #ifdef CONFIG_BLOCK | ||
230 | |||
231 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | ||
232 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | ||
233 | |||
234 | /* | ||
235 | * @inode - the inode to map | ||
236 | * @arg - the pointer to userspace where we copy everything to | ||
237 | * @get_block - the fs's get_block function | ||
238 | * | ||
239 | * This does FIEMAP for block based inodes. Basically it will just loop | ||
240 | * through get_block until we hit the number of extents we want to map, or we | ||
241 | * go past the end of the file and hit a hole. | ||
242 | * | ||
243 | * If it is possible to have data blocks beyond a hole past @inode->i_size, then | ||
244 | * please do not use this function, it will stop at the first unmapped block | ||
245 | * beyond i_size | ||
246 | */ | ||
247 | int generic_block_fiemap(struct inode *inode, | ||
248 | struct fiemap_extent_info *fieinfo, u64 start, | ||
249 | u64 len, get_block_t *get_block) | ||
250 | { | ||
251 | struct buffer_head tmp; | ||
252 | unsigned int start_blk; | ||
253 | long long length = 0, map_len = 0; | ||
254 | u64 logical = 0, phys = 0, size = 0; | ||
255 | u32 flags = FIEMAP_EXTENT_MERGED; | ||
256 | int ret = 0; | ||
257 | |||
258 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | ||
259 | return ret; | ||
260 | |||
261 | start_blk = logical_to_blk(inode, start); | ||
262 | |||
263 | /* guard against change */ | ||
264 | mutex_lock(&inode->i_mutex); | ||
265 | |||
266 | length = (long long)min_t(u64, len, i_size_read(inode)); | ||
267 | map_len = length; | ||
268 | |||
269 | do { | ||
270 | /* | ||
271 | * we set b_size to the total size we want so it will map as | ||
272 | * many contiguous blocks as possible at once | ||
273 | */ | ||
274 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
275 | tmp.b_size = map_len; | ||
276 | |||
277 | ret = get_block(inode, start_blk, &tmp, 0); | ||
278 | if (ret) | ||
279 | break; | ||
280 | |||
281 | /* HOLE */ | ||
282 | if (!buffer_mapped(&tmp)) { | ||
283 | /* | ||
284 | * first hole after going past the EOF, this is our | ||
285 | * last extent | ||
286 | */ | ||
287 | if (length <= 0) { | ||
288 | flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; | ||
289 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
290 | phys, size, | ||
291 | flags); | ||
292 | break; | ||
293 | } | ||
294 | |||
295 | length -= blk_to_logical(inode, 1); | ||
296 | |||
297 | /* if we have holes up to/past EOF then we're done */ | ||
298 | if (length <= 0) | ||
299 | break; | ||
300 | |||
301 | start_blk++; | ||
302 | } else { | ||
303 | if (length <= 0 && size) { | ||
304 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
305 | phys, size, | ||
306 | flags); | ||
307 | if (ret) | ||
308 | break; | ||
309 | } | ||
310 | |||
311 | logical = blk_to_logical(inode, start_blk); | ||
312 | phys = blk_to_logical(inode, tmp.b_blocknr); | ||
313 | size = tmp.b_size; | ||
314 | flags = FIEMAP_EXTENT_MERGED; | ||
315 | |||
316 | length -= tmp.b_size; | ||
317 | start_blk += logical_to_blk(inode, size); | ||
318 | |||
319 | /* | ||
320 | * if we are past the EOF we need to loop again to see | ||
321 | * if there is a hole so we can mark this extent as the | ||
322 | * last one, and if not keep mapping things until we | ||
323 | * find a hole, or we run out of slots in the extent | ||
324 | * array | ||
325 | */ | ||
326 | if (length <= 0) | ||
327 | continue; | ||
328 | |||
329 | ret = fiemap_fill_next_extent(fieinfo, logical, phys, | ||
330 | size, flags); | ||
331 | if (ret) | ||
332 | break; | ||
333 | } | ||
334 | cond_resched(); | ||
335 | } while (1); | ||
336 | |||
337 | mutex_unlock(&inode->i_mutex); | ||
338 | |||
339 | /* if ret is 1 then we just hit the end of the extent array */ | ||
340 | if (ret == 1) | ||
341 | ret = 0; | ||
342 | |||
343 | return ret; | ||
344 | } | ||
345 | EXPORT_SYMBOL(generic_block_fiemap); | ||
346 | |||
347 | #endif /* CONFIG_BLOCK */ | ||
348 | |||
74 | static int file_ioctl(struct file *filp, unsigned int cmd, | 349 | static int file_ioctl(struct file *filp, unsigned int cmd, |
75 | unsigned long arg) | 350 | unsigned long arg) |
76 | { | 351 | { |
@@ -80,6 +355,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
80 | switch (cmd) { | 355 | switch (cmd) { |
81 | case FIBMAP: | 356 | case FIBMAP: |
82 | return ioctl_fibmap(filp, p); | 357 | return ioctl_fibmap(filp, p); |
358 | case FS_IOC_FIEMAP: | ||
359 | return ioctl_fiemap(filp, arg); | ||
83 | case FIGETBSZ: | 360 | case FIGETBSZ: |
84 | return put_user(inode->i_sb->s_blocksize, p); | 361 | return put_user(inode->i_sb->s_blocksize, p); |
85 | case FIONREAD: | 362 | case FIONREAD: |
diff --git a/fs/ioprio.c b/fs/ioprio.c index c4a1c3c65aac..da3cc460d4df 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
115 | pgrp = task_pgrp(current); | 115 | pgrp = task_pgrp(current); |
116 | else | 116 | else |
117 | pgrp = find_vpid(who); | 117 | pgrp = find_vpid(who); |
118 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 118 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
119 | ret = set_task_ioprio(p, ioprio); | 119 | ret = set_task_ioprio(p, ioprio); |
120 | if (ret) | 120 | if (ret) |
121 | break; | 121 | break; |
122 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 122 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
123 | break; | 123 | break; |
124 | case IOPRIO_WHO_USER: | 124 | case IOPRIO_WHO_USER: |
125 | if (!who) | 125 | if (!who) |
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
204 | pgrp = task_pgrp(current); | 204 | pgrp = task_pgrp(current); |
205 | else | 205 | else |
206 | pgrp = find_vpid(who); | 206 | pgrp = find_vpid(who); |
207 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 207 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
208 | tmpio = get_task_ioprio(p); | 208 | tmpio = get_task_ioprio(p); |
209 | if (tmpio < 0) | 209 | if (tmpio < 0) |
210 | continue; | 210 | continue; |
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
212 | ret = tmpio; | 212 | ret = tmpio; |
213 | else | 213 | else |
214 | ret = ioprio_best(ret, tmpio); | 214 | ret = ioprio_best(ret, tmpio); |
215 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 215 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
216 | break; | 216 | break; |
217 | case IOPRIO_WHO_USER: | 217 | case IOPRIO_WHO_USER: |
218 | if (!who) | 218 | if (!who) |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26948a6033b6..3f8af0f1505b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -310,7 +310,7 @@ enum { | |||
310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, | 310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, |
311 | }; | 311 | }; |
312 | 312 | ||
313 | static match_table_t tokens = { | 313 | static const match_table_t tokens = { |
314 | {Opt_norock, "norock"}, | 314 | {Opt_norock, "norock"}, |
315 | {Opt_nojoliet, "nojoliet"}, | 315 | {Opt_nojoliet, "nojoliet"}, |
316 | {Opt_unhide, "unhide"}, | 316 | {Opt_unhide, "unhide"}, |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 8dee32007500..0540ca27a446 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks) | |||
291 | goto out; | 291 | goto out; |
292 | } | 292 | } |
293 | 293 | ||
294 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); | 294 | lock_map_acquire(&handle->h_lockdep_map); |
295 | 295 | ||
296 | out: | 296 | out: |
297 | return handle; | 297 | return handle; |
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle) | |||
1448 | spin_unlock(&journal->j_state_lock); | 1448 | spin_unlock(&journal->j_state_lock); |
1449 | } | 1449 | } |
1450 | 1450 | ||
1451 | lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); | 1451 | lock_map_release(&handle->h_lockdep_map); |
1452 | 1452 | ||
1453 | jbd_free_handle(handle); | 1453 | jbd_free_handle(handle); |
1454 | return err; | 1454 | return err; |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..9203c3332f17 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/marker.h> | ||
23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | 26 | ||
@@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
93 | int ret = 0; | 94 | int ret = 0; |
94 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
95 | 96 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
98 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
99 | jbd_unlock_bh_state(bh); | 101 | jbd_unlock_bh_state(bh); |
@@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
126 | 128 | ||
127 | /* | 129 | /* |
128 | * Test again, another process may have checkpointed while we | 130 | * Test again, another process may have checkpointed while we |
129 | * were waiting for the checkpoint lock | 131 | * were waiting for the checkpoint lock. If there are no |
132 | * outstanding transactions there is nothing to checkpoint and | ||
133 | * we can't make progress. Abort the journal in this case. | ||
130 | */ | 134 | */ |
131 | spin_lock(&journal->j_state_lock); | 135 | spin_lock(&journal->j_state_lock); |
136 | spin_lock(&journal->j_list_lock); | ||
132 | nblocks = jbd_space_needed(journal); | 137 | nblocks = jbd_space_needed(journal); |
133 | if (__jbd2_log_space_left(journal) < nblocks) { | 138 | if (__jbd2_log_space_left(journal) < nblocks) { |
139 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
140 | |||
141 | spin_unlock(&journal->j_list_lock); | ||
134 | spin_unlock(&journal->j_state_lock); | 142 | spin_unlock(&journal->j_state_lock); |
135 | jbd2_log_do_checkpoint(journal); | 143 | if (chkpt) { |
144 | jbd2_log_do_checkpoint(journal); | ||
145 | } else { | ||
146 | printk(KERN_ERR "%s: no transactions\n", | ||
147 | __func__); | ||
148 | jbd2_journal_abort(journal, 0); | ||
149 | } | ||
150 | |||
136 | spin_lock(&journal->j_state_lock); | 151 | spin_lock(&journal->j_state_lock); |
152 | } else { | ||
153 | spin_unlock(&journal->j_list_lock); | ||
137 | } | 154 | } |
138 | mutex_unlock(&journal->j_checkpoint_mutex); | 155 | mutex_unlock(&journal->j_checkpoint_mutex); |
139 | } | 156 | } |
@@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
160 | * buffers. Note that we take the buffers in the opposite ordering | 177 | * buffers. Note that we take the buffers in the opposite ordering |
161 | * from the one in which they were submitted for IO. | 178 | * from the one in which they were submitted for IO. |
162 | * | 179 | * |
180 | * Return 0 on success, and return <0 if some buffers have failed | ||
181 | * to be written out. | ||
182 | * | ||
163 | * Called with j_list_lock held. | 183 | * Called with j_list_lock held. |
164 | */ | 184 | */ |
165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 185 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
166 | { | 186 | { |
167 | struct journal_head *jh; | 187 | struct journal_head *jh; |
168 | struct buffer_head *bh; | 188 | struct buffer_head *bh; |
169 | tid_t this_tid; | 189 | tid_t this_tid; |
170 | int released = 0; | 190 | int released = 0; |
191 | int ret = 0; | ||
171 | 192 | ||
172 | this_tid = transaction->t_tid; | 193 | this_tid = transaction->t_tid; |
173 | restart: | 194 | restart: |
174 | /* Did somebody clean up the transaction in the meanwhile? */ | 195 | /* Did somebody clean up the transaction in the meanwhile? */ |
175 | if (journal->j_checkpoint_transactions != transaction || | 196 | if (journal->j_checkpoint_transactions != transaction || |
176 | transaction->t_tid != this_tid) | 197 | transaction->t_tid != this_tid) |
177 | return; | 198 | return ret; |
178 | while (!released && transaction->t_checkpoint_io_list) { | 199 | while (!released && transaction->t_checkpoint_io_list) { |
179 | jh = transaction->t_checkpoint_io_list; | 200 | jh = transaction->t_checkpoint_io_list; |
180 | bh = jh2bh(jh); | 201 | bh = jh2bh(jh); |
@@ -194,6 +215,9 @@ restart: | |||
194 | spin_lock(&journal->j_list_lock); | 215 | spin_lock(&journal->j_list_lock); |
195 | goto restart; | 216 | goto restart; |
196 | } | 217 | } |
218 | if (unlikely(buffer_write_io_error(bh))) | ||
219 | ret = -EIO; | ||
220 | |||
197 | /* | 221 | /* |
198 | * Now in whatever state the buffer currently is, we know that | 222 | * Now in whatever state the buffer currently is, we know that |
199 | * it has been written out and so we can drop it from the list | 223 | * it has been written out and so we can drop it from the list |
@@ -203,6 +227,8 @@ restart: | |||
203 | jbd2_journal_remove_journal_head(bh); | 227 | jbd2_journal_remove_journal_head(bh); |
204 | __brelse(bh); | 228 | __brelse(bh); |
205 | } | 229 | } |
230 | |||
231 | return ret; | ||
206 | } | 232 | } |
207 | 233 | ||
208 | #define NR_BATCH 64 | 234 | #define NR_BATCH 64 |
@@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
226 | * Try to flush one buffer from the checkpoint list to disk. | 252 | * Try to flush one buffer from the checkpoint list to disk. |
227 | * | 253 | * |
228 | * Return 1 if something happened which requires us to abort the current | 254 | * Return 1 if something happened which requires us to abort the current |
229 | * scan of the checkpoint list. | 255 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
256 | * be written out. | ||
230 | * | 257 | * |
231 | * Called with j_list_lock held and drops it if 1 is returned | 258 | * Called with j_list_lock held and drops it if 1 is returned |
232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 259 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
@@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
258 | jbd2_log_wait_commit(journal, tid); | 285 | jbd2_log_wait_commit(journal, tid); |
259 | ret = 1; | 286 | ret = 1; |
260 | } else if (!buffer_dirty(bh)) { | 287 | } else if (!buffer_dirty(bh)) { |
288 | ret = 1; | ||
289 | if (unlikely(buffer_write_io_error(bh))) | ||
290 | ret = -EIO; | ||
261 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 291 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
262 | BUFFER_TRACE(bh, "remove from checkpoint"); | 292 | BUFFER_TRACE(bh, "remove from checkpoint"); |
263 | __jbd2_journal_remove_checkpoint(jh); | 293 | __jbd2_journal_remove_checkpoint(jh); |
@@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
265 | jbd_unlock_bh_state(bh); | 295 | jbd_unlock_bh_state(bh); |
266 | jbd2_journal_remove_journal_head(bh); | 296 | jbd2_journal_remove_journal_head(bh); |
267 | __brelse(bh); | 297 | __brelse(bh); |
268 | ret = 1; | ||
269 | } else { | 298 | } else { |
270 | /* | 299 | /* |
271 | * Important: we are about to write the buffer, and | 300 | * Important: we are about to write the buffer, and |
@@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
298 | * to disk. We submit larger chunks of data at once. | 327 | * to disk. We submit larger chunks of data at once. |
299 | * | 328 | * |
300 | * The journal should be locked before calling this function. | 329 | * The journal should be locked before calling this function. |
330 | * Called with j_checkpoint_mutex held. | ||
301 | */ | 331 | */ |
302 | int jbd2_log_do_checkpoint(journal_t *journal) | 332 | int jbd2_log_do_checkpoint(journal_t *journal) |
303 | { | 333 | { |
@@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
313 | * journal straight away. | 343 | * journal straight away. |
314 | */ | 344 | */ |
315 | result = jbd2_cleanup_journal_tail(journal); | 345 | result = jbd2_cleanup_journal_tail(journal); |
346 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
347 | journal->j_devname, result); | ||
316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 348 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
317 | if (result <= 0) | 349 | if (result <= 0) |
318 | return result; | 350 | return result; |
@@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
321 | * OK, we need to start writing disk blocks. Take one transaction | 353 | * OK, we need to start writing disk blocks. Take one transaction |
322 | * and write it. | 354 | * and write it. |
323 | */ | 355 | */ |
356 | result = 0; | ||
324 | spin_lock(&journal->j_list_lock); | 357 | spin_lock(&journal->j_list_lock); |
325 | if (!journal->j_checkpoint_transactions) | 358 | if (!journal->j_checkpoint_transactions) |
326 | goto out; | 359 | goto out; |
@@ -339,7 +372,7 @@ restart: | |||
339 | int batch_count = 0; | 372 | int batch_count = 0; |
340 | struct buffer_head *bhs[NR_BATCH]; | 373 | struct buffer_head *bhs[NR_BATCH]; |
341 | struct journal_head *jh; | 374 | struct journal_head *jh; |
342 | int retry = 0; | 375 | int retry = 0, err; |
343 | 376 | ||
344 | while (!retry && transaction->t_checkpoint_list) { | 377 | while (!retry && transaction->t_checkpoint_list) { |
345 | struct buffer_head *bh; | 378 | struct buffer_head *bh; |
@@ -353,6 +386,8 @@ restart: | |||
353 | } | 386 | } |
354 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 387 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
355 | transaction); | 388 | transaction); |
389 | if (retry < 0 && !result) | ||
390 | result = retry; | ||
356 | if (!retry && (need_resched() || | 391 | if (!retry && (need_resched() || |
357 | spin_needbreak(&journal->j_list_lock))) { | 392 | spin_needbreak(&journal->j_list_lock))) { |
358 | spin_unlock(&journal->j_list_lock); | 393 | spin_unlock(&journal->j_list_lock); |
@@ -377,14 +412,18 @@ restart: | |||
377 | * Now we have cleaned up the first transaction's checkpoint | 412 | * Now we have cleaned up the first transaction's checkpoint |
378 | * list. Let's clean up the second one | 413 | * list. Let's clean up the second one |
379 | */ | 414 | */ |
380 | __wait_cp_io(journal, transaction); | 415 | err = __wait_cp_io(journal, transaction); |
416 | if (!result) | ||
417 | result = err; | ||
381 | } | 418 | } |
382 | out: | 419 | out: |
383 | spin_unlock(&journal->j_list_lock); | 420 | spin_unlock(&journal->j_list_lock); |
384 | result = jbd2_cleanup_journal_tail(journal); | ||
385 | if (result < 0) | 421 | if (result < 0) |
386 | return result; | 422 | jbd2_journal_abort(journal, result); |
387 | return 0; | 423 | else |
424 | result = jbd2_cleanup_journal_tail(journal); | ||
425 | |||
426 | return (result < 0) ? result : 0; | ||
388 | } | 427 | } |
389 | 428 | ||
390 | /* | 429 | /* |
@@ -400,8 +439,9 @@ out: | |||
400 | * This is the only part of the journaling code which really needs to be | 439 | * This is the only part of the journaling code which really needs to be |
401 | * aware of transaction aborts. Checkpointing involves writing to the | 440 | * aware of transaction aborts. Checkpointing involves writing to the |
402 | * main filesystem area rather than to the journal, so it can proceed | 441 | * main filesystem area rather than to the journal, so it can proceed |
403 | * even in abort state, but we must not update the journal superblock if | 442 | * even in abort state, but we must not update the super block if |
404 | * we have an abort error outstanding. | 443 | * checkpointing may have failed. Otherwise, we would lose some metadata |
444 | * buffers which should be written-back to the filesystem. | ||
405 | */ | 445 | */ |
406 | 446 | ||
407 | int jbd2_cleanup_journal_tail(journal_t *journal) | 447 | int jbd2_cleanup_journal_tail(journal_t *journal) |
@@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
410 | tid_t first_tid; | 450 | tid_t first_tid; |
411 | unsigned long blocknr, freed; | 451 | unsigned long blocknr, freed; |
412 | 452 | ||
453 | if (is_journal_aborted(journal)) | ||
454 | return 1; | ||
455 | |||
413 | /* OK, work out the oldest transaction remaining in the log, and | 456 | /* OK, work out the oldest transaction remaining in the log, and |
414 | * the log block it starts at. | 457 | * the log block it starts at. |
415 | * | 458 | * |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95ec..0abe02c4242a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
19 | #include <linux/marker.h> | ||
19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
126 | 127 | ||
127 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
128 | lock_buffer(bh); | 129 | lock_buffer(bh); |
129 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
130 | set_buffer_dirty(bh); | ||
131 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
132 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
133 | 133 | ||
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
147 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
148 | */ | 148 | */ |
149 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
153 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
154 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
157 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
158 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
160 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
161 | lock_buffer(bh); | 158 | lock_buffer(bh); |
162 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
163 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
164 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
165 | } | 162 | } |
166 | *cbh = bh; | 163 | *cbh = bh; |
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
371 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
372 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
373 | 370 | ||
371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
372 | journal->j_devname, commit_transaction->t_tid); | ||
374 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
375 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
376 | 375 | ||
@@ -505,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
505 | jh = commit_transaction->t_buffers; | 504 | jh = commit_transaction->t_buffers; |
506 | 505 | ||
507 | /* If we're in abort mode, we just un-journal the buffer and | 506 | /* If we're in abort mode, we just un-journal the buffer and |
508 | release it for background writing. */ | 507 | release it. */ |
509 | 508 | ||
510 | if (is_journal_aborted(journal)) { | 509 | if (is_journal_aborted(journal)) { |
510 | clear_buffer_jbddirty(jh2bh(jh)); | ||
511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
512 | jbd2_journal_refile_buffer(journal, jh); | 512 | jbd2_journal_refile_buffer(journal, jh); |
513 | /* If that was the last one, we need to clean up | 513 | /* If that was the last one, we need to clean up |
@@ -681,11 +681,11 @@ start_journal_io: | |||
681 | */ | 681 | */ |
682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
683 | if (err) { | 683 | if (err) { |
684 | char b[BDEVNAME_SIZE]; | ||
685 | |||
686 | printk(KERN_WARNING | 684 | printk(KERN_WARNING |
687 | "JBD2: Detected IO errors while flushing file data " | 685 | "JBD2: Detected IO errors while flushing file data " |
688 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 686 | "on %s\n", journal->j_devname); |
687 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
688 | jbd2_journal_abort(journal, err); | ||
689 | err = 0; | 689 | err = 0; |
690 | } | 690 | } |
691 | 691 | ||
@@ -786,6 +786,9 @@ wait_for_iobuf: | |||
786 | /* AKPM: bforget here */ | 786 | /* AKPM: bforget here */ |
787 | } | 787 | } |
788 | 788 | ||
789 | if (err) | ||
790 | jbd2_journal_abort(journal, err); | ||
791 | |||
789 | jbd_debug(3, "JBD: commit phase 5\n"); | 792 | jbd_debug(3, "JBD: commit phase 5\n"); |
790 | 793 | ||
791 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 794 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -884,6 +887,8 @@ restart_loop: | |||
884 | if (buffer_jbddirty(bh)) { | 887 | if (buffer_jbddirty(bh)) { |
885 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 888 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
886 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 889 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
890 | if (is_journal_aborted(journal)) | ||
891 | clear_buffer_jbddirty(bh); | ||
887 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 892 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
888 | __jbd2_journal_refile_buffer(jh); | 893 | __jbd2_journal_refile_buffer(jh); |
889 | jbd_unlock_bh_state(bh); | 894 | jbd_unlock_bh_state(bh); |
@@ -990,6 +995,9 @@ restart_loop: | |||
990 | } | 995 | } |
991 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
992 | 997 | ||
998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
999 | journal->j_devname, commit_transaction->t_tid, | ||
1000 | journal->j_tail_sequence); | ||
993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1001 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
994 | journal->j_commit_sequence, journal->j_tail_sequence); | 1002 | journal->j_commit_sequence, journal->j_tail_sequence); |
995 | 1003 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4edb..783de118de92 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
597 | if (ret) | 597 | if (ret) |
598 | *retp = ret; | 598 | *retp = ret; |
599 | else { | 599 | else { |
600 | char b[BDEVNAME_SIZE]; | ||
601 | |||
602 | printk(KERN_ALERT "%s: journal block not found " | 600 | printk(KERN_ALERT "%s: journal block not found " |
603 | "at offset %lu on %s\n", | 601 | "at offset %lu on %s\n", |
604 | __func__, | 602 | __func__, blocknr, journal->j_devname); |
605 | blocknr, | ||
606 | bdevname(journal->j_dev, b)); | ||
607 | err = -EIO; | 603 | err = -EIO; |
608 | __journal_abort_soft(journal, err); | 604 | __journal_abort_soft(journal, err); |
609 | } | 605 | } |
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; | |||
901 | 897 | ||
902 | static void jbd2_stats_proc_init(journal_t *journal) | 898 | static void jbd2_stats_proc_init(journal_t *journal) |
903 | { | 899 | { |
904 | char name[BDEVNAME_SIZE]; | 900 | journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); |
905 | |||
906 | bdevname(journal->j_dev, name); | ||
907 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
908 | if (journal->j_proc_entry) { | 901 | if (journal->j_proc_entry) { |
909 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, | 902 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, |
910 | &jbd2_seq_history_fops, journal); | 903 | &jbd2_seq_history_fops, journal); |
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) | |||
915 | 908 | ||
916 | static void jbd2_stats_proc_exit(journal_t *journal) | 909 | static void jbd2_stats_proc_exit(journal_t *journal) |
917 | { | 910 | { |
918 | char name[BDEVNAME_SIZE]; | ||
919 | |||
920 | bdevname(journal->j_dev, name); | ||
921 | remove_proc_entry("info", journal->j_proc_entry); | 911 | remove_proc_entry("info", journal->j_proc_entry); |
922 | remove_proc_entry("history", journal->j_proc_entry); | 912 | remove_proc_entry("history", journal->j_proc_entry); |
923 | remove_proc_entry(name, proc_jbd2_stats); | 913 | remove_proc_entry(journal->j_devname, proc_jbd2_stats); |
924 | } | 914 | } |
925 | 915 | ||
926 | static void journal_init_stats(journal_t *journal) | 916 | static void journal_init_stats(journal_t *journal) |
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1018 | { | 1008 | { |
1019 | journal_t *journal = journal_init_common(); | 1009 | journal_t *journal = journal_init_common(); |
1020 | struct buffer_head *bh; | 1010 | struct buffer_head *bh; |
1011 | char *p; | ||
1021 | int n; | 1012 | int n; |
1022 | 1013 | ||
1023 | if (!journal) | 1014 | if (!journal) |
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1039 | journal->j_fs_dev = fs_dev; | 1030 | journal->j_fs_dev = fs_dev; |
1040 | journal->j_blk_offset = start; | 1031 | journal->j_blk_offset = start; |
1041 | journal->j_maxlen = len; | 1032 | journal->j_maxlen = len; |
1033 | bdevname(journal->j_dev, journal->j_devname); | ||
1034 | p = journal->j_devname; | ||
1035 | while ((p = strchr(p, '/'))) | ||
1036 | *p = '!'; | ||
1042 | jbd2_stats_proc_init(journal); | 1037 | jbd2_stats_proc_init(journal); |
1043 | 1038 | ||
1044 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1039 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1061 | { | 1056 | { |
1062 | struct buffer_head *bh; | 1057 | struct buffer_head *bh; |
1063 | journal_t *journal = journal_init_common(); | 1058 | journal_t *journal = journal_init_common(); |
1059 | char *p; | ||
1064 | int err; | 1060 | int err; |
1065 | int n; | 1061 | int n; |
1066 | unsigned long long blocknr; | 1062 | unsigned long long blocknr; |
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1070 | 1066 | ||
1071 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; | 1067 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; |
1072 | journal->j_inode = inode; | 1068 | journal->j_inode = inode; |
1069 | bdevname(journal->j_dev, journal->j_devname); | ||
1070 | p = journal->j_devname; | ||
1071 | while ((p = strchr(p, '/'))) | ||
1072 | *p = '!'; | ||
1073 | p = journal->j_devname + strlen(journal->j_devname); | ||
1074 | sprintf(p, ":%lu", journal->j_inode->i_ino); | ||
1073 | jbd_debug(1, | 1075 | jbd_debug(1, |
1074 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", | 1076 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", |
1075 | journal, inode->i_sb->s_id, inode->i_ino, | 1077 | journal, inode->i_sb->s_id, inode->i_ino, |
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1253 | goto out; | 1255 | goto out; |
1254 | } | 1256 | } |
1255 | 1257 | ||
1258 | if (buffer_write_io_error(bh)) { | ||
1259 | /* | ||
1260 | * Oh, dear. A previous attempt to write the journal | ||
1261 | * superblock failed. This could happen because the | ||
1262 | * USB device was yanked out. Or it could happen to | ||
1263 | * be a transient write error and maybe the block will | ||
1264 | * be remapped. Nothing we can do but to retry the | ||
1265 | * write and hope for the best. | ||
1266 | */ | ||
1267 | printk(KERN_ERR "JBD2: previous I/O error detected " | ||
1268 | "for journal superblock update for %s.\n", | ||
1269 | journal->j_devname); | ||
1270 | clear_buffer_write_io_error(bh); | ||
1271 | set_buffer_uptodate(bh); | ||
1272 | } | ||
1273 | |||
1256 | spin_lock(&journal->j_state_lock); | 1274 | spin_lock(&journal->j_state_lock); |
1257 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1275 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
1258 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1276 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1264 | 1282 | ||
1265 | BUFFER_TRACE(bh, "marking dirty"); | 1283 | BUFFER_TRACE(bh, "marking dirty"); |
1266 | mark_buffer_dirty(bh); | 1284 | mark_buffer_dirty(bh); |
1267 | if (wait) | 1285 | if (wait) { |
1268 | sync_dirty_buffer(bh); | 1286 | sync_dirty_buffer(bh); |
1269 | else | 1287 | if (buffer_write_io_error(bh)) { |
1288 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1289 | "when updating journal superblock for %s.\n", | ||
1290 | journal->j_devname); | ||
1291 | clear_buffer_write_io_error(bh); | ||
1292 | set_buffer_uptodate(bh); | ||
1293 | } | ||
1294 | } else | ||
1270 | ll_rw_block(SWRITE, 1, &bh); | 1295 | ll_rw_block(SWRITE, 1, &bh); |
1271 | 1296 | ||
1272 | out: | 1297 | out: |
@@ -1426,9 +1451,12 @@ recovery_error: | |||
1426 | * | 1451 | * |
1427 | * Release a journal_t structure once it is no longer in use by the | 1452 | * Release a journal_t structure once it is no longer in use by the |
1428 | * journaled object. | 1453 | * journaled object. |
1454 | * Return <0 if we couldn't clean up the journal. | ||
1429 | */ | 1455 | */ |
1430 | void jbd2_journal_destroy(journal_t *journal) | 1456 | int jbd2_journal_destroy(journal_t *journal) |
1431 | { | 1457 | { |
1458 | int err = 0; | ||
1459 | |||
1432 | /* Wait for the commit thread to wake up and die. */ | 1460 | /* Wait for the commit thread to wake up and die. */ |
1433 | journal_kill_thread(journal); | 1461 | journal_kill_thread(journal); |
1434 | 1462 | ||
@@ -1451,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1451 | J_ASSERT(journal->j_checkpoint_transactions == NULL); | 1479 | J_ASSERT(journal->j_checkpoint_transactions == NULL); |
1452 | spin_unlock(&journal->j_list_lock); | 1480 | spin_unlock(&journal->j_list_lock); |
1453 | 1481 | ||
1454 | /* We can now mark the journal as empty. */ | ||
1455 | journal->j_tail = 0; | ||
1456 | journal->j_tail_sequence = ++journal->j_transaction_sequence; | ||
1457 | if (journal->j_sb_buffer) { | 1482 | if (journal->j_sb_buffer) { |
1458 | jbd2_journal_update_superblock(journal, 1); | 1483 | if (!is_journal_aborted(journal)) { |
1484 | /* We can now mark the journal as empty. */ | ||
1485 | journal->j_tail = 0; | ||
1486 | journal->j_tail_sequence = | ||
1487 | ++journal->j_transaction_sequence; | ||
1488 | jbd2_journal_update_superblock(journal, 1); | ||
1489 | } else { | ||
1490 | err = -EIO; | ||
1491 | } | ||
1459 | brelse(journal->j_sb_buffer); | 1492 | brelse(journal->j_sb_buffer); |
1460 | } | 1493 | } |
1461 | 1494 | ||
@@ -1467,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1467 | jbd2_journal_destroy_revoke(journal); | 1500 | jbd2_journal_destroy_revoke(journal); |
1468 | kfree(journal->j_wbuf); | 1501 | kfree(journal->j_wbuf); |
1469 | kfree(journal); | 1502 | kfree(journal); |
1503 | |||
1504 | return err; | ||
1470 | } | 1505 | } |
1471 | 1506 | ||
1472 | 1507 | ||
@@ -1692,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) | |||
1692 | spin_lock(&journal->j_list_lock); | 1727 | spin_lock(&journal->j_list_lock); |
1693 | while (!err && journal->j_checkpoint_transactions != NULL) { | 1728 | while (!err && journal->j_checkpoint_transactions != NULL) { |
1694 | spin_unlock(&journal->j_list_lock); | 1729 | spin_unlock(&journal->j_list_lock); |
1730 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1695 | err = jbd2_log_do_checkpoint(journal); | 1731 | err = jbd2_log_do_checkpoint(journal); |
1732 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1696 | spin_lock(&journal->j_list_lock); | 1733 | spin_lock(&journal->j_list_lock); |
1697 | } | 1734 | } |
1698 | spin_unlock(&journal->j_list_lock); | 1735 | spin_unlock(&journal->j_list_lock); |
1736 | |||
1737 | if (is_journal_aborted(journal)) | ||
1738 | return -EIO; | ||
1739 | |||
1699 | jbd2_cleanup_journal_tail(journal); | 1740 | jbd2_cleanup_journal_tail(journal); |
1700 | 1741 | ||
1701 | /* Finally, mark the journal as really needing no recovery. | 1742 | /* Finally, mark the journal as really needing no recovery. |
@@ -1717,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1717 | J_ASSERT(journal->j_head == journal->j_tail); | 1758 | J_ASSERT(journal->j_head == journal->j_tail); |
1718 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1759 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
1719 | spin_unlock(&journal->j_state_lock); | 1760 | spin_unlock(&journal->j_state_lock); |
1720 | return err; | 1761 | return 0; |
1721 | } | 1762 | } |
1722 | 1763 | ||
1723 | /** | 1764 | /** |
@@ -1761,23 +1802,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1761 | } | 1802 | } |
1762 | 1803 | ||
1763 | /* | 1804 | /* |
1764 | * journal_dev_name: format a character string to describe on what | ||
1765 | * device this journal is present. | ||
1766 | */ | ||
1767 | |||
1768 | static const char *journal_dev_name(journal_t *journal, char *buffer) | ||
1769 | { | ||
1770 | struct block_device *bdev; | ||
1771 | |||
1772 | if (journal->j_inode) | ||
1773 | bdev = journal->j_inode->i_sb->s_bdev; | ||
1774 | else | ||
1775 | bdev = journal->j_dev; | ||
1776 | |||
1777 | return bdevname(bdev, buffer); | ||
1778 | } | ||
1779 | |||
1780 | /* | ||
1781 | * Journal abort has very specific semantics, which we describe | 1805 | * Journal abort has very specific semantics, which we describe |
1782 | * for journal abort. | 1806 | * for journal abort. |
1783 | * | 1807 | * |
@@ -1793,13 +1817,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) | |||
1793 | void __jbd2_journal_abort_hard(journal_t *journal) | 1817 | void __jbd2_journal_abort_hard(journal_t *journal) |
1794 | { | 1818 | { |
1795 | transaction_t *transaction; | 1819 | transaction_t *transaction; |
1796 | char b[BDEVNAME_SIZE]; | ||
1797 | 1820 | ||
1798 | if (journal->j_flags & JBD2_ABORT) | 1821 | if (journal->j_flags & JBD2_ABORT) |
1799 | return; | 1822 | return; |
1800 | 1823 | ||
1801 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1824 | printk(KERN_ERR "Aborting journal on device %s.\n", |
1802 | journal_dev_name(journal, b)); | 1825 | journal->j_devname); |
1803 | 1826 | ||
1804 | spin_lock(&journal->j_state_lock); | 1827 | spin_lock(&journal->j_state_lock); |
1805 | journal->j_flags |= JBD2_ABORT; | 1828 | journal->j_flags |= JBD2_ABORT; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b76..73063285b13f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -225,7 +225,7 @@ do { \ | |||
225 | */ | 225 | */ |
226 | int jbd2_journal_recover(journal_t *journal) | 226 | int jbd2_journal_recover(journal_t *journal) |
227 | { | 227 | { |
228 | int err; | 228 | int err, err2; |
229 | journal_superblock_t * sb; | 229 | journal_superblock_t * sb; |
230 | 230 | ||
231 | struct recovery_info info; | 231 | struct recovery_info info; |
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
263 | journal->j_transaction_sequence = ++info.end_transaction; | 263 | journal->j_transaction_sequence = ++info.end_transaction; |
264 | 264 | ||
265 | jbd2_journal_clear_revoke(journal); | 265 | jbd2_journal_clear_revoke(journal); |
266 | sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
267 | if (!err) | ||
268 | err = err2; | ||
269 | |||
267 | return err; | 270 | return err; |
268 | } | 271 | } |
269 | 272 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4f7cadbb19fa..e5d540588fa9 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
301 | goto out; | 301 | goto out; |
302 | } | 302 | } |
303 | 303 | ||
304 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); | 304 | lock_map_acquire(&handle->h_lockdep_map); |
305 | out: | 305 | out: |
306 | return handle; | 306 | return handle; |
307 | } | 307 | } |
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1279 | spin_unlock(&journal->j_state_lock); | 1279 | spin_unlock(&journal->j_state_lock); |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); | 1282 | lock_map_release(&handle->h_lockdep_map); |
1283 | 1283 | ||
1284 | jbd2_free_handle(handle); | 1284 | jbd2_free_handle(handle); |
1285 | return err; | 1285 | return err; |
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #ifndef _JFFS2_FS_I | 12 | #ifndef _JFFS2_FS_I |
13 | #define _JFFS2_FS_I | 13 | #define _JFFS2_FS_I |
14 | 14 | ||
15 | #include <linux/version.h> | ||
16 | #include <linux/rbtree.h> | 15 | #include <linux/rbtree.h> |
17 | #include <linux/posix_acl.h> | 16 | #include <linux/posix_acl.h> |
18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 3630718be395..0dae345e481b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -199,7 +199,7 @@ enum { | |||
199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask | 199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask |
200 | }; | 200 | }; |
201 | 201 | ||
202 | static match_table_t tokens = { | 202 | static const match_table_t tokens = { |
203 | {Opt_integrity, "integrity"}, | 203 | {Opt_integrity, "integrity"}, |
204 | {Opt_nointegrity, "nointegrity"}, | 204 | {Opt_nointegrity, "nointegrity"}, |
205 | {Opt_iocharset, "iocharset=%s"}, | 205 | {Opt_iocharset, "iocharset=%s"}, |
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 7725a0a9a555..97f6073ab339 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile | |||
@@ -5,6 +5,6 @@ | |||
5 | obj-$(CONFIG_LOCKD) += lockd.o | 5 | obj-$(CONFIG_LOCKD) += lockd.o |
6 | 6 | ||
7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ | 7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ |
8 | svcproc.o svcsubs.o mon.o xdr.o | 8 | svcproc.o svcsubs.o mon.o xdr.o grace.o |
9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o | 9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o |
10 | lockd-objs := $(lockd-objs-y) | 10 | lockd-objs := $(lockd-objs-y) |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0b45fd3a4bfd..8307dd64bf46 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -54,14 +54,13 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | |||
54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; | 54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; |
55 | int status; | 55 | int status; |
56 | 56 | ||
57 | status = lockd_up(nlm_init->protocol); | 57 | status = lockd_up(); |
58 | if (status < 0) | 58 | if (status < 0) |
59 | return ERR_PTR(status); | 59 | return ERR_PTR(status); |
60 | 60 | ||
61 | host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, | 61 | host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, |
62 | nlm_init->protocol, nlm_version, | 62 | nlm_init->protocol, nlm_version, |
63 | nlm_init->hostname, | 63 | nlm_init->hostname); |
64 | strlen(nlm_init->hostname)); | ||
65 | if (host == NULL) { | 64 | if (host == NULL) { |
66 | lockd_down(); | 65 | lockd_down(); |
67 | return ERR_PTR(-ENOLCK); | 66 | return ERR_PTR(-ENOLCK); |
@@ -142,7 +141,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) | |||
142 | /* | 141 | /* |
143 | * The server lockd has called us back to tell us the lock was granted | 142 | * The server lockd has called us back to tell us the lock was granted |
144 | */ | 143 | */ |
145 | __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) | 144 | __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) |
146 | { | 145 | { |
147 | const struct file_lock *fl = &lock->fl; | 146 | const struct file_lock *fl = &lock->fl; |
148 | const struct nfs_fh *fh = &lock->fh; | 147 | const struct nfs_fh *fh = &lock->fh; |
@@ -166,7 +165,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock | |||
166 | */ | 165 | */ |
167 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) | 166 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) |
168 | continue; | 167 | continue; |
169 | if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) | 168 | if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) |
170 | continue; | 169 | continue; |
171 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) | 170 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) |
172 | continue; | 171 | continue; |
@@ -216,7 +215,7 @@ reclaimer(void *ptr) | |||
216 | /* This one ensures that our parent doesn't terminate while the | 215 | /* This one ensures that our parent doesn't terminate while the |
217 | * reclaim is in progress */ | 216 | * reclaim is in progress */ |
218 | lock_kernel(); | 217 | lock_kernel(); |
219 | lockd_up(0); /* note: this cannot fail as lockd is already running */ | 218 | lockd_up(); /* note: this cannot fail as lockd is already running */ |
220 | 219 | ||
221 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); | 220 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); |
222 | 221 | ||
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c new file mode 100644 index 000000000000..183cc1f0af1c --- /dev/null +++ b/fs/lockd/grace.c | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * Common code for control of lockd and nfsv4 grace periods. | ||
3 | */ | ||
4 | |||
5 | #include <linux/module.h> | ||
6 | #include <linux/lockd/bind.h> | ||
7 | |||
8 | static LIST_HEAD(grace_list); | ||
9 | static DEFINE_SPINLOCK(grace_lock); | ||
10 | |||
11 | /** | |
12 | * locks_start_grace - start a grace period on behalf of a lock manager | |
13 | * @lm: who this grace period is for | |
14 | * | |
15 | * A grace period is a period during which locks should not be given | |
16 | * out. Currently grace periods are only enforced by the two lock | |
17 | * managers (lockd and nfsd), using the locks_in_grace() function to | |
18 | * check when they are in a grace period. | |
19 | * | |
20 | * This function is called to start a grace period. @lm is linked onto the grace list unconditionally, so it must not be on the list already. | |
21 | */ | |
22 | void locks_start_grace(struct lock_manager *lm) | |
23 | { | |
24 | spin_lock(&grace_lock); | |
25 | list_add(&lm->list, &grace_list); | |
26 | spin_unlock(&grace_lock); | |
27 | } | |
28 | EXPORT_SYMBOL_GPL(locks_start_grace); | |
29 | |||
30 | /** | |
31 | * locks_end_grace - end the grace period of a lock manager | |
32 | * @lm: who this grace period is for | |
33 | * | |
34 | * Call this function to state that the given lock manager is ready to | |
35 | * resume regular locking. The grace period will not end until all lock | |
36 | * managers that called locks_start_grace() also call locks_end_grace(). | |
37 | * Note that callers count on it being safe to call this more than once, | |
38 | * and the second call should be a no-op: list_del_init() leaves @lm's list entry pointing at itself. | |
39 | */ | |
40 | void locks_end_grace(struct lock_manager *lm) | |
41 | { | |
42 | spin_lock(&grace_lock); | |
43 | list_del_init(&lm->list); | |
44 | spin_unlock(&grace_lock); | |
45 | } | |
46 | EXPORT_SYMBOL_GPL(locks_end_grace); | |
47 | |||
48 | /** | |
49 | * locks_in_grace - test whether any lock manager is still in its grace period | |
50 | * | |
51 | * Lock managers call this function to determine when it is OK for them | |
52 | * to answer ordinary lock requests, and when they should accept only | |
53 | * lock reclaims. Returns non-zero while the grace list is non-empty; the list is tested without taking grace_lock. | |
54 | */ | |
55 | int locks_in_grace(void) | |
56 | { | |
57 | return !list_empty(&grace_list); | |
58 | } | |
59 | EXPORT_SYMBOL_GPL(locks_in_grace); | |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a17664c7eacc..9fd8889097b7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -11,16 +11,17 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/in.h> | 13 | #include <linux/in.h> |
14 | #include <linux/in6.h> | ||
14 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/svc.h> | 16 | #include <linux/sunrpc/svc.h> |
16 | #include <linux/lockd/lockd.h> | 17 | #include <linux/lockd/lockd.h> |
17 | #include <linux/lockd/sm_inter.h> | 18 | #include <linux/lockd/sm_inter.h> |
18 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
19 | 20 | ||
21 | #include <net/ipv6.h> | ||
20 | 22 | ||
21 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE | 23 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE |
22 | #define NLM_HOST_NRHASH 32 | 24 | #define NLM_HOST_NRHASH 32 |
23 | #define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) | ||
24 | #define NLM_HOST_REBIND (60 * HZ) | 25 | #define NLM_HOST_REBIND (60 * HZ) |
25 | #define NLM_HOST_EXPIRE (300 * HZ) | 26 | #define NLM_HOST_EXPIRE (300 * HZ) |
26 | #define NLM_HOST_COLLECT (120 * HZ) | 27 | #define NLM_HOST_COLLECT (120 * HZ) |
@@ -30,42 +31,115 @@ static unsigned long next_gc; | |||
30 | static int nrhosts; | 31 | static int nrhosts; |
31 | static DEFINE_MUTEX(nlm_host_mutex); | 32 | static DEFINE_MUTEX(nlm_host_mutex); |
32 | 33 | ||
33 | |||
34 | static void nlm_gc_hosts(void); | 34 | static void nlm_gc_hosts(void); |
35 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | 35 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
36 | const char *, unsigned int, int); | 36 | const size_t salen, |
37 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | 37 | const char *hostname, |
38 | const char *hostname, | 38 | const size_t hostname_len, |
39 | unsigned int hostname_len); | 39 | const int create); |
40 | /* Search key handed to nlm_lookup_host() */ |
41 | struct nlm_lookup_host_info { | |
42 | const int server; /* search for server|client */ | |
43 | const struct sockaddr *sap; /* address to search for */ | |
44 | const size_t salen; /* its length */ | |
45 | const unsigned short protocol; /* transport to search for */ | |
46 | const u32 version; /* NLM version to search for */ | |
47 | const char *hostname; /* remote's hostname */ | |
48 | const size_t hostname_len; /* its length */ | |
49 | const struct sockaddr *src_sap; /* our address (optional) */ | |
50 | const size_t src_len; /* its length */ | |
51 | }; | |
52 | |||
53 | /* | |
54 | * Hash function must work well on big- and little-endian platforms: the two shifts XOR all four bytes of @n into the low byte of the result, so the low-order bits that nlm_hash_address() keeps do not depend on byte order | |
55 | */ | |
56 | static unsigned int __nlm_hash32(const __be32 n) | |
57 | { | |
58 | unsigned int hash = (__force u32)n ^ ((__force u32)n >> 16); | |
59 | return hash ^ (hash >> 8); | |
60 | } | |
61 | /* Hash the IPv4 address held in @sap (treated as a sockaddr_in); the port is not hashed. */ |
62 | static unsigned int __nlm_hash_addr4(const struct sockaddr *sap) | |
63 | { | |
64 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | |
65 | return __nlm_hash32(sin->sin_addr.s_addr); | |
66 | } | |
67 | /* Hash the IPv6 address held in @sap (treated as a sockaddr_in6): XOR of __nlm_hash32() over its four 32-bit words. */ |
68 | static unsigned int __nlm_hash_addr6(const struct sockaddr *sap) | |
69 | { | |
70 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | |
71 | const struct in6_addr addr = sin6->sin6_addr; | |
72 | return __nlm_hash32(addr.s6_addr32[0]) ^ | |
73 | __nlm_hash32(addr.s6_addr32[1]) ^ | |
74 | __nlm_hash32(addr.s6_addr32[2]) ^ | |
75 | __nlm_hash32(addr.s6_addr32[3]); | |
76 | } | |
77 | /* Map @sap to an index below NLM_HOST_NRHASH; address families other than AF_INET/AF_INET6 all hash to 0. */ |
78 | static unsigned int nlm_hash_address(const struct sockaddr *sap) | |
79 | { | |
80 | unsigned int hash; | |
81 | |
82 | switch (sap->sa_family) { | |
83 | case AF_INET: | |
84 | hash = __nlm_hash_addr4(sap); | |
85 | break; | |
86 | case AF_INET6: | |
87 | hash = __nlm_hash_addr6(sap); | |
88 | break; | |
89 | default: | |
90 | hash = 0; | |
91 | } | |
92 | return hash & (NLM_HOST_NRHASH - 1); | |
93 | } | |
94 | /* Zero the port of an AF_INET or AF_INET6 address in place; any other family is left untouched. */ |
95 | static void nlm_clear_port(struct sockaddr *sap) | |
96 | { | |
97 | switch (sap->sa_family) { | |
98 | case AF_INET: | |
99 | ((struct sockaddr_in *)sap)->sin_port = 0; | |
100 | break; | |
101 | case AF_INET6: | |
102 | ((struct sockaddr_in6 *)sap)->sin6_port = 0; | |
103 | break; | |
104 | } | |
105 | } | |
106 | /* Format @sap as text into @buf (snprintf, at most @len bytes); IPv4-mapped IPv6 addresses are printed as dotted quads. */ |
107 | static void nlm_display_address(const struct sockaddr *sap, | |
108 | char *buf, const size_t len) | |
109 | { | |
110 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | |
111 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | |
112 | |
113 | switch (sap->sa_family) { | |
114 | case AF_UNSPEC: | |
115 | snprintf(buf, len, "unspecified"); | |
116 | break; | |
117 | case AF_INET: | |
118 | snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); | |
119 | break; | |
120 | case AF_INET6: | |
121 | if (ipv6_addr_v4mapped(&sin6->sin6_addr)) | |
122 | snprintf(buf, len, NIPQUAD_FMT, | |
123 | NIPQUAD(sin6->sin6_addr.s6_addr32[3])); | |
124 | else | |
125 | snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr)); | |
126 | break; | |
127 | default: | |
128 | snprintf(buf, len, "unsupported address family"); | |
129 | break; | |
130 | } | |
131 | } | |
40 | 132 | ||
41 | /* | 133 | /* |
42 | * Common host lookup routine for server & client | 134 | * Common host lookup routine for server & client |
43 | */ | 135 | */ |
44 | static struct nlm_host *nlm_lookup_host(int server, | 136 | static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) |
45 | const struct sockaddr_in *sin, | ||
46 | int proto, u32 version, | ||
47 | const char *hostname, | ||
48 | unsigned int hostname_len, | ||
49 | const struct sockaddr_in *ssin) | ||
50 | { | 137 | { |
51 | struct hlist_head *chain; | 138 | struct hlist_head *chain; |
52 | struct hlist_node *pos; | 139 | struct hlist_node *pos; |
53 | struct nlm_host *host; | 140 | struct nlm_host *host; |
54 | struct nsm_handle *nsm = NULL; | 141 | struct nsm_handle *nsm = NULL; |
55 | int hash; | ||
56 | |||
57 | dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT | ||
58 | ", p=%d, v=%u, my role=%s, name=%.*s)\n", | ||
59 | NIPQUAD(ssin->sin_addr.s_addr), | ||
60 | NIPQUAD(sin->sin_addr.s_addr), proto, version, | ||
61 | server? "server" : "client", | ||
62 | hostname_len, | ||
63 | hostname? hostname : "<none>"); | ||
64 | 142 | ||
65 | |||
66 | hash = NLM_ADDRHASH(sin->sin_addr.s_addr); | ||
67 | |||
68 | /* Lock hash table */ | ||
69 | mutex_lock(&nlm_host_mutex); | 143 | mutex_lock(&nlm_host_mutex); |
70 | 144 | ||
71 | if (time_after_eq(jiffies, next_gc)) | 145 | if (time_after_eq(jiffies, next_gc)) |
@@ -78,22 +152,22 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
78 | * different NLM rpc_clients into one single nlm_host object. | 152 | * different NLM rpc_clients into one single nlm_host object. |
79 | * This would allow us to have one nlm_host per address. | 153 | * This would allow us to have one nlm_host per address. |
80 | */ | 154 | */ |
81 | chain = &nlm_hosts[hash]; | 155 | chain = &nlm_hosts[nlm_hash_address(ni->sap)]; |
82 | hlist_for_each_entry(host, pos, chain, h_hash) { | 156 | hlist_for_each_entry(host, pos, chain, h_hash) { |
83 | if (!nlm_cmp_addr(&host->h_addr, sin)) | 157 | if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) |
84 | continue; | 158 | continue; |
85 | 159 | ||
86 | /* See if we have an NSM handle for this client */ | 160 | /* See if we have an NSM handle for this client */ |
87 | if (!nsm) | 161 | if (!nsm) |
88 | nsm = host->h_nsmhandle; | 162 | nsm = host->h_nsmhandle; |
89 | 163 | ||
90 | if (host->h_proto != proto) | 164 | if (host->h_proto != ni->protocol) |
91 | continue; | 165 | continue; |
92 | if (host->h_version != version) | 166 | if (host->h_version != ni->version) |
93 | continue; | 167 | continue; |
94 | if (host->h_server != server) | 168 | if (host->h_server != ni->server) |
95 | continue; | 169 | continue; |
96 | if (!nlm_cmp_addr(&host->h_saddr, ssin)) | 170 | if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) |
97 | continue; | 171 | continue; |
98 | 172 | ||
99 | /* Move to head of hash chain. */ | 173 | /* Move to head of hash chain. */ |
@@ -101,30 +175,41 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
101 | hlist_add_head(&host->h_hash, chain); | 175 | hlist_add_head(&host->h_hash, chain); |
102 | 176 | ||
103 | nlm_get_host(host); | 177 | nlm_get_host(host); |
178 | dprintk("lockd: nlm_lookup_host found host %s (%s)\n", | ||
179 | host->h_name, host->h_addrbuf); | ||
104 | goto out; | 180 | goto out; |
105 | } | 181 | } |
106 | if (nsm) | ||
107 | atomic_inc(&nsm->sm_count); | ||
108 | |||
109 | host = NULL; | ||
110 | 182 | ||
111 | /* Sadly, the host isn't in our hash table yet. See if | 183 | /* |
112 | * we have an NSM handle for it. If not, create one. | 184 | * The host wasn't in our hash table. If we don't |
185 | * have an NSM handle for it yet, create one. | ||
113 | */ | 186 | */ |
114 | if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) | 187 | if (nsm) |
115 | goto out; | 188 | atomic_inc(&nsm->sm_count); |
189 | else { | ||
190 | host = NULL; | ||
191 | nsm = nsm_find(ni->sap, ni->salen, | ||
192 | ni->hostname, ni->hostname_len, 1); | ||
193 | if (!nsm) { | ||
194 | dprintk("lockd: nlm_lookup_host failed; " | ||
195 | "no nsm handle\n"); | ||
196 | goto out; | ||
197 | } | ||
198 | } | ||
116 | 199 | ||
117 | host = kzalloc(sizeof(*host), GFP_KERNEL); | 200 | host = kzalloc(sizeof(*host), GFP_KERNEL); |
118 | if (!host) { | 201 | if (!host) { |
119 | nsm_release(nsm); | 202 | nsm_release(nsm); |
203 | dprintk("lockd: nlm_lookup_host failed; no memory\n"); | ||
120 | goto out; | 204 | goto out; |
121 | } | 205 | } |
122 | host->h_name = nsm->sm_name; | 206 | host->h_name = nsm->sm_name; |
123 | host->h_addr = *sin; | 207 | memcpy(nlm_addr(host), ni->sap, ni->salen); |
124 | host->h_addr.sin_port = 0; /* ouch! */ | 208 | host->h_addrlen = ni->salen; |
125 | host->h_saddr = *ssin; | 209 | nlm_clear_port(nlm_addr(host)); |
126 | host->h_version = version; | 210 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); |
127 | host->h_proto = proto; | 211 | host->h_version = ni->version; |
212 | host->h_proto = ni->protocol; | ||
128 | host->h_rpcclnt = NULL; | 213 | host->h_rpcclnt = NULL; |
129 | mutex_init(&host->h_mutex); | 214 | mutex_init(&host->h_mutex); |
130 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 215 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
@@ -135,7 +220,7 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
135 | host->h_state = 0; /* pseudo NSM state */ | 220 | host->h_state = 0; /* pseudo NSM state */ |
136 | host->h_nsmstate = 0; /* real NSM state */ | 221 | host->h_nsmstate = 0; /* real NSM state */ |
137 | host->h_nsmhandle = nsm; | 222 | host->h_nsmhandle = nsm; |
138 | host->h_server = server; | 223 | host->h_server = ni->server; |
139 | hlist_add_head(&host->h_hash, chain); | 224 | hlist_add_head(&host->h_hash, chain); |
140 | INIT_LIST_HEAD(&host->h_lockowners); | 225 | INIT_LIST_HEAD(&host->h_lockowners); |
141 | spin_lock_init(&host->h_lock); | 226 | spin_lock_init(&host->h_lock); |
@@ -143,6 +228,15 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
143 | INIT_LIST_HEAD(&host->h_reclaim); | 228 | INIT_LIST_HEAD(&host->h_reclaim); |
144 | 229 | ||
145 | nrhosts++; | 230 | nrhosts++; |
231 | |||
232 | nlm_display_address((struct sockaddr *)&host->h_addr, | ||
233 | host->h_addrbuf, sizeof(host->h_addrbuf)); | ||
234 | nlm_display_address((struct sockaddr *)&host->h_srcaddr, | ||
235 | host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf)); | ||
236 | |||
237 | dprintk("lockd: nlm_lookup_host created host %s\n", | ||
238 | host->h_name); | ||
239 | |||
146 | out: | 240 | out: |
147 | mutex_unlock(&nlm_host_mutex); | 241 | mutex_unlock(&nlm_host_mutex); |
148 | return host; | 242 | return host; |
@@ -170,33 +264,103 @@ nlm_destroy_host(struct nlm_host *host) | |||
170 | kfree(host); | 264 | kfree(host); |
171 | } | 265 | } |
172 | 266 | ||
173 | /* | 267 | /** |
174 | * Find an NLM server handle in the cache. If there is none, create it. | 268 | * nlmclnt_lookup_host - Find an NLM host handle matching a remote server |
269 | * @sap: network address of server | |
270 | * @salen: length of server address | |
271 | * @protocol: transport protocol to use | |
272 | * @version: NLM protocol version | |
273 | * @hostname: '\0'-terminated hostname of server | |
274 | * | |
275 | * Returns an nlm_host structure that matches the passed-in | |
276 | * [server address, transport protocol, NLM version, server hostname]. | |
277 | * If one doesn't already exist in the host cache, a new handle is | |
278 | * created and returned. | |
175 | */ | 279 | */ |
176 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, | 280 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, |
177 | int proto, u32 version, | 281 | const size_t salen, |
178 | const char *hostname, | 282 | const unsigned short protocol, |
179 | unsigned int hostname_len) | 283 | const u32 version, const char *hostname) |
180 | { | 284 | { |
181 | struct sockaddr_in ssin = {0}; | 285 | const struct sockaddr source = { |
182 | 286 | .sa_family = AF_UNSPEC, | |
183 | return nlm_lookup_host(0, sin, proto, version, | 287 | }; |
184 | hostname, hostname_len, &ssin); | 288 | struct nlm_lookup_host_info ni = { |
289 | .server = 0, | |
290 | .sap = sap, | |
291 | .salen = salen, | |
292 | .protocol = protocol, | |
293 | .version = version, | |
294 | .hostname = hostname, | |
295 | .hostname_len = hostname ? strlen(hostname) : 0, /* don't strlen() a NULL hostname; the dprintk below allows for one */ | |
296 | .src_sap = &source, | |
297 | .src_len = sizeof(source), | |
298 | }; | |
299 | |
300 | dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, | |
301 | (hostname ? hostname : "<none>"), version, | |
302 | (protocol == IPPROTO_UDP ? "udp" : "tcp")); | |
303 | |
304 | return nlm_lookup_host(&ni); | |
185 | } | 305 | } |
186 | 306 | ||
187 | /* | 307 | /** |
188 | * Find an NLM client handle in the cache. If there is none, create it. | 308 | * nlmsvc_lookup_host - Find an NLM host handle matching a remote client |
309 | * @rqstp: incoming NLM request | |
310 | * @hostname: name of client host | |
311 | * @hostname_len: length of client hostname | |
312 | * | |
313 | * Returns an nlm_host structure that matches the [client address, | |
314 | * transport protocol, NLM version, client hostname] of the passed-in | |
315 | * NLM request. If one doesn't already exist in the host cache, a | |
316 | * new handle is created and returned. | |
317 | * | |
318 | * Before possibly creating a new nlm_host, construct a sockaddr | |
319 | * for a specific source address in case the local system has | |
320 | * multiple network addresses. The family of the address in | |
321 | * rq_daddr is guaranteed to be the same as the family of the | |
322 | * address in rq_addr, so it's safe to use the same family for | |
323 | * the source address. | |
189 | */ | 324 | */ |
190 | struct nlm_host * | 325 | struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, |
191 | nlmsvc_lookup_host(struct svc_rqst *rqstp, | 326 | const char *hostname, |
192 | const char *hostname, unsigned int hostname_len) | 327 | const size_t hostname_len) |
193 | { | 328 | { |
194 | struct sockaddr_in ssin = {0}; | 329 | struct sockaddr_in sin = { |
330 | .sin_family = AF_INET, | |
331 | }; | |
332 | struct sockaddr_in6 sin6 = { | |
333 | .sin6_family = AF_INET6, | |
334 | }; | |
335 | struct nlm_lookup_host_info ni = { | |
336 | .server = 1, | |
337 | .sap = svc_addr(rqstp), | |
338 | .salen = rqstp->rq_addrlen, | |
339 | .protocol = rqstp->rq_prot, | |
340 | .version = rqstp->rq_vers, | |
341 | .hostname = hostname, | |
342 | .hostname_len = hostname_len, | |
343 | .src_len = rqstp->rq_addrlen, | |
344 | }; | |
345 | /* hostname comes with an explicit length: bound the print with a precision (%.*s), not a field width (%*s) */ | |
346 | dprintk("lockd: %s(host='%.*s', vers=%u, proto=%s)\n", __func__, | |
347 | (int)hostname_len, hostname, rqstp->rq_vers, | |
348 | (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); | |
349 | |
350 | switch (ni.sap->sa_family) { | |
351 | case AF_INET: | |
352 | sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; | |
353 | ni.src_sap = (struct sockaddr *)&sin; | |
354 | break; | |
355 | case AF_INET6: | |
356 | ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); | |
357 | ni.src_sap = (struct sockaddr *)&sin6; | |
358 | break; | |
359 | default: | |
360 | return NULL; | |
361 | } | |
195 | 362 | |
196 | ssin.sin_addr = rqstp->rq_daddr.addr; | 363 | return nlm_lookup_host(&ni); |
197 | return nlm_lookup_host(1, svc_addr_in(rqstp), | |
198 | rqstp->rq_prot, rqstp->rq_vers, | |
199 | hostname, hostname_len, &ssin); | |
200 | } | 364 | } |
201 | 365 | ||
202 | /* | 366 | /* |
@@ -207,9 +371,8 @@ nlm_bind_host(struct nlm_host *host) | |||
207 | { | 371 | { |
208 | struct rpc_clnt *clnt; | 372 | struct rpc_clnt *clnt; |
209 | 373 | ||
210 | dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", | 374 | dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n", |
211 | NIPQUAD(host->h_saddr.sin_addr), | 375 | host->h_name, host->h_addrbuf, host->h_srcaddrbuf); |
212 | NIPQUAD(host->h_addr.sin_addr)); | ||
213 | 376 | ||
214 | /* Lock host handle */ | 377 | /* Lock host handle */ |
215 | mutex_lock(&host->h_mutex); | 378 | mutex_lock(&host->h_mutex); |
@@ -221,7 +384,7 @@ nlm_bind_host(struct nlm_host *host) | |||
221 | if (time_after_eq(jiffies, host->h_nextrebind)) { | 384 | if (time_after_eq(jiffies, host->h_nextrebind)) { |
222 | rpc_force_rebind(clnt); | 385 | rpc_force_rebind(clnt); |
223 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 386 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
224 | dprintk("lockd: next rebind in %ld jiffies\n", | 387 | dprintk("lockd: next rebind in %lu jiffies\n", |
225 | host->h_nextrebind - jiffies); | 388 | host->h_nextrebind - jiffies); |
226 | } | 389 | } |
227 | } else { | 390 | } else { |
@@ -234,9 +397,9 @@ nlm_bind_host(struct nlm_host *host) | |||
234 | }; | 397 | }; |
235 | struct rpc_create_args args = { | 398 | struct rpc_create_args args = { |
236 | .protocol = host->h_proto, | 399 | .protocol = host->h_proto, |
237 | .address = (struct sockaddr *)&host->h_addr, | 400 | .address = nlm_addr(host), |
238 | .addrsize = sizeof(host->h_addr), | 401 | .addrsize = host->h_addrlen, |
239 | .saddress = (struct sockaddr *)&host->h_saddr, | 402 | .saddress = nlm_srcaddr(host), |
240 | .timeout = &timeparms, | 403 | .timeout = &timeparms, |
241 | .servername = host->h_name, | 404 | .servername = host->h_name, |
242 | .program = &nlm_program, | 405 | .program = &nlm_program, |
@@ -324,12 +487,16 @@ void nlm_host_rebooted(const struct sockaddr_in *sin, | |||
324 | struct nsm_handle *nsm; | 487 | struct nsm_handle *nsm; |
325 | struct nlm_host *host; | 488 | struct nlm_host *host; |
326 | 489 | ||
327 | dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", | 490 | nsm = nsm_find((struct sockaddr *)sin, sizeof(*sin), |
328 | hostname, NIPQUAD(sin->sin_addr)); | 491 | hostname, hostname_len, 0); |
329 | 492 | if (nsm == NULL) { | |
330 | /* Find the NSM handle for this peer */ | 493 | dprintk("lockd: never saw rebooted peer '%.*s' before\n", |
331 | if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) | 494 | hostname_len, hostname); |
332 | return; | 495 | return; |
496 | } | ||
497 | |||
498 | dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n", | ||
499 | hostname_len, hostname, nsm->sm_addrbuf); | ||
333 | 500 | ||
334 | /* When reclaiming locks on this peer, make sure that | 501 | /* When reclaiming locks on this peer, make sure that |
335 | * we set up a new notification */ | 502 | * we set up a new notification */ |
@@ -461,22 +628,23 @@ nlm_gc_hosts(void) | |||
461 | static LIST_HEAD(nsm_handles); | 628 | static LIST_HEAD(nsm_handles); |
462 | static DEFINE_SPINLOCK(nsm_lock); | 629 | static DEFINE_SPINLOCK(nsm_lock); |
463 | 630 | ||
464 | static struct nsm_handle * | 631 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
465 | __nsm_find(const struct sockaddr_in *sin, | 632 | const size_t salen, |
466 | const char *hostname, unsigned int hostname_len, | 633 | const char *hostname, |
467 | int create) | 634 | const size_t hostname_len, |
635 | const int create) | ||
468 | { | 636 | { |
469 | struct nsm_handle *nsm = NULL; | 637 | struct nsm_handle *nsm = NULL; |
470 | struct nsm_handle *pos; | 638 | struct nsm_handle *pos; |
471 | 639 | ||
472 | if (!sin) | 640 | if (!sap) |
473 | return NULL; | 641 | return NULL; |
474 | 642 | ||
475 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { | 643 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { |
476 | if (printk_ratelimit()) { | 644 | if (printk_ratelimit()) { |
477 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " | 645 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " |
478 | "in NFS lock request\n", | 646 | "in NFS lock request\n", |
479 | hostname_len, hostname); | 647 | (int)hostname_len, hostname); |
480 | } | 648 | } |
481 | return NULL; | 649 | return NULL; |
482 | } | 650 | } |
@@ -489,7 +657,7 @@ retry: | |||
489 | if (strlen(pos->sm_name) != hostname_len | 657 | if (strlen(pos->sm_name) != hostname_len |
490 | || memcmp(pos->sm_name, hostname, hostname_len)) | 658 | || memcmp(pos->sm_name, hostname, hostname_len)) |
491 | continue; | 659 | continue; |
492 | } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) | 660 | } else if (!nlm_cmp_addr(nsm_addr(pos), sap)) |
493 | continue; | 661 | continue; |
494 | atomic_inc(&pos->sm_count); | 662 | atomic_inc(&pos->sm_count); |
495 | kfree(nsm); | 663 | kfree(nsm); |
@@ -509,10 +677,13 @@ retry: | |||
509 | if (nsm == NULL) | 677 | if (nsm == NULL) |
510 | return NULL; | 678 | return NULL; |
511 | 679 | ||
512 | nsm->sm_addr = *sin; | 680 | memcpy(nsm_addr(nsm), sap, salen); |
681 | nsm->sm_addrlen = salen; | ||
513 | nsm->sm_name = (char *) (nsm + 1); | 682 | nsm->sm_name = (char *) (nsm + 1); |
514 | memcpy(nsm->sm_name, hostname, hostname_len); | 683 | memcpy(nsm->sm_name, hostname, hostname_len); |
515 | nsm->sm_name[hostname_len] = '\0'; | 684 | nsm->sm_name[hostname_len] = '\0'; |
685 | nlm_display_address((struct sockaddr *)&nsm->sm_addr, | ||
686 | nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf)); | ||
516 | atomic_set(&nsm->sm_count, 1); | 687 | atomic_set(&nsm->sm_count, 1); |
517 | goto retry; | 688 | goto retry; |
518 | 689 | ||
@@ -521,13 +692,6 @@ found: | |||
521 | return nsm; | 692 | return nsm; |
522 | } | 693 | } |
523 | 694 | ||
524 | static struct nsm_handle * | ||
525 | nsm_find(const struct sockaddr_in *sin, const char *hostname, | ||
526 | unsigned int hostname_len) | ||
527 | { | ||
528 | return __nsm_find(sin, hostname, hostname_len, 1); | ||
529 | } | ||
530 | |||
531 | /* | 695 | /* |
532 | * Release an NSM handle | 696 | * Release an NSM handle |
533 | */ | 697 | */ |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e4d563543b11..4e7e958e8f67 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -51,7 +51,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) | |||
51 | 51 | ||
52 | memset(&args, 0, sizeof(args)); | 52 | memset(&args, 0, sizeof(args)); |
53 | args.mon_name = nsm->sm_name; | 53 | args.mon_name = nsm->sm_name; |
54 | args.addr = nsm->sm_addr.sin_addr.s_addr; | 54 | args.addr = nsm_addr_in(nsm)->sin_addr.s_addr; |
55 | args.prog = NLM_PROGRAM; | 55 | args.prog = NLM_PROGRAM; |
56 | args.vers = 3; | 56 | args.vers = 3; |
57 | args.proc = NLMPROC_NSM_NOTIFY; | 57 | args.proc = NLMPROC_NSM_NOTIFY; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5bd9bf0fa9df..c631a83931ce 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex); | |||
51 | static unsigned int nlmsvc_users; | 51 | static unsigned int nlmsvc_users; |
52 | static struct task_struct *nlmsvc_task; | 52 | static struct task_struct *nlmsvc_task; |
53 | static struct svc_rqst *nlmsvc_rqst; | 53 | static struct svc_rqst *nlmsvc_rqst; |
54 | int nlmsvc_grace_period; | ||
55 | unsigned long nlmsvc_timeout; | 54 | unsigned long nlmsvc_timeout; |
56 | 55 | ||
57 | /* | 56 | /* |
@@ -85,27 +84,23 @@ static unsigned long get_lockd_grace_period(void) | |||
85 | return nlm_timeout * 5 * HZ; | 84 | return nlm_timeout * 5 * HZ; |
86 | } | 85 | } |
87 | 86 | ||
88 | unsigned long get_nfs_grace_period(void) | 87 | static struct lock_manager lockd_manager = { |
89 | { | 88 | }; |
90 | unsigned long lockdgrace = get_lockd_grace_period(); | ||
91 | unsigned long nfsdgrace = 0; | ||
92 | |||
93 | if (nlmsvc_ops) | ||
94 | nfsdgrace = nlmsvc_ops->get_grace_period(); | ||
95 | |||
96 | return max(lockdgrace, nfsdgrace); | ||
97 | } | ||
98 | EXPORT_SYMBOL(get_nfs_grace_period); | ||
99 | 89 | ||
100 | static unsigned long set_grace_period(void) | 90 | static void grace_ender(struct work_struct *not_used) |
101 | { | 91 | { |
102 | nlmsvc_grace_period = 1; | 92 | locks_end_grace(&lockd_manager); |
103 | return get_nfs_grace_period() + jiffies; | ||
104 | } | 93 | } |
105 | 94 | ||
106 | static inline void clear_grace_period(void) | 95 | static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); |
96 | /* Start lockd's grace period and (re)arm the delayed work that ends it. NOTE(review): lockd() calls this again after a signal; locks_start_grace() list_add()s lockd_manager unconditionally, so if the manager is still on the grace list it would be added twice - confirm. */ |
97 | static void set_grace_period(void) | |
107 | { | 98 | { |
108 | nlmsvc_grace_period = 0; | 99 | unsigned long grace_period = get_lockd_grace_period(); |
100 | |
101 | locks_start_grace(&lockd_manager); | |
102 | cancel_delayed_work_sync(&grace_period_end); | |
103 | schedule_delayed_work(&grace_period_end, grace_period); | |
109 | } | 104 | } |
110 | 105 | ||
111 | /* | 106 | /* |
@@ -116,7 +111,6 @@ lockd(void *vrqstp) | |||
116 | { | 111 | { |
117 | int err = 0, preverr = 0; | 112 | int err = 0, preverr = 0; |
118 | struct svc_rqst *rqstp = vrqstp; | 113 | struct svc_rqst *rqstp = vrqstp; |
119 | unsigned long grace_period_expire; | ||
120 | 114 | ||
121 | /* try_to_freeze() is called from svc_recv() */ | 115 | /* try_to_freeze() is called from svc_recv() */ |
122 | set_freezable(); | 116 | set_freezable(); |
@@ -139,7 +133,7 @@ lockd(void *vrqstp) | |||
139 | nlm_timeout = LOCKD_DFLT_TIMEO; | 133 | nlm_timeout = LOCKD_DFLT_TIMEO; |
140 | nlmsvc_timeout = nlm_timeout * HZ; | 134 | nlmsvc_timeout = nlm_timeout * HZ; |
141 | 135 | ||
142 | grace_period_expire = set_grace_period(); | 136 | set_grace_period(); |
143 | 137 | ||
144 | /* | 138 | /* |
145 | * The main request loop. We don't terminate until the last | 139 | * The main request loop. We don't terminate until the last |
@@ -153,21 +147,12 @@ lockd(void *vrqstp) | |||
153 | flush_signals(current); | 147 | flush_signals(current); |
154 | if (nlmsvc_ops) { | 148 | if (nlmsvc_ops) { |
155 | nlmsvc_invalidate_all(); | 149 | nlmsvc_invalidate_all(); |
156 | grace_period_expire = set_grace_period(); | 150 | set_grace_period(); |
157 | } | 151 | } |
158 | continue; | 152 | continue; |
159 | } | 153 | } |
160 | 154 | ||
161 | /* | 155 | timeout = nlmsvc_retry_blocked(); |
162 | * Retry any blocked locks that have been notified by | ||
163 | * the VFS. Don't do this during grace period. | ||
164 | * (Theoretically, there shouldn't even be blocked locks | ||
165 | * during grace period). | ||
166 | */ | ||
167 | if (!nlmsvc_grace_period) { | ||
168 | timeout = nlmsvc_retry_blocked(); | ||
169 | } else if (time_before(grace_period_expire, jiffies)) | ||
170 | clear_grace_period(); | ||
171 | 156 | ||
172 | /* | 157 | /* |
173 | * Find a socket with data available and call its | 158 | * Find a socket with data available and call its |
@@ -195,6 +180,7 @@ lockd(void *vrqstp) | |||
195 | svc_process(rqstp); | 180 | svc_process(rqstp); |
196 | } | 181 | } |
197 | flush_signals(current); | 182 | flush_signals(current); |
183 | cancel_delayed_work_sync(&grace_period_end); | ||
198 | if (nlmsvc_ops) | 184 | if (nlmsvc_ops) |
199 | nlmsvc_invalidate_all(); | 185 | nlmsvc_invalidate_all(); |
200 | nlm_shutdown_hosts(); | 186 | nlm_shutdown_hosts(); |
@@ -203,25 +189,28 @@ lockd(void *vrqstp) | |||
203 | } | 189 | } |
204 | 190 | ||
205 | /* | 191 | /* |
206 | * Make any sockets that are needed but not present. | 192 | * Ensure there are active UDP and TCP listeners for lockd. |
207 | * If nlm_udpport or nlm_tcpport were set as module | 193 | * |
208 | * options, make those sockets unconditionally | 194 | * Even if we have only TCP NFS mounts and/or TCP NFSDs, some |
195 | * local services (such as rpc.statd) still require UDP, and | ||
196 | * some NFS servers do not yet support NLM over TCP. | ||
197 | * | ||
198 | * Returns zero if all listeners are available; otherwise a | ||
199 | * negative errno value is returned. | ||
209 | */ | 200 | */ |
210 | static int make_socks(struct svc_serv *serv, int proto) | 201 | static int make_socks(struct svc_serv *serv) |
211 | { | 202 | { |
212 | static int warned; | 203 | static int warned; |
213 | struct svc_xprt *xprt; | 204 | struct svc_xprt *xprt; |
214 | int err = 0; | 205 | int err = 0; |
215 | 206 | ||
216 | if (proto == IPPROTO_UDP || nlm_udpport) { | 207 | xprt = svc_find_xprt(serv, "udp", 0, 0); |
217 | xprt = svc_find_xprt(serv, "udp", 0, 0); | 208 | if (!xprt) |
218 | if (!xprt) | 209 | err = svc_create_xprt(serv, "udp", nlm_udpport, |
219 | err = svc_create_xprt(serv, "udp", nlm_udpport, | 210 | SVC_SOCK_DEFAULTS); |
220 | SVC_SOCK_DEFAULTS); | 211 | else |
221 | else | 212 | svc_xprt_put(xprt); |
222 | svc_xprt_put(xprt); | 213 | if (err >= 0) { |
223 | } | ||
224 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) { | ||
225 | xprt = svc_find_xprt(serv, "tcp", 0, 0); | 214 | xprt = svc_find_xprt(serv, "tcp", 0, 0); |
226 | if (!xprt) | 215 | if (!xprt) |
227 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, | 216 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, |
@@ -241,8 +230,7 @@ static int make_socks(struct svc_serv *serv, int proto) | |||
241 | /* | 230 | /* |
242 | * Bring up the lockd process if it's not already up. | 231 | * Bring up the lockd process if it's not already up. |
243 | */ | 232 | */ |
244 | int | 233 | int lockd_up(void) |
245 | lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | ||
246 | { | 234 | { |
247 | struct svc_serv *serv; | 235 | struct svc_serv *serv; |
248 | int error = 0; | 236 | int error = 0; |
@@ -251,11 +239,8 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
251 | /* | 239 | /* |
252 | * Check whether we're already up and running. | 240 | * Check whether we're already up and running. |
253 | */ | 241 | */ |
254 | if (nlmsvc_rqst) { | 242 | if (nlmsvc_rqst) |
255 | if (proto) | ||
256 | error = make_socks(nlmsvc_rqst->rq_server, proto); | ||
257 | goto out; | 243 | goto out; |
258 | } | ||
259 | 244 | ||
260 | /* | 245 | /* |
261 | * Sanity check: if there's no pid, | 246 | * Sanity check: if there's no pid, |
@@ -266,13 +251,14 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
266 | "lockd_up: no pid, %d users??\n", nlmsvc_users); | 251 | "lockd_up: no pid, %d users??\n", nlmsvc_users); |
267 | 252 | ||
268 | error = -ENOMEM; | 253 | error = -ENOMEM; |
269 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); | 254 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, AF_INET, NULL); |
270 | if (!serv) { | 255 | if (!serv) { |
271 | printk(KERN_WARNING "lockd_up: create service failed\n"); | 256 | printk(KERN_WARNING "lockd_up: create service failed\n"); |
272 | goto out; | 257 | goto out; |
273 | } | 258 | } |
274 | 259 | ||
275 | if ((error = make_socks(serv, proto)) < 0) | 260 | error = make_socks(serv); |
261 | if (error < 0) | ||
276 | goto destroy_and_out; | 262 | goto destroy_and_out; |
277 | 263 | ||
278 | /* | 264 | /* |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 399444639337..014f6ce48172 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -83,17 +83,11 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
83 | { | 83 | { |
84 | struct nlm_host *host; | 84 | struct nlm_host *host; |
85 | struct nlm_file *file; | 85 | struct nlm_file *file; |
86 | int rc = rpc_success; | 86 | __be32 rc = rpc_success; |
87 | 87 | ||
88 | dprintk("lockd: TEST4 called\n"); | 88 | dprintk("lockd: TEST4 called\n"); |
89 | resp->cookie = argp->cookie; | 89 | resp->cookie = argp->cookie; |
90 | 90 | ||
91 | /* Don't accept test requests during grace period */ | ||
92 | if (nlmsvc_grace_period) { | ||
93 | resp->status = nlm_lck_denied_grace_period; | ||
94 | return rc; | ||
95 | } | ||
96 | |||
97 | /* Obtain client and file */ | 91 | /* Obtain client and file */ |
98 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 92 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
99 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 93 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -116,18 +110,12 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
116 | { | 110 | { |
117 | struct nlm_host *host; | 111 | struct nlm_host *host; |
118 | struct nlm_file *file; | 112 | struct nlm_file *file; |
119 | int rc = rpc_success; | 113 | __be32 rc = rpc_success; |
120 | 114 | ||
121 | dprintk("lockd: LOCK called\n"); | 115 | dprintk("lockd: LOCK called\n"); |
122 | 116 | ||
123 | resp->cookie = argp->cookie; | 117 | resp->cookie = argp->cookie; |
124 | 118 | ||
125 | /* Don't accept new lock requests during grace period */ | ||
126 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
127 | resp->status = nlm_lck_denied_grace_period; | ||
128 | return rc; | ||
129 | } | ||
130 | |||
131 | /* Obtain client and file */ | 119 | /* Obtain client and file */ |
132 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 120 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
133 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 121 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -146,7 +134,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
146 | 134 | ||
147 | /* Now try to lock the file */ | 135 | /* Now try to lock the file */ |
148 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, | 136 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, |
149 | argp->block, &argp->cookie); | 137 | argp->block, &argp->cookie, |
138 | argp->reclaim); | ||
150 | if (resp->status == nlm_drop_reply) | 139 | if (resp->status == nlm_drop_reply) |
151 | rc = rpc_drop_reply; | 140 | rc = rpc_drop_reply; |
152 | else | 141 | else |
@@ -169,7 +158,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
169 | resp->cookie = argp->cookie; | 158 | resp->cookie = argp->cookie; |
170 | 159 | ||
171 | /* Don't accept requests during grace period */ | 160 | /* Don't accept requests during grace period */ |
172 | if (nlmsvc_grace_period) { | 161 | if (locks_in_grace()) { |
173 | resp->status = nlm_lck_denied_grace_period; | 162 | resp->status = nlm_lck_denied_grace_period; |
174 | return rpc_success; | 163 | return rpc_success; |
175 | } | 164 | } |
@@ -202,7 +191,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
202 | resp->cookie = argp->cookie; | 191 | resp->cookie = argp->cookie; |
203 | 192 | ||
204 | /* Don't accept new lock requests during grace period */ | 193 | /* Don't accept new lock requests during grace period */ |
205 | if (nlmsvc_grace_period) { | 194 | if (locks_in_grace()) { |
206 | resp->status = nlm_lck_denied_grace_period; | 195 | resp->status = nlm_lck_denied_grace_period; |
207 | return rpc_success; | 196 | return rpc_success; |
208 | } | 197 | } |
@@ -231,7 +220,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
231 | resp->cookie = argp->cookie; | 220 | resp->cookie = argp->cookie; |
232 | 221 | ||
233 | dprintk("lockd: GRANTED called\n"); | 222 | dprintk("lockd: GRANTED called\n"); |
234 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 223 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
235 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 224 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
236 | return rpc_success; | 225 | return rpc_success; |
237 | } | 226 | } |
@@ -341,7 +330,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
341 | resp->cookie = argp->cookie; | 330 | resp->cookie = argp->cookie; |
342 | 331 | ||
343 | /* Don't accept new lock requests during grace period */ | 332 | /* Don't accept new lock requests during grace period */ |
344 | if (nlmsvc_grace_period && !argp->reclaim) { | 333 | if (locks_in_grace() && !argp->reclaim) { |
345 | resp->status = nlm_lck_denied_grace_period; | 334 | resp->status = nlm_lck_denied_grace_period; |
346 | return rpc_success; | 335 | return rpc_success; |
347 | } | 336 | } |
@@ -374,7 +363,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
374 | resp->cookie = argp->cookie; | 363 | resp->cookie = argp->cookie; |
375 | 364 | ||
376 | /* Don't accept requests during grace period */ | 365 | /* Don't accept requests during grace period */ |
377 | if (nlmsvc_grace_period) { | 366 | if (locks_in_grace()) { |
378 | resp->status = nlm_lck_denied_grace_period; | 367 | resp->status = nlm_lck_denied_grace_period; |
379 | return rpc_success; | 368 | return rpc_success; |
380 | } | 369 | } |
@@ -432,11 +421,9 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
432 | { | 421 | { |
433 | struct sockaddr_in saddr; | 422 | struct sockaddr_in saddr; |
434 | 423 | ||
435 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
436 | |||
437 | dprintk("lockd: SM_NOTIFY called\n"); | 424 | dprintk("lockd: SM_NOTIFY called\n"); |
438 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 425 | |
439 | || ntohs(saddr.sin_port) >= 1024) { | 426 | if (!nlm_privileged_requester(rqstp)) { |
440 | char buf[RPC_MAX_ADDRBUFLEN]; | 427 | char buf[RPC_MAX_ADDRBUFLEN]; |
441 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 428 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
442 | svc_print_addr(rqstp, buf, sizeof(buf))); | 429 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cf0d5c2c318d..6063a8e4b9f3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -360,7 +360,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) | |||
360 | __be32 | 360 | __be32 |
361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | 361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, |
362 | struct nlm_host *host, struct nlm_lock *lock, int wait, | 362 | struct nlm_host *host, struct nlm_lock *lock, int wait, |
363 | struct nlm_cookie *cookie) | 363 | struct nlm_cookie *cookie, int reclaim) |
364 | { | 364 | { |
365 | struct nlm_block *block = NULL; | 365 | struct nlm_block *block = NULL; |
366 | int error; | 366 | int error; |
@@ -406,6 +406,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
406 | goto out; | 406 | goto out; |
407 | } | 407 | } |
408 | 408 | ||
409 | if (locks_in_grace() && !reclaim) { | ||
410 | ret = nlm_lck_denied_grace_period; | ||
411 | goto out; | ||
412 | } | ||
413 | if (reclaim && !locks_in_grace()) { | ||
414 | ret = nlm_lck_denied_grace_period; | ||
415 | goto out; | ||
416 | } | ||
417 | |||
409 | if (!wait) | 418 | if (!wait) |
410 | lock->fl.fl_flags &= ~FL_SLEEP; | 419 | lock->fl.fl_flags &= ~FL_SLEEP; |
411 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); | 420 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); |
@@ -502,6 +511,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
502 | goto out; | 511 | goto out; |
503 | } | 512 | } |
504 | 513 | ||
514 | if (locks_in_grace()) { | ||
515 | ret = nlm_lck_denied_grace_period; | ||
516 | goto out; | ||
517 | } | ||
505 | error = vfs_test_lock(file->f_file, &lock->fl); | 518 | error = vfs_test_lock(file->f_file, &lock->fl); |
506 | if (error == FILE_LOCK_DEFERRED) { | 519 | if (error == FILE_LOCK_DEFERRED) { |
507 | ret = nlmsvc_defer_lock_rqst(rqstp, block); | 520 | ret = nlmsvc_defer_lock_rqst(rqstp, block); |
@@ -582,6 +595,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) | |||
582 | (long long)lock->fl.fl_start, | 595 | (long long)lock->fl.fl_start, |
583 | (long long)lock->fl.fl_end); | 596 | (long long)lock->fl.fl_end); |
584 | 597 | ||
598 | if (locks_in_grace()) | ||
599 | return nlm_lck_denied_grace_period; | ||
600 | |||
585 | mutex_lock(&file->f_mutex); | 601 | mutex_lock(&file->f_mutex); |
586 | block = nlmsvc_lookup_block(file, lock); | 602 | block = nlmsvc_lookup_block(file, lock); |
587 | mutex_unlock(&file->f_mutex); | 603 | mutex_unlock(&file->f_mutex); |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76019d2ff72d..548b0bb2b84d 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -112,17 +112,11 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
112 | { | 112 | { |
113 | struct nlm_host *host; | 113 | struct nlm_host *host; |
114 | struct nlm_file *file; | 114 | struct nlm_file *file; |
115 | int rc = rpc_success; | 115 | __be32 rc = rpc_success; |
116 | 116 | ||
117 | dprintk("lockd: TEST called\n"); | 117 | dprintk("lockd: TEST called\n"); |
118 | resp->cookie = argp->cookie; | 118 | resp->cookie = argp->cookie; |
119 | 119 | ||
120 | /* Don't accept test requests during grace period */ | ||
121 | if (nlmsvc_grace_period) { | ||
122 | resp->status = nlm_lck_denied_grace_period; | ||
123 | return rc; | ||
124 | } | ||
125 | |||
126 | /* Obtain client and file */ | 120 | /* Obtain client and file */ |
127 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 121 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
128 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 122 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -146,18 +140,12 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
146 | { | 140 | { |
147 | struct nlm_host *host; | 141 | struct nlm_host *host; |
148 | struct nlm_file *file; | 142 | struct nlm_file *file; |
149 | int rc = rpc_success; | 143 | __be32 rc = rpc_success; |
150 | 144 | ||
151 | dprintk("lockd: LOCK called\n"); | 145 | dprintk("lockd: LOCK called\n"); |
152 | 146 | ||
153 | resp->cookie = argp->cookie; | 147 | resp->cookie = argp->cookie; |
154 | 148 | ||
155 | /* Don't accept new lock requests during grace period */ | ||
156 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
157 | resp->status = nlm_lck_denied_grace_period; | ||
158 | return rc; | ||
159 | } | ||
160 | |||
161 | /* Obtain client and file */ | 149 | /* Obtain client and file */ |
162 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 150 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
163 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 151 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -176,7 +164,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
176 | 164 | ||
177 | /* Now try to lock the file */ | 165 | /* Now try to lock the file */ |
178 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, | 166 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, |
179 | argp->block, &argp->cookie)); | 167 | argp->block, &argp->cookie, |
168 | argp->reclaim)); | ||
180 | if (resp->status == nlm_drop_reply) | 169 | if (resp->status == nlm_drop_reply) |
181 | rc = rpc_drop_reply; | 170 | rc = rpc_drop_reply; |
182 | else | 171 | else |
@@ -199,7 +188,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
199 | resp->cookie = argp->cookie; | 188 | resp->cookie = argp->cookie; |
200 | 189 | ||
201 | /* Don't accept requests during grace period */ | 190 | /* Don't accept requests during grace period */ |
202 | if (nlmsvc_grace_period) { | 191 | if (locks_in_grace()) { |
203 | resp->status = nlm_lck_denied_grace_period; | 192 | resp->status = nlm_lck_denied_grace_period; |
204 | return rpc_success; | 193 | return rpc_success; |
205 | } | 194 | } |
@@ -232,7 +221,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
232 | resp->cookie = argp->cookie; | 221 | resp->cookie = argp->cookie; |
233 | 222 | ||
234 | /* Don't accept new lock requests during grace period */ | 223 | /* Don't accept new lock requests during grace period */ |
235 | if (nlmsvc_grace_period) { | 224 | if (locks_in_grace()) { |
236 | resp->status = nlm_lck_denied_grace_period; | 225 | resp->status = nlm_lck_denied_grace_period; |
237 | return rpc_success; | 226 | return rpc_success; |
238 | } | 227 | } |
@@ -261,7 +250,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
261 | resp->cookie = argp->cookie; | 250 | resp->cookie = argp->cookie; |
262 | 251 | ||
263 | dprintk("lockd: GRANTED called\n"); | 252 | dprintk("lockd: GRANTED called\n"); |
264 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 253 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
265 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 254 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
266 | return rpc_success; | 255 | return rpc_success; |
267 | } | 256 | } |
@@ -373,7 +362,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
373 | resp->cookie = argp->cookie; | 362 | resp->cookie = argp->cookie; |
374 | 363 | ||
375 | /* Don't accept new lock requests during grace period */ | 364 | /* Don't accept new lock requests during grace period */ |
376 | if (nlmsvc_grace_period && !argp->reclaim) { | 365 | if (locks_in_grace() && !argp->reclaim) { |
377 | resp->status = nlm_lck_denied_grace_period; | 366 | resp->status = nlm_lck_denied_grace_period; |
378 | return rpc_success; | 367 | return rpc_success; |
379 | } | 368 | } |
@@ -406,7 +395,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
406 | resp->cookie = argp->cookie; | 395 | resp->cookie = argp->cookie; |
407 | 396 | ||
408 | /* Don't accept requests during grace period */ | 397 | /* Don't accept requests during grace period */ |
409 | if (nlmsvc_grace_period) { | 398 | if (locks_in_grace()) { |
410 | resp->status = nlm_lck_denied_grace_period; | 399 | resp->status = nlm_lck_denied_grace_period; |
411 | return rpc_success; | 400 | return rpc_success; |
412 | } | 401 | } |
@@ -464,11 +453,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
464 | { | 453 | { |
465 | struct sockaddr_in saddr; | 454 | struct sockaddr_in saddr; |
466 | 455 | ||
467 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
468 | |||
469 | dprintk("lockd: SM_NOTIFY called\n"); | 456 | dprintk("lockd: SM_NOTIFY called\n"); |
470 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 457 | |
471 | || ntohs(saddr.sin_port) >= 1024) { | 458 | if (!nlm_privileged_requester(rqstp)) { |
472 | char buf[RPC_MAX_ADDRBUFLEN]; | 459 | char buf[RPC_MAX_ADDRBUFLEN]; |
473 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 460 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
474 | svc_print_addr(rqstp, buf, sizeof(buf))); | 461 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 198b4e55b373..34c2766e27c7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); | |||
418 | static int | 418 | static int |
419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) | 419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) |
420 | { | 420 | { |
421 | return nlm_cmp_addr(&host->h_saddr, datap); | 421 | return nlm_cmp_addr(nlm_srcaddr(host), datap); |
422 | } | 422 | } |
423 | 423 | ||
424 | /** | 424 | /** |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 3e459e18cc31..1f226290c67c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -351,8 +351,6 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) | |||
351 | argp->state = ntohl(*p++); | 351 | argp->state = ntohl(*p++); |
352 | /* Preserve the address in network byte order */ | 352 | /* Preserve the address in network byte order */ |
353 | argp->addr = *p++; | 353 | argp->addr = *p++; |
354 | argp->vers = *p++; | ||
355 | argp->proto = *p++; | ||
356 | return xdr_argsize_check(rqstp, p); | 354 | return xdr_argsize_check(rqstp, p); |
357 | } | 355 | } |
358 | 356 | ||
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 43ff9397e6c6..50c493a8ad8e 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c | |||
@@ -358,8 +358,6 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp | |||
358 | argp->state = ntohl(*p++); | 358 | argp->state = ntohl(*p++); |
359 | /* Preserve the address in network byte order */ | 359 | /* Preserve the address in network byte order */ |
360 | argp->addr = *p++; | 360 | argp->addr = *p++; |
361 | argp->vers = *p++; | ||
362 | argp->proto = *p++; | ||
363 | return xdr_argsize_check(rqstp, p); | 361 | return xdr_argsize_check(rqstp, p); |
364 | } | 362 | } |
365 | 363 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index f447f4b4476c..6a09760c5960 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -105,7 +105,8 @@ int nfs_callback_up(void) | |||
105 | mutex_lock(&nfs_callback_mutex); | 105 | mutex_lock(&nfs_callback_mutex); |
106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) | 106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) |
107 | goto out; | 107 | goto out; |
108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); | 108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, |
109 | AF_INET, NULL); | ||
109 | ret = -ENOMEM; | 110 | ret = -ENOMEM; |
110 | if (!serv) | 111 | if (!serv) |
111 | goto out_err; | 112 | goto out_err; |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -127,7 +127,7 @@ enum { | |||
127 | Opt_err | 127 | Opt_err |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static match_table_t __initdata tokens = { | 130 | static match_table_t __initconst tokens = { |
131 | {Opt_port, "port=%u"}, | 131 | {Opt_port, "port=%u"}, |
132 | {Opt_rsize, "rsize=%u"}, | 132 | {Opt_rsize, "rsize=%u"}, |
133 | {Opt_wsize, "wsize=%u"}, | 133 | {Opt_wsize, "wsize=%u"}, |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..ffb697416cb1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -98,7 +98,7 @@ enum { | |||
98 | Opt_err | 98 | Opt_err |
99 | }; | 99 | }; |
100 | 100 | ||
101 | static match_table_t nfs_mount_option_tokens = { | 101 | static const match_table_t nfs_mount_option_tokens = { |
102 | { Opt_userspace, "bg" }, | 102 | { Opt_userspace, "bg" }, |
103 | { Opt_userspace, "fg" }, | 103 | { Opt_userspace, "fg" }, |
104 | { Opt_userspace, "retry=%s" }, | 104 | { Opt_userspace, "retry=%s" }, |
@@ -163,7 +163,7 @@ enum { | |||
163 | Opt_xprt_err | 163 | Opt_xprt_err |
164 | }; | 164 | }; |
165 | 165 | ||
166 | static match_table_t nfs_xprt_protocol_tokens = { | 166 | static const match_table_t nfs_xprt_protocol_tokens = { |
167 | { Opt_xprt_udp, "udp" }, | 167 | { Opt_xprt_udp, "udp" }, |
168 | { Opt_xprt_tcp, "tcp" }, | 168 | { Opt_xprt_tcp, "tcp" }, |
169 | { Opt_xprt_rdma, "rdma" }, | 169 | { Opt_xprt_rdma, "rdma" }, |
@@ -180,7 +180,7 @@ enum { | |||
180 | Opt_sec_err | 180 | Opt_sec_err |
181 | }; | 181 | }; |
182 | 182 | ||
183 | static match_table_t nfs_secflavor_tokens = { | 183 | static const match_table_t nfs_secflavor_tokens = { |
184 | { Opt_sec_none, "none" }, | 184 | { Opt_sec_none, "none" }, |
185 | { Opt_sec_none, "null" }, | 185 | { Opt_sec_none, "null" }, |
186 | { Opt_sec_sys, "sys" }, | 186 | { Opt_sec_sys, "sys" }, |
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1279 | } | 1279 | } |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | if (errors > 0) { | ||
1283 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
1284 | errors, (errors == 1 ? "" : "s")); | ||
1285 | if (!sloppy) | ||
1286 | return 0; | ||
1287 | } | ||
1282 | return 1; | 1288 | return 1; |
1283 | 1289 | ||
1284 | out_nomem: | 1290 | out_nomem: |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 33bfcf09db46..9dc036f18356 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp) | |||
1023 | /* Look up the dentry */ | 1023 | /* Look up the dentry */ |
1024 | err = path_lookup(nxp->ex_path, 0, &nd); | 1024 | err = path_lookup(nxp->ex_path, 0, &nd); |
1025 | if (err) | 1025 | if (err) |
1026 | goto out_unlock; | 1026 | goto out_put_clp; |
1027 | err = -EINVAL; | 1027 | err = -EINVAL; |
1028 | 1028 | ||
1029 | exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); | 1029 | exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); |
@@ -1090,9 +1090,9 @@ finish: | |||
1090 | exp_put(exp); | 1090 | exp_put(exp); |
1091 | if (fsid_key && !IS_ERR(fsid_key)) | 1091 | if (fsid_key && !IS_ERR(fsid_key)) |
1092 | cache_put(&fsid_key->h, &svc_expkey_cache); | 1092 | cache_put(&fsid_key->h, &svc_expkey_cache); |
1093 | if (clp) | ||
1094 | auth_domain_put(clp); | ||
1095 | path_put(&nd.path); | 1093 | path_put(&nd.path); |
1094 | out_put_clp: | ||
1095 | auth_domain_put(clp); | ||
1096 | out_unlock: | 1096 | out_unlock: |
1097 | exp_writeunlock(); | 1097 | exp_writeunlock(); |
1098 | out: | 1098 | out: |
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6faeec77c..b2786a5f9afe 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c | |||
@@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) | |||
70 | static struct nlmsvc_binding nfsd_nlm_ops = { | 70 | static struct nlmsvc_binding nfsd_nlm_ops = { |
71 | .fopen = nlm_fopen, /* open file for locking */ | 71 | .fopen = nlm_fopen, /* open file for locking */ |
72 | .fclose = nlm_fclose, /* close file */ | 72 | .fclose = nlm_fclose, /* close file */ |
73 | .get_grace_period = get_nfs4_grace_period, | ||
74 | }; | 73 | }; |
75 | 74 | ||
76 | void | 75 | void |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4d617ea28cfc..9dbd2eb91281 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
63 | SVCFH_fmt(&argp->fh)); | 63 | SVCFH_fmt(&argp->fh)); |
64 | 64 | ||
65 | fh_copy(&resp->fh, &argp->fh); | 65 | fh_copy(&resp->fh, &argp->fh); |
66 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 66 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
67 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
67 | if (nfserr) | 68 | if (nfserr) |
68 | RETURN_STATUS(nfserr); | 69 | RETURN_STATUS(nfserr); |
69 | 70 | ||
@@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
530 | dprintk("nfsd: FSSTAT(3) %s\n", | 531 | dprintk("nfsd: FSSTAT(3) %s\n", |
531 | SVCFH_fmt(&argp->fh)); | 532 | SVCFH_fmt(&argp->fh)); |
532 | 533 | ||
533 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 534 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); |
534 | fh_put(&argp->fh); | 535 | fh_put(&argp->fh); |
535 | RETURN_STATUS(nfserr); | 536 | RETURN_STATUS(nfserr); |
536 | } | 537 | } |
@@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
558 | resp->f_maxfilesize = ~(u32) 0; | 559 | resp->f_maxfilesize = ~(u32) 0; |
559 | resp->f_properties = NFS3_FSF_DEFAULT; | 560 | resp->f_properties = NFS3_FSF_DEFAULT; |
560 | 561 | ||
561 | nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); | 562 | nfserr = fh_verify(rqstp, &argp->fh, 0, |
563 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
562 | 564 | ||
563 | /* Check special features of the file system. May request | 565 | /* Check special features of the file system. May request |
564 | * different read/write sizes for file systems known to have | 566 | * different read/write sizes for file systems known to have |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
443 | * enough space for either: | 443 | * enough space for either: |
444 | */ | 444 | */ |
445 | alloc = sizeof(struct posix_ace_state_array) | 445 | alloc = sizeof(struct posix_ace_state_array) |
446 | + cnt*sizeof(struct posix_ace_state); | 446 | + cnt*sizeof(struct posix_user_ace_state); |
447 | state->users = kzalloc(alloc, GFP_KERNEL); | 447 | state->users = kzalloc(alloc, GFP_KERNEL); |
448 | if (!state->users) | 448 | if (!state->users) |
449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 702fa577aa6e..094747a1227c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -225,7 +225,8 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | |||
225 | 225 | ||
226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); | 226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); |
227 | WRITE32(OP_CB_RECALL); | 227 | WRITE32(OP_CB_RECALL); |
228 | WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); | 228 | WRITE32(cb_rec->cbr_stateid.si_generation); |
229 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
229 | WRITE32(cb_rec->cbr_trunc); | 230 | WRITE32(cb_rec->cbr_trunc); |
230 | WRITE32(len); | 231 | WRITE32(len); |
231 | WRITEMEM(cb_rec->cbr_fhval, len); | 232 | WRITEMEM(cb_rec->cbr_fhval, len); |
@@ -379,6 +380,7 @@ static int do_probe_callback(void *data) | |||
379 | .addrsize = sizeof(addr), | 380 | .addrsize = sizeof(addr), |
380 | .timeout = &timeparms, | 381 | .timeout = &timeparms, |
381 | .program = &cb_program, | 382 | .program = &cb_program, |
383 | .prognumber = cb->cb_prog, | ||
382 | .version = nfs_cb_version[1]->number, | 384 | .version = nfs_cb_version[1]->number, |
383 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 385 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 386 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
@@ -396,9 +398,6 @@ static int do_probe_callback(void *data) | |||
396 | addr.sin_port = htons(cb->cb_port); | 398 | addr.sin_port = htons(cb->cb_port); |
397 | addr.sin_addr.s_addr = htonl(cb->cb_addr); | 399 | addr.sin_addr.s_addr = htonl(cb->cb_addr); |
398 | 400 | ||
399 | /* Initialize rpc_stat */ | ||
400 | memset(args.program->stats, 0, sizeof(struct rpc_stat)); | ||
401 | |||
402 | /* Create RPC client */ | 401 | /* Create RPC client */ |
403 | client = rpc_create(&args); | 402 | client = rpc_create(&args); |
404 | if (IS_ERR(client)) { | 403 | if (IS_ERR(client)) { |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..669461e291ae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
201 | /* Openowner is now set, so sequence id will get bumped. Now we need | 201 | /* Openowner is now set, so sequence id will get bumped. Now we need |
202 | * these checks before we do any creates: */ | 202 | * these checks before we do any creates: */ |
203 | status = nfserr_grace; | 203 | status = nfserr_grace; |
204 | if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) | 204 | if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) |
205 | goto out; | 205 | goto out; |
206 | status = nfserr_no_grace; | 206 | status = nfserr_no_grace; |
207 | if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) | 207 | if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) |
208 | goto out; | 208 | goto out; |
209 | 209 | ||
210 | switch (open->op_claim_type) { | 210 | switch (open->op_claim_type) { |
@@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
575 | { | 575 | { |
576 | __be32 status; | 576 | __be32 status; |
577 | 577 | ||
578 | if (nfs4_in_grace()) | 578 | if (locks_in_grace()) |
579 | return nfserr_grace; | 579 | return nfserr_grace; |
580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, | 580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, |
581 | remove->rm_name, remove->rm_namelen); | 581 | remove->rm_name, remove->rm_namelen); |
@@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
596 | 596 | ||
597 | if (!cstate->save_fh.fh_dentry) | 597 | if (!cstate->save_fh.fh_dentry) |
598 | return status; | 598 | return status; |
599 | if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags | 599 | if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags |
600 | & NFSEXP_NOSUBTREECHECK)) | 600 | & NFSEXP_NOSUBTREECHECK)) |
601 | return nfserr_grace; | 601 | return nfserr_grace; |
602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, | 602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, |
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
867 | int slack_bytes; | 867 | int slack_bytes; |
868 | __be32 status; | 868 | __be32 status; |
869 | 869 | ||
870 | status = nfserr_resource; | ||
871 | cstate = cstate_alloc(); | ||
872 | if (cstate == NULL) | ||
873 | goto out; | ||
874 | |||
875 | resp->xbuf = &rqstp->rq_res; | 870 | resp->xbuf = &rqstp->rq_res; |
876 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; |
877 | resp->tagp = resp->p; | 872 | resp->tagp = resp->p; |
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
890 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) |
891 | goto out; | 886 | goto out; |
892 | 887 | ||
888 | status = nfserr_resource; | ||
889 | cstate = cstate_alloc(); | ||
890 | if (cstate == NULL) | ||
891 | goto out; | ||
892 | |||
893 | status = nfs_ok; | 893 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 894 | while (!status && resp->opcnt < args->opcnt) { |
895 | op = &args->ops[resp->opcnt++]; | 895 | op = &args->ops[resp->opcnt++]; |
@@ -957,9 +957,9 @@ encode_op: | |||
957 | nfsd4_increment_op_stats(op->opnum); | 957 | nfsd4_increment_op_stats(op->opnum); |
958 | } | 958 | } |
959 | 959 | ||
960 | cstate_free(cstate); | ||
960 | out: | 961 | out: |
961 | nfsd4_release_compoundargs(args); | 962 | nfsd4_release_compoundargs(args); |
962 | cstate_free(cstate); | ||
963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
964 | return status; | 964 | return status; |
965 | } | 965 | } |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a2667e..0cc7ff5d5ab5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -61,7 +61,6 @@ | |||
61 | static time_t lease_time = 90; /* default lease time */ | 61 | static time_t lease_time = 90; /* default lease time */ |
62 | static time_t user_lease_time = 90; | 62 | static time_t user_lease_time = 90; |
63 | static time_t boot_time; | 63 | static time_t boot_time; |
64 | static int in_grace = 1; | ||
65 | static u32 current_ownerid = 1; | 64 | static u32 current_ownerid = 1; |
66 | static u32 current_fileid = 1; | 65 | static u32 current_fileid = 1; |
67 | static u32 current_delegid = 1; | 66 | static u32 current_delegid = 1; |
@@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
1640 | case NFS4_OPEN_CLAIM_NULL: | 1639 | case NFS4_OPEN_CLAIM_NULL: |
1641 | /* Let's not give out any delegations till everyone's | 1640 | /* Let's not give out any delegations till everyone's |
1642 | * had the chance to reclaim theirs.... */ | 1641 | * had the chance to reclaim theirs.... */ |
1643 | if (nfs4_in_grace()) | 1642 | if (locks_in_grace()) |
1644 | goto out; | 1643 | goto out; |
1645 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) | 1644 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) |
1646 | goto out; | 1645 | goto out; |
@@ -1816,12 +1815,15 @@ out: | |||
1816 | return status; | 1815 | return status; |
1817 | } | 1816 | } |
1818 | 1817 | ||
1818 | struct lock_manager nfsd4_manager = { | ||
1819 | }; | ||
1820 | |||
1819 | static void | 1821 | static void |
1820 | end_grace(void) | 1822 | nfsd4_end_grace(void) |
1821 | { | 1823 | { |
1822 | dprintk("NFSD: end of grace period\n"); | 1824 | dprintk("NFSD: end of grace period\n"); |
1823 | nfsd4_recdir_purge_old(); | 1825 | nfsd4_recdir_purge_old(); |
1824 | in_grace = 0; | 1826 | locks_end_grace(&nfsd4_manager); |
1825 | } | 1827 | } |
1826 | 1828 | ||
1827 | static time_t | 1829 | static time_t |
@@ -1838,8 +1840,8 @@ nfs4_laundromat(void) | |||
1838 | nfs4_lock_state(); | 1840 | nfs4_lock_state(); |
1839 | 1841 | ||
1840 | dprintk("NFSD: laundromat service - starting\n"); | 1842 | dprintk("NFSD: laundromat service - starting\n"); |
1841 | if (in_grace) | 1843 | if (locks_in_grace()) |
1842 | end_grace(); | 1844 | nfsd4_end_grace(); |
1843 | list_for_each_safe(pos, next, &client_lru) { | 1845 | list_for_each_safe(pos, next, &client_lru) { |
1844 | clp = list_entry(pos, struct nfs4_client, cl_lru); | 1846 | clp = list_entry(pos, struct nfs4_client, cl_lru); |
1845 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { | 1847 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { |
@@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
1974 | return nfserr_bad_stateid; | 1976 | return nfserr_bad_stateid; |
1975 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) | 1977 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) |
1976 | return nfs_ok; | 1978 | return nfs_ok; |
1977 | else if (nfs4_in_grace()) { | 1979 | else if (locks_in_grace()) { |
1978 | /* Answer in remaining cases depends on existance of | 1980 | /* Answer in remaining cases depends on existance of |
1979 | * conflicting state; so we must wait out the grace period. */ | 1981 | * conflicting state; so we must wait out the grace period. */ |
1980 | return nfserr_grace; | 1982 | return nfserr_grace; |
@@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
1993 | static inline int | 1995 | static inline int |
1994 | io_during_grace_disallowed(struct inode *inode, int flags) | 1996 | io_during_grace_disallowed(struct inode *inode, int flags) |
1995 | { | 1997 | { |
1996 | return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) | 1998 | return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) |
1997 | && mandatory_lock(inode); | 1999 | && mandatory_lock(inode); |
1998 | } | 2000 | } |
1999 | 2001 | ||
@@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2693 | filp = lock_stp->st_vfs_file; | 2695 | filp = lock_stp->st_vfs_file; |
2694 | 2696 | ||
2695 | status = nfserr_grace; | 2697 | status = nfserr_grace; |
2696 | if (nfs4_in_grace() && !lock->lk_reclaim) | 2698 | if (locks_in_grace() && !lock->lk_reclaim) |
2697 | goto out; | 2699 | goto out; |
2698 | status = nfserr_no_grace; | 2700 | status = nfserr_no_grace; |
2699 | if (!nfs4_in_grace() && lock->lk_reclaim) | 2701 | if (!locks_in_grace() && lock->lk_reclaim) |
2700 | goto out; | 2702 | goto out; |
2701 | 2703 | ||
2702 | locks_init_lock(&file_lock); | 2704 | locks_init_lock(&file_lock); |
@@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2779 | int error; | 2781 | int error; |
2780 | __be32 status; | 2782 | __be32 status; |
2781 | 2783 | ||
2782 | if (nfs4_in_grace()) | 2784 | if (locks_in_grace()) |
2783 | return nfserr_grace; | 2785 | return nfserr_grace; |
2784 | 2786 | ||
2785 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) | 2787 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) |
@@ -3192,9 +3194,9 @@ __nfs4_state_start(void) | |||
3192 | unsigned long grace_time; | 3194 | unsigned long grace_time; |
3193 | 3195 | ||
3194 | boot_time = get_seconds(); | 3196 | boot_time = get_seconds(); |
3195 | grace_time = get_nfs_grace_period(); | 3197 | grace_time = get_nfs4_grace_period(); |
3196 | lease_time = user_lease_time; | 3198 | lease_time = user_lease_time; |
3197 | in_grace = 1; | 3199 | locks_start_grace(&nfsd4_manager); |
3198 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", | 3200 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
3199 | grace_time/HZ); | 3201 | grace_time/HZ); |
3200 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 3202 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
@@ -3213,12 +3215,6 @@ nfs4_state_start(void) | |||
3213 | return; | 3215 | return; |
3214 | } | 3216 | } |
3215 | 3217 | ||
3216 | int | ||
3217 | nfs4_in_grace(void) | ||
3218 | { | ||
3219 | return in_grace; | ||
3220 | } | ||
3221 | |||
3222 | time_t | 3218 | time_t |
3223 | nfs4_lease_time(void) | 3219 | nfs4_lease_time(void) |
3224 | { | 3220 | { |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 14ba4d9b2859..afcdf4b76843 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -413,6 +413,18 @@ out_nfserr: | |||
413 | } | 413 | } |
414 | 414 | ||
415 | static __be32 | 415 | static __be32 |
416 | nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) | ||
417 | { | ||
418 | DECODE_HEAD; | ||
419 | |||
420 | READ_BUF(sizeof(stateid_t)); | ||
421 | READ32(sid->si_generation); | ||
422 | COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
423 | |||
424 | DECODE_TAIL; | ||
425 | } | ||
426 | |||
427 | static __be32 | ||
416 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) | 428 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) |
417 | { | 429 | { |
418 | DECODE_HEAD; | 430 | DECODE_HEAD; |
@@ -429,10 +441,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) | |||
429 | DECODE_HEAD; | 441 | DECODE_HEAD; |
430 | 442 | ||
431 | close->cl_stateowner = NULL; | 443 | close->cl_stateowner = NULL; |
432 | READ_BUF(4 + sizeof(stateid_t)); | 444 | READ_BUF(4); |
433 | READ32(close->cl_seqid); | 445 | READ32(close->cl_seqid); |
434 | READ32(close->cl_stateid.si_generation); | 446 | return nfsd4_decode_stateid(argp, &close->cl_stateid); |
435 | COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
436 | 447 | ||
437 | DECODE_TAIL; | 448 | DECODE_TAIL; |
438 | } | 449 | } |
@@ -493,13 +504,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
493 | static inline __be32 | 504 | static inline __be32 |
494 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) | 505 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) |
495 | { | 506 | { |
496 | DECODE_HEAD; | 507 | return nfsd4_decode_stateid(argp, &dr->dr_stateid); |
497 | |||
498 | READ_BUF(sizeof(stateid_t)); | ||
499 | READ32(dr->dr_stateid.si_generation); | ||
500 | COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
501 | |||
502 | DECODE_TAIL; | ||
503 | } | 508 | } |
504 | 509 | ||
505 | static inline __be32 | 510 | static inline __be32 |
@@ -542,20 +547,22 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) | |||
542 | READ32(lock->lk_is_new); | 547 | READ32(lock->lk_is_new); |
543 | 548 | ||
544 | if (lock->lk_is_new) { | 549 | if (lock->lk_is_new) { |
545 | READ_BUF(36); | 550 | READ_BUF(4); |
546 | READ32(lock->lk_new_open_seqid); | 551 | READ32(lock->lk_new_open_seqid); |
547 | READ32(lock->lk_new_open_stateid.si_generation); | 552 | status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); |
548 | 553 | if (status) | |
549 | COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); | 554 | return status; |
555 | READ_BUF(8 + sizeof(clientid_t)); | ||
550 | READ32(lock->lk_new_lock_seqid); | 556 | READ32(lock->lk_new_lock_seqid); |
551 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); | 557 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); |
552 | READ32(lock->lk_new_owner.len); | 558 | READ32(lock->lk_new_owner.len); |
553 | READ_BUF(lock->lk_new_owner.len); | 559 | READ_BUF(lock->lk_new_owner.len); |
554 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); | 560 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); |
555 | } else { | 561 | } else { |
556 | READ_BUF(20); | 562 | status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); |
557 | READ32(lock->lk_old_lock_stateid.si_generation); | 563 | if (status) |
558 | COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); | 564 | return status; |
565 | READ_BUF(4); | ||
559 | READ32(lock->lk_old_lock_seqid); | 566 | READ32(lock->lk_old_lock_seqid); |
560 | } | 567 | } |
561 | 568 | ||
@@ -587,13 +594,15 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) | |||
587 | DECODE_HEAD; | 594 | DECODE_HEAD; |
588 | 595 | ||
589 | locku->lu_stateowner = NULL; | 596 | locku->lu_stateowner = NULL; |
590 | READ_BUF(24 + sizeof(stateid_t)); | 597 | READ_BUF(8); |
591 | READ32(locku->lu_type); | 598 | READ32(locku->lu_type); |
592 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) | 599 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) |
593 | goto xdr_error; | 600 | goto xdr_error; |
594 | READ32(locku->lu_seqid); | 601 | READ32(locku->lu_seqid); |
595 | READ32(locku->lu_stateid.si_generation); | 602 | status = nfsd4_decode_stateid(argp, &locku->lu_stateid); |
596 | COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | 603 | if (status) |
604 | return status; | ||
605 | READ_BUF(16); | ||
597 | READ64(locku->lu_offset); | 606 | READ64(locku->lu_offset); |
598 | READ64(locku->lu_length); | 607 | READ64(locku->lu_length); |
599 | 608 | ||
@@ -678,8 +687,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
678 | READ32(open->op_delegate_type); | 687 | READ32(open->op_delegate_type); |
679 | break; | 688 | break; |
680 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 689 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
681 | READ_BUF(sizeof(stateid_t) + 4); | 690 | status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); |
682 | COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 691 | if (status) |
692 | return status; | ||
693 | READ_BUF(4); | ||
683 | READ32(open->op_fname.len); | 694 | READ32(open->op_fname.len); |
684 | READ_BUF(open->op_fname.len); | 695 | READ_BUF(open->op_fname.len); |
685 | SAVEMEM(open->op_fname.data, open->op_fname.len); | 696 | SAVEMEM(open->op_fname.data, open->op_fname.len); |
@@ -699,9 +710,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con | |||
699 | DECODE_HEAD; | 710 | DECODE_HEAD; |
700 | 711 | ||
701 | open_conf->oc_stateowner = NULL; | 712 | open_conf->oc_stateowner = NULL; |
702 | READ_BUF(4 + sizeof(stateid_t)); | 713 | status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); |
703 | READ32(open_conf->oc_req_stateid.si_generation); | 714 | if (status) |
704 | COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); | 715 | return status; |
716 | READ_BUF(4); | ||
705 | READ32(open_conf->oc_seqid); | 717 | READ32(open_conf->oc_seqid); |
706 | 718 | ||
707 | DECODE_TAIL; | 719 | DECODE_TAIL; |
@@ -713,9 +725,10 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d | |||
713 | DECODE_HEAD; | 725 | DECODE_HEAD; |
714 | 726 | ||
715 | open_down->od_stateowner = NULL; | 727 | open_down->od_stateowner = NULL; |
716 | READ_BUF(12 + sizeof(stateid_t)); | 728 | status = nfsd4_decode_stateid(argp, &open_down->od_stateid); |
717 | READ32(open_down->od_stateid.si_generation); | 729 | if (status) |
718 | COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | 730 | return status; |
731 | READ_BUF(12); | ||
719 | READ32(open_down->od_seqid); | 732 | READ32(open_down->od_seqid); |
720 | READ32(open_down->od_share_access); | 733 | READ32(open_down->od_share_access); |
721 | READ32(open_down->od_share_deny); | 734 | READ32(open_down->od_share_deny); |
@@ -743,9 +756,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) | |||
743 | { | 756 | { |
744 | DECODE_HEAD; | 757 | DECODE_HEAD; |
745 | 758 | ||
746 | READ_BUF(sizeof(stateid_t) + 12); | 759 | status = nfsd4_decode_stateid(argp, &read->rd_stateid); |
747 | READ32(read->rd_stateid.si_generation); | 760 | if (status) |
748 | COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); | 761 | return status; |
762 | READ_BUF(12); | ||
749 | READ64(read->rd_offset); | 763 | READ64(read->rd_offset); |
750 | READ32(read->rd_length); | 764 | READ32(read->rd_length); |
751 | 765 | ||
@@ -834,15 +848,13 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | |||
834 | static __be32 | 848 | static __be32 |
835 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 849 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
836 | { | 850 | { |
837 | DECODE_HEAD; | 851 | __be32 status; |
838 | |||
839 | READ_BUF(sizeof(stateid_t)); | ||
840 | READ32(setattr->sa_stateid.si_generation); | ||
841 | COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
842 | if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) | ||
843 | goto out; | ||
844 | 852 | ||
845 | DECODE_TAIL; | 853 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); |
854 | if (status) | ||
855 | return status; | ||
856 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, | ||
857 | &setattr->sa_iattr, &setattr->sa_acl); | ||
846 | } | 858 | } |
847 | 859 | ||
848 | static __be32 | 860 | static __be32 |
@@ -927,9 +939,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
927 | int len; | 939 | int len; |
928 | DECODE_HEAD; | 940 | DECODE_HEAD; |
929 | 941 | ||
930 | READ_BUF(sizeof(stateid_opaque_t) + 20); | 942 | status = nfsd4_decode_stateid(argp, &write->wr_stateid); |
931 | READ32(write->wr_stateid.si_generation); | 943 | if (status) |
932 | COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); | 944 | return status; |
945 | READ_BUF(16); | ||
933 | READ64(write->wr_offset); | 946 | READ64(write->wr_offset); |
934 | READ32(write->wr_stable_how); | 947 | READ32(write->wr_stable_how); |
935 | if (write->wr_stable_how > 2) | 948 | if (write->wr_stable_how > 2) |
@@ -1183,7 +1196,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1183 | * Header routine to setup seqid operation replay cache | 1196 | * Header routine to setup seqid operation replay cache |
1184 | */ | 1197 | */ |
1185 | #define ENCODE_SEQID_OP_HEAD \ | 1198 | #define ENCODE_SEQID_OP_HEAD \ |
1186 | __be32 *p; \ | ||
1187 | __be32 *save; \ | 1199 | __be32 *save; \ |
1188 | \ | 1200 | \ |
1189 | save = resp->p; | 1201 | save = resp->p; |
@@ -1950,6 +1962,17 @@ fail: | |||
1950 | return -EINVAL; | 1962 | return -EINVAL; |
1951 | } | 1963 | } |
1952 | 1964 | ||
1965 | static void | ||
1966 | nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) | ||
1967 | { | ||
1968 | ENCODE_HEAD; | ||
1969 | |||
1970 | RESERVE_SPACE(sizeof(stateid_t)); | ||
1971 | WRITE32(sid->si_generation); | ||
1972 | WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
1973 | ADJUST_ARGS(); | ||
1974 | } | ||
1975 | |||
1953 | static __be32 | 1976 | static __be32 |
1954 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) | 1977 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) |
1955 | { | 1978 | { |
@@ -1969,12 +1992,9 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c | |||
1969 | { | 1992 | { |
1970 | ENCODE_SEQID_OP_HEAD; | 1993 | ENCODE_SEQID_OP_HEAD; |
1971 | 1994 | ||
1972 | if (!nfserr) { | 1995 | if (!nfserr) |
1973 | RESERVE_SPACE(sizeof(stateid_t)); | 1996 | nfsd4_encode_stateid(resp, &close->cl_stateid); |
1974 | WRITE32(close->cl_stateid.si_generation); | 1997 | |
1975 | WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
1976 | ADJUST_ARGS(); | ||
1977 | } | ||
1978 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); | 1998 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); |
1979 | return nfserr; | 1999 | return nfserr; |
1980 | } | 2000 | } |
@@ -2074,12 +2094,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo | |||
2074 | { | 2094 | { |
2075 | ENCODE_SEQID_OP_HEAD; | 2095 | ENCODE_SEQID_OP_HEAD; |
2076 | 2096 | ||
2077 | if (!nfserr) { | 2097 | if (!nfserr) |
2078 | RESERVE_SPACE(4 + sizeof(stateid_t)); | 2098 | nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); |
2079 | WRITE32(lock->lk_resp_stateid.si_generation); | 2099 | else if (nfserr == nfserr_denied) |
2080 | WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2081 | ADJUST_ARGS(); | ||
2082 | } else if (nfserr == nfserr_denied) | ||
2083 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); | 2100 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); |
2084 | 2101 | ||
2085 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); | 2102 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); |
@@ -2099,13 +2116,9 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l | |||
2099 | { | 2116 | { |
2100 | ENCODE_SEQID_OP_HEAD; | 2117 | ENCODE_SEQID_OP_HEAD; |
2101 | 2118 | ||
2102 | if (!nfserr) { | 2119 | if (!nfserr) |
2103 | RESERVE_SPACE(sizeof(stateid_t)); | 2120 | nfsd4_encode_stateid(resp, &locku->lu_stateid); |
2104 | WRITE32(locku->lu_stateid.si_generation); | 2121 | |
2105 | WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2106 | ADJUST_ARGS(); | ||
2107 | } | ||
2108 | |||
2109 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); | 2122 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); |
2110 | return nfserr; | 2123 | return nfserr; |
2111 | } | 2124 | } |
@@ -2128,14 +2141,14 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li | |||
2128 | static __be32 | 2141 | static __be32 |
2129 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) | 2142 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) |
2130 | { | 2143 | { |
2144 | ENCODE_HEAD; | ||
2131 | ENCODE_SEQID_OP_HEAD; | 2145 | ENCODE_SEQID_OP_HEAD; |
2132 | 2146 | ||
2133 | if (nfserr) | 2147 | if (nfserr) |
2134 | goto out; | 2148 | goto out; |
2135 | 2149 | ||
2136 | RESERVE_SPACE(36 + sizeof(stateid_t)); | 2150 | nfsd4_encode_stateid(resp, &open->op_stateid); |
2137 | WRITE32(open->op_stateid.si_generation); | 2151 | RESERVE_SPACE(40); |
2138 | WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2139 | WRITECINFO(open->op_cinfo); | 2152 | WRITECINFO(open->op_cinfo); |
2140 | WRITE32(open->op_rflags); | 2153 | WRITE32(open->op_rflags); |
2141 | WRITE32(2); | 2154 | WRITE32(2); |
@@ -2148,8 +2161,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
2148 | case NFS4_OPEN_DELEGATE_NONE: | 2161 | case NFS4_OPEN_DELEGATE_NONE: |
2149 | break; | 2162 | break; |
2150 | case NFS4_OPEN_DELEGATE_READ: | 2163 | case NFS4_OPEN_DELEGATE_READ: |
2151 | RESERVE_SPACE(20 + sizeof(stateid_t)); | 2164 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
2152 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2165 | RESERVE_SPACE(20); |
2153 | WRITE32(open->op_recall); | 2166 | WRITE32(open->op_recall); |
2154 | 2167 | ||
2155 | /* | 2168 | /* |
@@ -2162,8 +2175,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
2162 | ADJUST_ARGS(); | 2175 | ADJUST_ARGS(); |
2163 | break; | 2176 | break; |
2164 | case NFS4_OPEN_DELEGATE_WRITE: | 2177 | case NFS4_OPEN_DELEGATE_WRITE: |
2165 | RESERVE_SPACE(32 + sizeof(stateid_t)); | 2178 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
2166 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2179 | RESERVE_SPACE(32); |
2167 | WRITE32(0); | 2180 | WRITE32(0); |
2168 | 2181 | ||
2169 | /* | 2182 | /* |
@@ -2195,13 +2208,9 @@ static __be32 | |||
2195 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) | 2208 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) |
2196 | { | 2209 | { |
2197 | ENCODE_SEQID_OP_HEAD; | 2210 | ENCODE_SEQID_OP_HEAD; |
2198 | 2211 | ||
2199 | if (!nfserr) { | 2212 | if (!nfserr) |
2200 | RESERVE_SPACE(sizeof(stateid_t)); | 2213 | nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); |
2201 | WRITE32(oc->oc_resp_stateid.si_generation); | ||
2202 | WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2203 | ADJUST_ARGS(); | ||
2204 | } | ||
2205 | 2214 | ||
2206 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); | 2215 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); |
2207 | return nfserr; | 2216 | return nfserr; |
@@ -2211,13 +2220,9 @@ static __be32 | |||
2211 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) | 2220 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) |
2212 | { | 2221 | { |
2213 | ENCODE_SEQID_OP_HEAD; | 2222 | ENCODE_SEQID_OP_HEAD; |
2214 | 2223 | ||
2215 | if (!nfserr) { | 2224 | if (!nfserr) |
2216 | RESERVE_SPACE(sizeof(stateid_t)); | 2225 | nfsd4_encode_stateid(resp, &od->od_stateid); |
2217 | WRITE32(od->od_stateid.si_generation); | ||
2218 | WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2219 | ADJUST_ARGS(); | ||
2220 | } | ||
2221 | 2226 | ||
2222 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); | 2227 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); |
2223 | return nfserr; | 2228 | return nfserr; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c53e65f8f3a2..97543df58242 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
614 | return -EINVAL; | 614 | return -EINVAL; |
615 | err = nfsd_create_serv(); | 615 | err = nfsd_create_serv(); |
616 | if (!err) { | 616 | if (!err) { |
617 | int proto = 0; | 617 | err = svc_addsock(nfsd_serv, fd, buf); |
618 | err = svc_addsock(nfsd_serv, fd, buf, &proto); | ||
619 | if (err >= 0) { | 618 | if (err >= 0) { |
620 | err = lockd_up(proto); | 619 | err = lockd_up(); |
621 | if (err < 0) | 620 | if (err < 0) |
622 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); | 621 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); |
623 | } | 622 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ea37c96f0445..cd25d91895a1 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
302 | if (error) | 302 | if (error) |
303 | goto out; | 303 | goto out; |
304 | 304 | ||
305 | if (!(access & NFSD_MAY_LOCK)) { | 305 | /* |
306 | /* | 306 | * pseudoflavor restrictions are not enforced on NLM, |
307 | * pseudoflavor restrictions are not enforced on NLM, | 307 | * which clients virtually always use auth_sys for, |
308 | * which clients virtually always use auth_sys for, | 308 | * even while using RPCSEC_GSS for NFS. |
309 | * even while using RPCSEC_GSS for NFS. | 309 | */ |
310 | */ | 310 | if (access & NFSD_MAY_LOCK) |
311 | error = check_nfsd_access(exp, rqstp); | 311 | goto skip_pseudoflavor_check; |
312 | if (error) | 312 | /* |
313 | goto out; | 313 | * Clients may expect to be able to use auth_sys during mount, |
314 | } | 314 | * even if they use gss for everything else; see section 2.3.2 |
315 | * of rfc 2623. | ||
316 | */ | ||
317 | if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT | ||
318 | && exp->ex_path.dentry == dentry) | ||
319 | goto skip_pseudoflavor_check; | ||
320 | |||
321 | error = check_nfsd_access(exp, rqstp); | ||
322 | if (error) | ||
323 | goto out; | ||
315 | 324 | ||
325 | skip_pseudoflavor_check: | ||
316 | /* Finally, check access permissions. */ | 326 | /* Finally, check access permissions. */ |
317 | error = nfsd_permission(rqstp, exp, dentry, access); | 327 | error = nfsd_permission(rqstp, exp, dentry, access); |
318 | 328 | ||
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0766f95d236a..5cffeca7acef 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); | 65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); |
66 | 66 | ||
67 | fh_copy(&resp->fh, &argp->fh); | 67 | fh_copy(&resp->fh, &argp->fh); |
68 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 68 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
69 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
69 | return nfsd_return_attrs(nfserr, resp); | 70 | return nfsd_return_attrs(nfserr, resp); |
70 | } | 71 | } |
71 | 72 | ||
@@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
521 | 522 | ||
522 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); | 523 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); |
523 | 524 | ||
524 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 525 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, |
526 | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
525 | fh_put(&argp->fh); | 527 | fh_put(&argp->fh); |
526 | return nfserr; | 528 | return nfserr; |
527 | } | 529 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80292ff5e924..59eeb46f82c5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -229,6 +229,7 @@ int nfsd_create_serv(void) | |||
229 | 229 | ||
230 | atomic_set(&nfsd_busy, 0); | 230 | atomic_set(&nfsd_busy, 0); |
231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
232 | AF_INET, | ||
232 | nfsd_last_thread, nfsd, THIS_MODULE); | 233 | nfsd_last_thread, nfsd, THIS_MODULE); |
233 | if (nfsd_serv == NULL) | 234 | if (nfsd_serv == NULL) |
234 | err = -ENOMEM; | 235 | err = -ENOMEM; |
@@ -243,25 +244,20 @@ static int nfsd_init_socks(int port) | |||
243 | if (!list_empty(&nfsd_serv->sv_permsocks)) | 244 | if (!list_empty(&nfsd_serv->sv_permsocks)) |
244 | return 0; | 245 | return 0; |
245 | 246 | ||
246 | error = lockd_up(IPPROTO_UDP); | 247 | error = svc_create_xprt(nfsd_serv, "udp", port, |
247 | if (error >= 0) { | ||
248 | error = svc_create_xprt(nfsd_serv, "udp", port, | ||
249 | SVC_SOCK_DEFAULTS); | 248 | SVC_SOCK_DEFAULTS); |
250 | if (error < 0) | ||
251 | lockd_down(); | ||
252 | } | ||
253 | if (error < 0) | 249 | if (error < 0) |
254 | return error; | 250 | return error; |
255 | 251 | ||
256 | error = lockd_up(IPPROTO_TCP); | 252 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
257 | if (error >= 0) { | ||
258 | error = svc_create_xprt(nfsd_serv, "tcp", port, | ||
259 | SVC_SOCK_DEFAULTS); | 253 | SVC_SOCK_DEFAULTS); |
260 | if (error < 0) | ||
261 | lockd_down(); | ||
262 | } | ||
263 | if (error < 0) | 254 | if (error < 0) |
264 | return error; | 255 | return error; |
256 | |||
257 | error = lockd_up(); | ||
258 | if (error < 0) | ||
259 | return error; | ||
260 | |||
265 | return 0; | 261 | return 0; |
266 | } | 262 | } |
267 | 263 | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 18060bed5267..aa1d0d6489a1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -83,7 +83,6 @@ struct raparm_hbucket { | |||
83 | spinlock_t pb_lock; | 83 | spinlock_t pb_lock; |
84 | } ____cacheline_aligned_in_smp; | 84 | } ____cacheline_aligned_in_smp; |
85 | 85 | ||
86 | static struct raparms * raparml; | ||
87 | #define RAPARM_HASH_BITS 4 | 86 | #define RAPARM_HASH_BITS 4 |
88 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) | 87 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) |
89 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | 88 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) |
@@ -1866,9 +1865,9 @@ out: | |||
1866 | * N.B. After this call fhp needs an fh_put | 1865 | * N.B. After this call fhp needs an fh_put |
1867 | */ | 1866 | */ |
1868 | __be32 | 1867 | __be32 |
1869 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) | 1868 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
1870 | { | 1869 | { |
1871 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); | 1870 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); |
1872 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) | 1871 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) |
1873 | err = nfserr_io; | 1872 | err = nfserr_io; |
1874 | return err; | 1873 | return err; |
@@ -1966,11 +1965,20 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
1966 | void | 1965 | void |
1967 | nfsd_racache_shutdown(void) | 1966 | nfsd_racache_shutdown(void) |
1968 | { | 1967 | { |
1969 | if (!raparml) | 1968 | struct raparms *raparm, *last_raparm; |
1970 | return; | 1969 | unsigned int i; |
1970 | |||
1971 | dprintk("nfsd: freeing readahead buffers.\n"); | 1971 | dprintk("nfsd: freeing readahead buffers.\n"); |
1972 | kfree(raparml); | 1972 | |
1973 | raparml = NULL; | 1973 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
1974 | raparm = raparm_hash[i].pb_head; | ||
1975 | while(raparm) { | ||
1976 | last_raparm = raparm; | ||
1977 | raparm = raparm->p_next; | ||
1978 | kfree(last_raparm); | ||
1979 | } | ||
1980 | raparm_hash[i].pb_head = NULL; | ||
1981 | } | ||
1974 | } | 1982 | } |
1975 | /* | 1983 | /* |
1976 | * Initialize readahead param cache | 1984 | * Initialize readahead param cache |
@@ -1981,35 +1989,38 @@ nfsd_racache_init(int cache_size) | |||
1981 | int i; | 1989 | int i; |
1982 | int j = 0; | 1990 | int j = 0; |
1983 | int nperbucket; | 1991 | int nperbucket; |
1992 | struct raparms **raparm = NULL; | ||
1984 | 1993 | ||
1985 | 1994 | ||
1986 | if (raparml) | 1995 | if (raparm_hash[0].pb_head) |
1987 | return 0; | 1996 | return 0; |
1988 | if (cache_size < 2*RAPARM_HASH_SIZE) | 1997 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); |
1989 | cache_size = 2*RAPARM_HASH_SIZE; | 1998 | if (nperbucket < 2) |
1990 | raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); | 1999 | nperbucket = 2; |
1991 | 2000 | cache_size = nperbucket * RAPARM_HASH_SIZE; | |
1992 | if (!raparml) { | ||
1993 | printk(KERN_WARNING | ||
1994 | "nfsd: Could not allocate memory read-ahead cache.\n"); | ||
1995 | return -ENOMEM; | ||
1996 | } | ||
1997 | 2001 | ||
1998 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); | 2002 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); |
1999 | for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { | 2003 | |
2000 | raparm_hash[i].pb_head = NULL; | 2004 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
2001 | spin_lock_init(&raparm_hash[i].pb_lock); | 2005 | spin_lock_init(&raparm_hash[i].pb_lock); |
2002 | } | 2006 | |
2003 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); | 2007 | raparm = &raparm_hash[i].pb_head; |
2004 | for (i = 0; i < cache_size - 1; i++) { | 2008 | for (j = 0; j < nperbucket; j++) { |
2005 | if (i % nperbucket == 0) | 2009 | *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); |
2006 | raparm_hash[j++].pb_head = raparml + i; | 2010 | if (!*raparm) |
2007 | if (i % nperbucket < nperbucket-1) | 2011 | goto out_nomem; |
2008 | raparml[i].p_next = raparml + i + 1; | 2012 | raparm = &(*raparm)->p_next; |
2013 | } | ||
2014 | *raparm = NULL; | ||
2009 | } | 2015 | } |
2010 | 2016 | ||
2011 | nfsdstats.ra_size = cache_size; | 2017 | nfsdstats.ra_size = cache_size; |
2012 | return 0; | 2018 | return 0; |
2019 | |||
2020 | out_nomem: | ||
2021 | dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); | ||
2022 | nfsd_racache_shutdown(); | ||
2023 | return -ENOMEM; | ||
2013 | } | 2024 | } |
2014 | 2025 | ||
2015 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 2026 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, | |||
174 | // TODO: Consider moving this lot to a separate function! (AIA) | 174 | // TODO: Consider moving this lot to a separate function! (AIA) |
175 | handle_name: | 175 | handle_name: |
176 | { | 176 | { |
177 | struct dentry *real_dent, *new_dent; | ||
178 | MFT_RECORD *m; | 177 | MFT_RECORD *m; |
179 | ntfs_attr_search_ctx *ctx; | 178 | ntfs_attr_search_ctx *ctx; |
180 | ntfs_inode *ni = NTFS_I(dent_inode); | 179 | ntfs_inode *ni = NTFS_I(dent_inode); |
@@ -255,93 +254,9 @@ handle_name: | |||
255 | } | 254 | } |
256 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); | 255 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); |
257 | 256 | ||
258 | /* | 257 | dent = d_add_ci(dent, dent_inode, &nls_name); |
259 | * Note: No need for dent->d_lock lock as i_mutex is held on the | ||
260 | * parent inode. | ||
261 | */ | ||
262 | |||
263 | /* Does a dentry matching the nls_name exist already? */ | ||
264 | real_dent = d_lookup(dent->d_parent, &nls_name); | ||
265 | /* If not, create it now. */ | ||
266 | if (!real_dent) { | ||
267 | real_dent = d_alloc(dent->d_parent, &nls_name); | ||
268 | kfree(nls_name.name); | ||
269 | if (!real_dent) { | ||
270 | err = -ENOMEM; | ||
271 | goto err_out; | ||
272 | } | ||
273 | new_dent = d_splice_alias(dent_inode, real_dent); | ||
274 | if (new_dent) | ||
275 | dput(real_dent); | ||
276 | else | ||
277 | new_dent = real_dent; | ||
278 | ntfs_debug("Done. (Created new dentry.)"); | ||
279 | return new_dent; | ||
280 | } | ||
281 | kfree(nls_name.name); | 258 | kfree(nls_name.name); |
282 | /* Matching dentry exists, check if it is negative. */ | 259 | return dent; |
283 | if (real_dent->d_inode) { | ||
284 | if (unlikely(real_dent->d_inode != dent_inode)) { | ||
285 | /* This can happen because bad inodes are unhashed. */ | ||
286 | BUG_ON(!is_bad_inode(dent_inode)); | ||
287 | BUG_ON(!is_bad_inode(real_dent->d_inode)); | ||
288 | } | ||
289 | /* | ||
290 | * Already have the inode and the dentry attached, decrement | ||
291 | * the reference count to balance the ntfs_iget() we did | ||
292 | * earlier on. We found the dentry using d_lookup() so it | ||
293 | * cannot be disconnected and thus we do not need to worry | ||
294 | * about any NFS/disconnectedness issues here. | ||
295 | */ | ||
296 | iput(dent_inode); | ||
297 | ntfs_debug("Done. (Already had inode and dentry.)"); | ||
298 | return real_dent; | ||
299 | } | ||
300 | /* | ||
301 | * Negative dentry: instantiate it unless the inode is a directory and | ||
302 | * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), | ||
303 | * in which case d_move() that in place of the found dentry. | ||
304 | */ | ||
305 | if (!S_ISDIR(dent_inode->i_mode)) { | ||
306 | /* Not a directory; everything is easy. */ | ||
307 | d_instantiate(real_dent, dent_inode); | ||
308 | ntfs_debug("Done. (Already had negative file dentry.)"); | ||
309 | return real_dent; | ||
310 | } | ||
311 | spin_lock(&dcache_lock); | ||
312 | if (list_empty(&dent_inode->i_dentry)) { | ||
313 | /* | ||
314 | * Directory without a 'disconnected' dentry; we need to do | ||
315 | * d_instantiate() by hand because it takes dcache_lock which | ||
316 | * we already hold. | ||
317 | */ | ||
318 | list_add(&real_dent->d_alias, &dent_inode->i_dentry); | ||
319 | real_dent->d_inode = dent_inode; | ||
320 | spin_unlock(&dcache_lock); | ||
321 | security_d_instantiate(real_dent, dent_inode); | ||
322 | ntfs_debug("Done. (Already had negative directory dentry.)"); | ||
323 | return real_dent; | ||
324 | } | ||
325 | /* | ||
326 | * Directory with a 'disconnected' dentry; get a reference to the | ||
327 | * 'disconnected' dentry. | ||
328 | */ | ||
329 | new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, | ||
330 | d_alias); | ||
331 | dget_locked(new_dent); | ||
332 | spin_unlock(&dcache_lock); | ||
333 | /* Do security vodoo. */ | ||
334 | security_d_instantiate(real_dent, dent_inode); | ||
335 | /* Move new_dent in place of real_dent. */ | ||
336 | d_move(new_dent, real_dent); | ||
337 | /* Balance the ntfs_iget() we did above. */ | ||
338 | iput(dent_inode); | ||
339 | /* Throw away real_dent. */ | ||
340 | dput(real_dent); | ||
341 | /* Use new_dent as the actual dentry. */ | ||
342 | ntfs_debug("Done. (Already had negative, disconnected directory " | ||
343 | "dentry.)"); | ||
344 | return new_dent; | ||
345 | 260 | ||
346 | eio_err_out: | 261 | eio_err_out: |
347 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); | 262 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
@@ -113,7 +113,7 @@ typedef struct { | |||
113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
117 | */ | 117 | */ |
118 | enum { | 118 | enum { |
119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
149 | */ | 149 | */ |
150 | enum { | 150 | enum { |
151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index f6956de56fdb..589dcdfdfe3c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -34,7 +34,8 @@ ocfs2-objs := \ | |||
34 | symlink.o \ | 34 | symlink.o \ |
35 | sysfile.o \ | 35 | sysfile.o \ |
36 | uptodate.o \ | 36 | uptodate.o \ |
37 | ver.o | 37 | ver.o \ |
38 | xattr.o | ||
38 | 39 | ||
39 | ocfs2_stackglue-objs := stackglue.o | 40 | ocfs2_stackglue-objs := stackglue.o |
40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | 41 | ocfs2_stack_o2cb-objs := stack_o2cb.o |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e068..0cc2deb9394c 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -49,6 +49,340 @@ | |||
49 | 49 | ||
50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
51 | 51 | ||
52 | |||
53 | /* | ||
54 | * Operations for a specific extent tree type. | ||
55 | * | ||
56 | * To implement an on-disk btree (extent tree) type in ocfs2, add | ||
57 | * an ocfs2_extent_tree_operations structure and the matching | ||
58 | * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it | ||
59 | * for the allocation portion of the extent tree. | ||
60 | */ | ||
61 | struct ocfs2_extent_tree_operations { | ||
62 | /* | ||
63 | * last_eb_blk is the block number of the right most leaf extent | ||
64 | * block. Most on-disk structures containing an extent tree store | ||
65 | * this value for fast access. The ->eo_set_last_eb_blk() and | ||
66 | * ->eo_get_last_eb_blk() operations access this value. They are | ||
67 | * both required. | ||
68 | */ | ||
69 | void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et, | ||
70 | u64 blkno); | ||
71 | u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et); | ||
72 | |||
73 | /* | ||
74 | * The on-disk structure usually keeps track of how many total | ||
75 | * clusters are stored in this extent tree. This function updates | ||
76 | * that value. new_clusters is the delta, and must be | ||
77 | * added to the total. Required. | ||
78 | */ | ||
79 | void (*eo_update_clusters)(struct inode *inode, | ||
80 | struct ocfs2_extent_tree *et, | ||
81 | u32 new_clusters); | ||
82 | |||
83 | /* | ||
84 | * If ->eo_insert_check() exists, it is called before rec is | ||
85 | * inserted into the extent tree. It is optional. | ||
86 | */ | ||
87 | int (*eo_insert_check)(struct inode *inode, | ||
88 | struct ocfs2_extent_tree *et, | ||
89 | struct ocfs2_extent_rec *rec); | ||
90 | int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); | ||
91 | |||
92 | /* | ||
93 | * -------------------------------------------------------------- | ||
94 | * The remaining are internal to ocfs2_extent_tree and don't have | ||
95 | * accessor functions | ||
96 | */ | ||
97 | |||
98 | /* | ||
99 | * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el. | ||
100 | * It is required. | ||
101 | */ | ||
102 | void (*eo_fill_root_el)(struct ocfs2_extent_tree *et); | ||
103 | |||
104 | /* | ||
105 | * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if | ||
106 | * it exists. If it does not, et->et_max_leaf_clusters is set | ||
107 | * to 0 (unlimited). Optional. | ||
108 | */ | ||
109 | void (*eo_fill_max_leaf_clusters)(struct inode *inode, | ||
110 | struct ocfs2_extent_tree *et); | ||
111 | }; | ||
112 | |||
113 | |||
114 | /* | ||
115 | * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check | ||
116 | * in the methods. | ||
117 | */ | ||
118 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); | ||
119 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
120 | u64 blkno); | ||
121 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
122 | struct ocfs2_extent_tree *et, | ||
123 | u32 clusters); | ||
124 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
125 | struct ocfs2_extent_tree *et, | ||
126 | struct ocfs2_extent_rec *rec); | ||
127 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
128 | struct ocfs2_extent_tree *et); | ||
129 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); | ||
130 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { | ||
131 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, | ||
132 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, | ||
133 | .eo_update_clusters = ocfs2_dinode_update_clusters, | ||
134 | .eo_insert_check = ocfs2_dinode_insert_check, | ||
135 | .eo_sanity_check = ocfs2_dinode_sanity_check, | ||
136 | .eo_fill_root_el = ocfs2_dinode_fill_root_el, | ||
137 | }; | ||
138 | |||
139 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
140 | u64 blkno) | ||
141 | { | ||
142 | struct ocfs2_dinode *di = et->et_object; | ||
143 | |||
144 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
145 | di->i_last_eb_blk = cpu_to_le64(blkno); | ||
146 | } | ||
147 | |||
148 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
149 | { | ||
150 | struct ocfs2_dinode *di = et->et_object; | ||
151 | |||
152 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
153 | return le64_to_cpu(di->i_last_eb_blk); | ||
154 | } | ||
155 | |||
156 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
157 | struct ocfs2_extent_tree *et, | ||
158 | u32 clusters) | ||
159 | { | ||
160 | struct ocfs2_dinode *di = et->et_object; | ||
161 | |||
162 | le32_add_cpu(&di->i_clusters, clusters); | ||
163 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
164 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
165 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
166 | } | ||
167 | |||
168 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
169 | struct ocfs2_extent_tree *et, | ||
170 | struct ocfs2_extent_rec *rec) | ||
171 | { | ||
172 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
173 | |||
174 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
175 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
176 | (OCFS2_I(inode)->ip_clusters != rec->e_cpos), | ||
177 | "Device %s, asking for sparse allocation: inode %llu, " | ||
178 | "cpos %u, clusters %u\n", | ||
179 | osb->dev_str, | ||
180 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
181 | rec->e_cpos, | ||
182 | OCFS2_I(inode)->ip_clusters); | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
188 | struct ocfs2_extent_tree *et) | ||
189 | { | ||
190 | int ret = 0; | ||
191 | struct ocfs2_dinode *di; | ||
192 | |||
193 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
194 | |||
195 | di = et->et_object; | ||
196 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
197 | ret = -EIO; | ||
198 | ocfs2_error(inode->i_sb, | ||
199 | "Inode %llu has invalid path root", | ||
200 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
201 | } | ||
202 | |||
203 | return ret; | ||
204 | } | ||
205 | |||
206 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) | ||
207 | { | ||
208 | struct ocfs2_dinode *di = et->et_object; | ||
209 | |||
210 | et->et_root_el = &di->id2.i_list; | ||
211 | } | ||
212 | |||
213 | |||
214 | static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et) | ||
215 | { | ||
216 | struct ocfs2_xattr_value_root *xv = et->et_object; | ||
217 | |||
218 | et->et_root_el = &xv->xr_list; | ||
219 | } | ||
220 | |||
221 | static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
222 | u64 blkno) | ||
223 | { | ||
224 | struct ocfs2_xattr_value_root *xv = | ||
225 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
226 | |||
227 | xv->xr_last_eb_blk = cpu_to_le64(blkno); | ||
228 | } | ||
229 | |||
230 | static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
231 | { | ||
232 | struct ocfs2_xattr_value_root *xv = | ||
233 | (struct ocfs2_xattr_value_root *) et->et_object; | ||
234 | |||
235 | return le64_to_cpu(xv->xr_last_eb_blk); | ||
236 | } | ||
237 | |||
238 | static void ocfs2_xattr_value_update_clusters(struct inode *inode, | ||
239 | struct ocfs2_extent_tree *et, | ||
240 | u32 clusters) | ||
241 | { | ||
242 | struct ocfs2_xattr_value_root *xv = | ||
243 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
244 | |||
245 | le32_add_cpu(&xv->xr_clusters, clusters); | ||
246 | } | ||
247 | |||
248 | static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { | ||
249 | .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk, | ||
250 | .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk, | ||
251 | .eo_update_clusters = ocfs2_xattr_value_update_clusters, | ||
252 | .eo_fill_root_el = ocfs2_xattr_value_fill_root_el, | ||
253 | }; | ||
254 | |||
255 | static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et) | ||
256 | { | ||
257 | struct ocfs2_xattr_block *xb = et->et_object; | ||
258 | |||
259 | et->et_root_el = &xb->xb_attrs.xb_root.xt_list; | ||
260 | } | ||
261 | |||
262 | static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, | ||
263 | struct ocfs2_extent_tree *et) | ||
264 | { | ||
265 | et->et_max_leaf_clusters = | ||
266 | ocfs2_clusters_for_bytes(inode->i_sb, | ||
267 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE); | ||
268 | } | ||
269 | |||
270 | static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
271 | u64 blkno) | ||
272 | { | ||
273 | struct ocfs2_xattr_block *xb = et->et_object; | ||
274 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
275 | |||
276 | xt->xt_last_eb_blk = cpu_to_le64(blkno); | ||
277 | } | ||
278 | |||
279 | static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
280 | { | ||
281 | struct ocfs2_xattr_block *xb = et->et_object; | ||
282 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
283 | |||
284 | return le64_to_cpu(xt->xt_last_eb_blk); | ||
285 | } | ||
286 | |||
287 | static void ocfs2_xattr_tree_update_clusters(struct inode *inode, | ||
288 | struct ocfs2_extent_tree *et, | ||
289 | u32 clusters) | ||
290 | { | ||
291 | struct ocfs2_xattr_block *xb = et->et_object; | ||
292 | |||
293 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters); | ||
294 | } | ||
295 | |||
296 | static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { | ||
297 | .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk, | ||
298 | .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk, | ||
299 | .eo_update_clusters = ocfs2_xattr_tree_update_clusters, | ||
300 | .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el, | ||
301 | .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, | ||
302 | }; | ||
303 | |||
304 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | ||
305 | struct inode *inode, | ||
306 | struct buffer_head *bh, | ||
307 | void *obj, | ||
308 | struct ocfs2_extent_tree_operations *ops) | ||
309 | { | ||
310 | et->et_ops = ops; | ||
311 | et->et_root_bh = bh; | ||
312 | if (!obj) | ||
313 | obj = (void *)bh->b_data; | ||
314 | et->et_object = obj; | ||
315 | |||
316 | et->et_ops->eo_fill_root_el(et); | ||
317 | if (!et->et_ops->eo_fill_max_leaf_clusters) | ||
318 | et->et_max_leaf_clusters = 0; | ||
319 | else | ||
320 | et->et_ops->eo_fill_max_leaf_clusters(inode, et); | ||
321 | } | ||
322 | |||
323 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
324 | struct inode *inode, | ||
325 | struct buffer_head *bh) | ||
326 | { | ||
327 | __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); | ||
328 | } | ||
329 | |||
330 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
331 | struct inode *inode, | ||
332 | struct buffer_head *bh) | ||
333 | { | ||
334 | __ocfs2_init_extent_tree(et, inode, bh, NULL, | ||
335 | &ocfs2_xattr_tree_et_ops); | ||
336 | } | ||
337 | |||
338 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
339 | struct inode *inode, | ||
340 | struct buffer_head *bh, | ||
341 | struct ocfs2_xattr_value_root *xv) | ||
342 | { | ||
343 | __ocfs2_init_extent_tree(et, inode, bh, xv, | ||
344 | &ocfs2_xattr_value_et_ops); | ||
345 | } | ||
346 | |||
347 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
348 | u64 new_last_eb_blk) | ||
349 | { | ||
350 | et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk); | ||
351 | } | ||
352 | |||
353 | static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
354 | { | ||
355 | return et->et_ops->eo_get_last_eb_blk(et); | ||
356 | } | ||
357 | |||
358 | static inline void ocfs2_et_update_clusters(struct inode *inode, | ||
359 | struct ocfs2_extent_tree *et, | ||
360 | u32 clusters) | ||
361 | { | ||
362 | et->et_ops->eo_update_clusters(inode, et, clusters); | ||
363 | } | ||
364 | |||
365 | static inline int ocfs2_et_insert_check(struct inode *inode, | ||
366 | struct ocfs2_extent_tree *et, | ||
367 | struct ocfs2_extent_rec *rec) | ||
368 | { | ||
369 | int ret = 0; | ||
370 | |||
371 | if (et->et_ops->eo_insert_check) | ||
372 | ret = et->et_ops->eo_insert_check(inode, et, rec); | ||
373 | return ret; | ||
374 | } | ||
375 | |||
376 | static inline int ocfs2_et_sanity_check(struct inode *inode, | ||
377 | struct ocfs2_extent_tree *et) | ||
378 | { | ||
379 | int ret = 0; | ||
380 | |||
381 | if (et->et_ops->eo_sanity_check) | ||
382 | ret = et->et_ops->eo_sanity_check(inode, et); | ||
383 | return ret; | ||
384 | } | ||
385 | |||
52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 386 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
53 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | 387 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, |
54 | struct ocfs2_extent_block *eb); | 388 | struct ocfs2_extent_block *eb); |
@@ -205,17 +539,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, | |||
205 | } | 539 | } |
206 | 540 | ||
207 | /* | 541 | /* |
208 | * Allocate and initialize a new path based on a disk inode tree. | ||
209 | */ | ||
210 | static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh) | ||
211 | { | ||
212 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
213 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
214 | |||
215 | return ocfs2_new_path(di_bh, el); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Convenience function to journal all components in a path. | 542 | * Convenience function to journal all components in a path. |
220 | */ | 543 | */ |
221 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, | 544 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, |
@@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt { | |||
368 | */ | 691 | */ |
369 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 692 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
370 | struct inode *inode, | 693 | struct inode *inode, |
371 | struct ocfs2_dinode *fe) | 694 | struct ocfs2_extent_tree *et) |
372 | { | 695 | { |
373 | int retval; | 696 | int retval; |
374 | struct ocfs2_extent_list *el; | 697 | struct ocfs2_extent_list *el = NULL; |
375 | struct ocfs2_extent_block *eb; | 698 | struct ocfs2_extent_block *eb; |
376 | struct buffer_head *eb_bh = NULL; | 699 | struct buffer_head *eb_bh = NULL; |
700 | u64 last_eb_blk = 0; | ||
377 | 701 | ||
378 | mlog_entry_void(); | 702 | mlog_entry_void(); |
379 | 703 | ||
380 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 704 | el = et->et_root_el; |
381 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 705 | last_eb_blk = ocfs2_et_get_last_eb_blk(et); |
382 | retval = -EIO; | ||
383 | goto bail; | ||
384 | } | ||
385 | 706 | ||
386 | if (fe->i_last_eb_blk) { | 707 | if (last_eb_blk) { |
387 | retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 708 | retval = ocfs2_read_block(inode, last_eb_blk, |
388 | &eb_bh, OCFS2_BH_CACHED, inode); | 709 | &eb_bh); |
389 | if (retval < 0) { | 710 | if (retval < 0) { |
390 | mlog_errno(retval); | 711 | mlog_errno(retval); |
391 | goto bail; | 712 | goto bail; |
392 | } | 713 | } |
393 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 714 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
394 | el = &eb->h_list; | 715 | el = &eb->h_list; |
395 | } else | 716 | } |
396 | el = &fe->id2.i_list; | ||
397 | 717 | ||
398 | BUG_ON(el->l_tree_depth != 0); | 718 | BUG_ON(el->l_tree_depth != 0); |
399 | 719 | ||
400 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); | 720 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); |
401 | bail: | 721 | bail: |
402 | if (eb_bh) | 722 | brelse(eb_bh); |
403 | brelse(eb_bh); | ||
404 | 723 | ||
405 | mlog_exit(retval); | 724 | mlog_exit(retval); |
406 | return retval; | 725 | return retval; |
@@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
486 | bail: | 805 | bail: |
487 | if (status < 0) { | 806 | if (status < 0) { |
488 | for(i = 0; i < wanted; i++) { | 807 | for(i = 0; i < wanted; i++) { |
489 | if (bhs[i]) | 808 | brelse(bhs[i]); |
490 | brelse(bhs[i]); | ||
491 | bhs[i] = NULL; | 809 | bhs[i] = NULL; |
492 | } | 810 | } |
493 | } | 811 | } |
@@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) | |||
531 | static int ocfs2_add_branch(struct ocfs2_super *osb, | 849 | static int ocfs2_add_branch(struct ocfs2_super *osb, |
532 | handle_t *handle, | 850 | handle_t *handle, |
533 | struct inode *inode, | 851 | struct inode *inode, |
534 | struct buffer_head *fe_bh, | 852 | struct ocfs2_extent_tree *et, |
535 | struct buffer_head *eb_bh, | 853 | struct buffer_head *eb_bh, |
536 | struct buffer_head **last_eb_bh, | 854 | struct buffer_head **last_eb_bh, |
537 | struct ocfs2_alloc_context *meta_ac) | 855 | struct ocfs2_alloc_context *meta_ac) |
@@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
540 | u64 next_blkno, new_last_eb_blk; | 858 | u64 next_blkno, new_last_eb_blk; |
541 | struct buffer_head *bh; | 859 | struct buffer_head *bh; |
542 | struct buffer_head **new_eb_bhs = NULL; | 860 | struct buffer_head **new_eb_bhs = NULL; |
543 | struct ocfs2_dinode *fe; | ||
544 | struct ocfs2_extent_block *eb; | 861 | struct ocfs2_extent_block *eb; |
545 | struct ocfs2_extent_list *eb_el; | 862 | struct ocfs2_extent_list *eb_el; |
546 | struct ocfs2_extent_list *el; | 863 | struct ocfs2_extent_list *el; |
@@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
550 | 867 | ||
551 | BUG_ON(!last_eb_bh || !*last_eb_bh); | 868 | BUG_ON(!last_eb_bh || !*last_eb_bh); |
552 | 869 | ||
553 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
554 | |||
555 | if (eb_bh) { | 870 | if (eb_bh) { |
556 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 871 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
557 | el = &eb->h_list; | 872 | el = &eb->h_list; |
558 | } else | 873 | } else |
559 | el = &fe->id2.i_list; | 874 | el = et->et_root_el; |
560 | 875 | ||
561 | /* we never add a branch to a leaf. */ | 876 | /* we never add a branch to a leaf. */ |
562 | BUG_ON(!el->l_tree_depth); | 877 | BUG_ON(!el->l_tree_depth); |
@@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
646 | mlog_errno(status); | 961 | mlog_errno(status); |
647 | goto bail; | 962 | goto bail; |
648 | } | 963 | } |
649 | status = ocfs2_journal_access(handle, inode, fe_bh, | 964 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
650 | OCFS2_JOURNAL_ACCESS_WRITE); | 965 | OCFS2_JOURNAL_ACCESS_WRITE); |
651 | if (status < 0) { | 966 | if (status < 0) { |
652 | mlog_errno(status); | 967 | mlog_errno(status); |
@@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
662 | } | 977 | } |
663 | 978 | ||
664 | /* Link the new branch into the rest of the tree (el will | 979 | /* Link the new branch into the rest of the tree (el will |
665 | * either be on the fe, or the extent block passed in. */ | 980 | * either be on the root_bh, or the extent block passed in. */ |
666 | i = le16_to_cpu(el->l_next_free_rec); | 981 | i = le16_to_cpu(el->l_next_free_rec); |
667 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); | 982 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); |
668 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); | 983 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); |
@@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
671 | 986 | ||
672 | /* fe needs a new last extent block pointer, as does the | 987 | /* fe needs a new last extent block pointer, as does the |
673 | * next_leaf on the previously last-extent-block. */ | 988 | * next_leaf on the previously last-extent-block. */ |
674 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); | 989 | ocfs2_et_set_last_eb_blk(et, new_last_eb_blk); |
675 | 990 | ||
676 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; | 991 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; |
677 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); | 992 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); |
@@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
679 | status = ocfs2_journal_dirty(handle, *last_eb_bh); | 994 | status = ocfs2_journal_dirty(handle, *last_eb_bh); |
680 | if (status < 0) | 995 | if (status < 0) |
681 | mlog_errno(status); | 996 | mlog_errno(status); |
682 | status = ocfs2_journal_dirty(handle, fe_bh); | 997 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
683 | if (status < 0) | 998 | if (status < 0) |
684 | mlog_errno(status); | 999 | mlog_errno(status); |
685 | if (eb_bh) { | 1000 | if (eb_bh) { |
@@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
700 | bail: | 1015 | bail: |
701 | if (new_eb_bhs) { | 1016 | if (new_eb_bhs) { |
702 | for (i = 0; i < new_blocks; i++) | 1017 | for (i = 0; i < new_blocks; i++) |
703 | if (new_eb_bhs[i]) | 1018 | brelse(new_eb_bhs[i]); |
704 | brelse(new_eb_bhs[i]); | ||
705 | kfree(new_eb_bhs); | 1019 | kfree(new_eb_bhs); |
706 | } | 1020 | } |
707 | 1021 | ||
@@ -717,16 +1031,15 @@ bail: | |||
717 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | 1031 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, |
718 | handle_t *handle, | 1032 | handle_t *handle, |
719 | struct inode *inode, | 1033 | struct inode *inode, |
720 | struct buffer_head *fe_bh, | 1034 | struct ocfs2_extent_tree *et, |
721 | struct ocfs2_alloc_context *meta_ac, | 1035 | struct ocfs2_alloc_context *meta_ac, |
722 | struct buffer_head **ret_new_eb_bh) | 1036 | struct buffer_head **ret_new_eb_bh) |
723 | { | 1037 | { |
724 | int status, i; | 1038 | int status, i; |
725 | u32 new_clusters; | 1039 | u32 new_clusters; |
726 | struct buffer_head *new_eb_bh = NULL; | 1040 | struct buffer_head *new_eb_bh = NULL; |
727 | struct ocfs2_dinode *fe; | ||
728 | struct ocfs2_extent_block *eb; | 1041 | struct ocfs2_extent_block *eb; |
729 | struct ocfs2_extent_list *fe_el; | 1042 | struct ocfs2_extent_list *root_el; |
730 | struct ocfs2_extent_list *eb_el; | 1043 | struct ocfs2_extent_list *eb_el; |
731 | 1044 | ||
732 | mlog_entry_void(); | 1045 | mlog_entry_void(); |
@@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
746 | } | 1059 | } |
747 | 1060 | ||
748 | eb_el = &eb->h_list; | 1061 | eb_el = &eb->h_list; |
749 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1062 | root_el = et->et_root_el; |
750 | fe_el = &fe->id2.i_list; | ||
751 | 1063 | ||
752 | status = ocfs2_journal_access(handle, inode, new_eb_bh, | 1064 | status = ocfs2_journal_access(handle, inode, new_eb_bh, |
753 | OCFS2_JOURNAL_ACCESS_CREATE); | 1065 | OCFS2_JOURNAL_ACCESS_CREATE); |
@@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
756 | goto bail; | 1068 | goto bail; |
757 | } | 1069 | } |
758 | 1070 | ||
759 | /* copy the fe data into the new extent block */ | 1071 | /* copy the root extent list data into the new extent block */ |
760 | eb_el->l_tree_depth = fe_el->l_tree_depth; | 1072 | eb_el->l_tree_depth = root_el->l_tree_depth; |
761 | eb_el->l_next_free_rec = fe_el->l_next_free_rec; | 1073 | eb_el->l_next_free_rec = root_el->l_next_free_rec; |
762 | for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1074 | for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
763 | eb_el->l_recs[i] = fe_el->l_recs[i]; | 1075 | eb_el->l_recs[i] = root_el->l_recs[i]; |
764 | 1076 | ||
765 | status = ocfs2_journal_dirty(handle, new_eb_bh); | 1077 | status = ocfs2_journal_dirty(handle, new_eb_bh); |
766 | if (status < 0) { | 1078 | if (status < 0) { |
@@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
768 | goto bail; | 1080 | goto bail; |
769 | } | 1081 | } |
770 | 1082 | ||
771 | status = ocfs2_journal_access(handle, inode, fe_bh, | 1083 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
772 | OCFS2_JOURNAL_ACCESS_WRITE); | 1084 | OCFS2_JOURNAL_ACCESS_WRITE); |
773 | if (status < 0) { | 1085 | if (status < 0) { |
774 | mlog_errno(status); | 1086 | mlog_errno(status); |
@@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
777 | 1089 | ||
778 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); | 1090 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); |
779 | 1091 | ||
780 | /* update fe now */ | 1092 | /* update root_bh now */ |
781 | le16_add_cpu(&fe_el->l_tree_depth, 1); | 1093 | le16_add_cpu(&root_el->l_tree_depth, 1); |
782 | fe_el->l_recs[0].e_cpos = 0; | 1094 | root_el->l_recs[0].e_cpos = 0; |
783 | fe_el->l_recs[0].e_blkno = eb->h_blkno; | 1095 | root_el->l_recs[0].e_blkno = eb->h_blkno; |
784 | fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); | 1096 | root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); |
785 | for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1097 | for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
786 | memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); | 1098 | memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); |
787 | fe_el->l_next_free_rec = cpu_to_le16(1); | 1099 | root_el->l_next_free_rec = cpu_to_le16(1); |
788 | 1100 | ||
789 | /* If this is our 1st tree depth shift, then last_eb_blk | 1101 | /* If this is our 1st tree depth shift, then last_eb_blk |
790 | * becomes the allocated extent block */ | 1102 | * becomes the allocated extent block */ |
791 | if (fe_el->l_tree_depth == cpu_to_le16(1)) | 1103 | if (root_el->l_tree_depth == cpu_to_le16(1)) |
792 | fe->i_last_eb_blk = eb->h_blkno; | 1104 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
793 | 1105 | ||
794 | status = ocfs2_journal_dirty(handle, fe_bh); | 1106 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
795 | if (status < 0) { | 1107 | if (status < 0) { |
796 | mlog_errno(status); | 1108 | mlog_errno(status); |
797 | goto bail; | 1109 | goto bail; |
@@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
801 | new_eb_bh = NULL; | 1113 | new_eb_bh = NULL; |
802 | status = 0; | 1114 | status = 0; |
803 | bail: | 1115 | bail: |
804 | if (new_eb_bh) | 1116 | brelse(new_eb_bh); |
805 | brelse(new_eb_bh); | ||
806 | 1117 | ||
807 | mlog_exit(status); | 1118 | mlog_exit(status); |
808 | return status; | 1119 | return status; |
@@ -817,22 +1128,21 @@ bail: | |||
817 | * 1) a lowest extent block is found, then we pass it back in | 1128 | * 1) a lowest extent block is found, then we pass it back in |
818 | * *lowest_eb_bh and return '0' | 1129 | * *lowest_eb_bh and return '0' |
819 | * | 1130 | * |
820 | * 2) the search fails to find anything, but the dinode has room. We | 1131 | * 2) the search fails to find anything, but the root_el has room. We |
821 | * pass NULL back in *lowest_eb_bh, but still return '0' | 1132 | * pass NULL back in *lowest_eb_bh, but still return '0' |
822 | * | 1133 | * |
823 | * 3) the search fails to find anything AND the dinode is full, in | 1134 | * 3) the search fails to find anything AND the root_el is full, in |
824 | * which case we return > 0 | 1135 | * which case we return > 0 |
825 | * | 1136 | * |
826 | * return status < 0 indicates an error. | 1137 | * return status < 0 indicates an error. |
827 | */ | 1138 | */ |
828 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, | 1139 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, |
829 | struct inode *inode, | 1140 | struct inode *inode, |
830 | struct buffer_head *fe_bh, | 1141 | struct ocfs2_extent_tree *et, |
831 | struct buffer_head **target_bh) | 1142 | struct buffer_head **target_bh) |
832 | { | 1143 | { |
833 | int status = 0, i; | 1144 | int status = 0, i; |
834 | u64 blkno; | 1145 | u64 blkno; |
835 | struct ocfs2_dinode *fe; | ||
836 | struct ocfs2_extent_block *eb; | 1146 | struct ocfs2_extent_block *eb; |
837 | struct ocfs2_extent_list *el; | 1147 | struct ocfs2_extent_list *el; |
838 | struct buffer_head *bh = NULL; | 1148 | struct buffer_head *bh = NULL; |
@@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
842 | 1152 | ||
843 | *target_bh = NULL; | 1153 | *target_bh = NULL; |
844 | 1154 | ||
845 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1155 | el = et->et_root_el; |
846 | el = &fe->id2.i_list; | ||
847 | 1156 | ||
848 | while(le16_to_cpu(el->l_tree_depth) > 1) { | 1157 | while(le16_to_cpu(el->l_tree_depth) > 1) { |
849 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 1158 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
@@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
864 | goto bail; | 1173 | goto bail; |
865 | } | 1174 | } |
866 | 1175 | ||
867 | if (bh) { | 1176 | brelse(bh); |
868 | brelse(bh); | 1177 | bh = NULL; |
869 | bh = NULL; | ||
870 | } | ||
871 | 1178 | ||
872 | status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, | 1179 | status = ocfs2_read_block(inode, blkno, &bh); |
873 | inode); | ||
874 | if (status < 0) { | 1180 | if (status < 0) { |
875 | mlog_errno(status); | 1181 | mlog_errno(status); |
876 | goto bail; | 1182 | goto bail; |
@@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
886 | 1192 | ||
887 | if (le16_to_cpu(el->l_next_free_rec) < | 1193 | if (le16_to_cpu(el->l_next_free_rec) < |
888 | le16_to_cpu(el->l_count)) { | 1194 | le16_to_cpu(el->l_count)) { |
889 | if (lowest_bh) | 1195 | brelse(lowest_bh); |
890 | brelse(lowest_bh); | ||
891 | lowest_bh = bh; | 1196 | lowest_bh = bh; |
892 | get_bh(lowest_bh); | 1197 | get_bh(lowest_bh); |
893 | } | 1198 | } |
@@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
895 | 1200 | ||
896 | /* If we didn't find one and the fe doesn't have any room, | 1201 | /* If we didn't find one and the fe doesn't have any room, |
897 | * then return '1' */ | 1202 | * then return '1' */ |
898 | if (!lowest_bh | 1203 | el = et->et_root_el; |
899 | && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) | 1204 | if (!lowest_bh && (el->l_next_free_rec == el->l_count)) |
900 | status = 1; | 1205 | status = 1; |
901 | 1206 | ||
902 | *target_bh = lowest_bh; | 1207 | *target_bh = lowest_bh; |
903 | bail: | 1208 | bail: |
904 | if (bh) | 1209 | brelse(bh); |
905 | brelse(bh); | ||
906 | 1210 | ||
907 | mlog_exit(status); | 1211 | mlog_exit(status); |
908 | return status; | 1212 | return status; |
@@ -919,19 +1223,19 @@ bail: | |||
919 | * *last_eb_bh will be updated by ocfs2_add_branch(). | 1223 | * *last_eb_bh will be updated by ocfs2_add_branch(). |
920 | */ | 1224 | */ |
921 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | 1225 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, |
922 | struct buffer_head *di_bh, int *final_depth, | 1226 | struct ocfs2_extent_tree *et, int *final_depth, |
923 | struct buffer_head **last_eb_bh, | 1227 | struct buffer_head **last_eb_bh, |
924 | struct ocfs2_alloc_context *meta_ac) | 1228 | struct ocfs2_alloc_context *meta_ac) |
925 | { | 1229 | { |
926 | int ret, shift; | 1230 | int ret, shift; |
927 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1231 | struct ocfs2_extent_list *el = et->et_root_el; |
928 | int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); | 1232 | int depth = le16_to_cpu(el->l_tree_depth); |
929 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
930 | struct buffer_head *bh = NULL; | 1234 | struct buffer_head *bh = NULL; |
931 | 1235 | ||
932 | BUG_ON(meta_ac == NULL); | 1236 | BUG_ON(meta_ac == NULL); |
933 | 1237 | ||
934 | shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); | 1238 | shift = ocfs2_find_branch_target(osb, inode, et, &bh); |
935 | if (shift < 0) { | 1239 | if (shift < 0) { |
936 | ret = shift; | 1240 | ret = shift; |
937 | mlog_errno(ret); | 1241 | mlog_errno(ret); |
@@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
948 | /* ocfs2_shift_tree_depth will return us a buffer with | 1252 | /* ocfs2_shift_tree_depth will return us a buffer with |
949 | * the new extent block (so we can pass that to | 1253 | * the new extent block (so we can pass that to |
950 | * ocfs2_add_branch). */ | 1254 | * ocfs2_add_branch). */ |
951 | ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, | 1255 | ret = ocfs2_shift_tree_depth(osb, handle, inode, et, |
952 | meta_ac, &bh); | 1256 | meta_ac, &bh); |
953 | if (ret < 0) { | 1257 | if (ret < 0) { |
954 | mlog_errno(ret); | 1258 | mlog_errno(ret); |
@@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
975 | /* call ocfs2_add_branch to add the final part of the tree with | 1279 | /* call ocfs2_add_branch to add the final part of the tree with |
976 | * the new data. */ | 1280 | * the new data. */ |
977 | mlog(0, "add branch. bh = %p\n", bh); | 1281 | mlog(0, "add branch. bh = %p\n", bh); |
978 | ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, | 1282 | ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, |
979 | meta_ac); | 1283 | meta_ac); |
980 | if (ret < 0) { | 1284 | if (ret < 0) { |
981 | mlog_errno(ret); | 1285 | mlog_errno(ret); |
@@ -990,15 +1294,6 @@ out: | |||
990 | } | 1294 | } |
991 | 1295 | ||
992 | /* | 1296 | /* |
993 | * This is only valid for leaf nodes, which are the only ones that can | ||
994 | * have empty extents anyway. | ||
995 | */ | ||
996 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
997 | { | ||
998 | return !rec->e_leaf_clusters; | ||
999 | } | ||
1000 | |||
1001 | /* | ||
1002 | * This function will discard the rightmost extent record. | 1297 | * This function will discard the rightmost extent record. |
1003 | */ | 1298 | */ |
1004 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) | 1299 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) |
@@ -1245,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1245 | 1540 | ||
1246 | brelse(bh); | 1541 | brelse(bh); |
1247 | bh = NULL; | 1542 | bh = NULL; |
1248 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, | 1543 | ret = ocfs2_read_block(inode, blkno, &bh); |
1249 | &bh, OCFS2_BH_CACHED, inode); | ||
1250 | if (ret) { | 1544 | if (ret) { |
1251 | mlog_errno(ret); | 1545 | mlog_errno(ret); |
1252 | goto out; | 1546 | goto out; |
@@ -2067,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2067 | struct ocfs2_path *right_path, | 2361 | struct ocfs2_path *right_path, |
2068 | int subtree_index, | 2362 | int subtree_index, |
2069 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2363 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2070 | int *deleted) | 2364 | int *deleted, |
2365 | struct ocfs2_extent_tree *et) | ||
2071 | { | 2366 | { |
2072 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | 2367 | int ret, i, del_right_subtree = 0, right_has_empty = 0; |
2073 | struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); | 2368 | struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); |
2074 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2075 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; | 2369 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; |
2076 | struct ocfs2_extent_block *eb; | 2370 | struct ocfs2_extent_block *eb; |
2077 | 2371 | ||
@@ -2123,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2123 | * We have to update i_last_eb_blk during the meta | 2417 | * We have to update i_last_eb_blk during the meta |
2124 | * data delete. | 2418 | * data delete. |
2125 | */ | 2419 | */ |
2126 | ret = ocfs2_journal_access(handle, inode, di_bh, | 2420 | ret = ocfs2_journal_access(handle, inode, et_root_bh, |
2127 | OCFS2_JOURNAL_ACCESS_WRITE); | 2421 | OCFS2_JOURNAL_ACCESS_WRITE); |
2128 | if (ret) { | 2422 | if (ret) { |
2129 | mlog_errno(ret); | 2423 | mlog_errno(ret); |
@@ -2198,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2198 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2492 | ocfs2_update_edge_lengths(inode, handle, left_path); |
2199 | 2493 | ||
2200 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2494 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
2201 | di->i_last_eb_blk = eb->h_blkno; | 2495 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
2202 | 2496 | ||
2203 | /* | 2497 | /* |
2204 | * Removal of the extent in the left leaf was skipped | 2498 | * Removal of the extent in the left leaf was skipped |
@@ -2208,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2208 | if (right_has_empty) | 2502 | if (right_has_empty) |
2209 | ocfs2_remove_empty_extent(left_leaf_el); | 2503 | ocfs2_remove_empty_extent(left_leaf_el); |
2210 | 2504 | ||
2211 | ret = ocfs2_journal_dirty(handle, di_bh); | 2505 | ret = ocfs2_journal_dirty(handle, et_root_bh); |
2212 | if (ret) | 2506 | if (ret) |
2213 | mlog_errno(ret); | 2507 | mlog_errno(ret); |
2214 | 2508 | ||
@@ -2331,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2331 | handle_t *handle, int orig_credits, | 2625 | handle_t *handle, int orig_credits, |
2332 | struct ocfs2_path *path, | 2626 | struct ocfs2_path *path, |
2333 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2627 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2334 | struct ocfs2_path **empty_extent_path) | 2628 | struct ocfs2_path **empty_extent_path, |
2629 | struct ocfs2_extent_tree *et) | ||
2335 | { | 2630 | { |
2336 | int ret, subtree_root, deleted; | 2631 | int ret, subtree_root, deleted; |
2337 | u32 right_cpos; | 2632 | u32 right_cpos; |
@@ -2404,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2404 | 2699 | ||
2405 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | 2700 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, |
2406 | right_path, subtree_root, | 2701 | right_path, subtree_root, |
2407 | dealloc, &deleted); | 2702 | dealloc, &deleted, et); |
2408 | if (ret == -EAGAIN) { | 2703 | if (ret == -EAGAIN) { |
2409 | /* | 2704 | /* |
2410 | * The rotation has to temporarily stop due to | 2705 | * The rotation has to temporarily stop due to |
@@ -2447,29 +2742,20 @@ out: | |||
2447 | } | 2742 | } |
2448 | 2743 | ||
2449 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | 2744 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, |
2450 | struct ocfs2_path *path, | 2745 | struct ocfs2_path *path, |
2451 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2746 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2747 | struct ocfs2_extent_tree *et) | ||
2452 | { | 2748 | { |
2453 | int ret, subtree_index; | 2749 | int ret, subtree_index; |
2454 | u32 cpos; | 2750 | u32 cpos; |
2455 | struct ocfs2_path *left_path = NULL; | 2751 | struct ocfs2_path *left_path = NULL; |
2456 | struct ocfs2_dinode *di; | ||
2457 | struct ocfs2_extent_block *eb; | 2752 | struct ocfs2_extent_block *eb; |
2458 | struct ocfs2_extent_list *el; | 2753 | struct ocfs2_extent_list *el; |
2459 | 2754 | ||
2460 | /* | ||
2461 | * XXX: This code assumes that the root is an inode, which is | ||
2462 | * true for now but may change as tree code gets generic. | ||
2463 | */ | ||
2464 | di = (struct ocfs2_dinode *)path_root_bh(path)->b_data; | ||
2465 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
2466 | ret = -EIO; | ||
2467 | ocfs2_error(inode->i_sb, | ||
2468 | "Inode %llu has invalid path root", | ||
2469 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
2470 | goto out; | ||
2471 | } | ||
2472 | 2755 | ||
2756 | ret = ocfs2_et_sanity_check(inode, et); | ||
2757 | if (ret) | ||
2758 | goto out; | ||
2473 | /* | 2759 | /* |
2474 | * There's two ways we handle this depending on | 2760 | * There's two ways we handle this depending on |
2475 | * whether path is the only existing one. | 2761 | * whether path is the only existing one. |
@@ -2526,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
2526 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2812 | ocfs2_update_edge_lengths(inode, handle, left_path); |
2527 | 2813 | ||
2528 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2814 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
2529 | di->i_last_eb_blk = eb->h_blkno; | 2815 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
2530 | } else { | 2816 | } else { |
2531 | /* | 2817 | /* |
2532 | * 'path' is also the leftmost path which | 2818 | * 'path' is also the leftmost path which |
@@ -2537,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
2537 | */ | 2823 | */ |
2538 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | 2824 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); |
2539 | 2825 | ||
2540 | el = &di->id2.i_list; | 2826 | el = et->et_root_el; |
2541 | el->l_tree_depth = 0; | 2827 | el->l_tree_depth = 0; |
2542 | el->l_next_free_rec = 0; | 2828 | el->l_next_free_rec = 0; |
2543 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | 2829 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); |
2544 | 2830 | ||
2545 | di->i_last_eb_blk = 0; | 2831 | ocfs2_et_set_last_eb_blk(et, 0); |
2546 | } | 2832 | } |
2547 | 2833 | ||
2548 | ocfs2_journal_dirty(handle, path_root_bh(path)); | 2834 | ocfs2_journal_dirty(handle, path_root_bh(path)); |
@@ -2570,7 +2856,8 @@ out: | |||
2570 | */ | 2856 | */ |
2571 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | 2857 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, |
2572 | struct ocfs2_path *path, | 2858 | struct ocfs2_path *path, |
2573 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2859 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2860 | struct ocfs2_extent_tree *et) | ||
2574 | { | 2861 | { |
2575 | int ret, orig_credits = handle->h_buffer_credits; | 2862 | int ret, orig_credits = handle->h_buffer_credits; |
2576 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | 2863 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; |
@@ -2584,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | |||
2584 | if (path->p_tree_depth == 0) { | 2871 | if (path->p_tree_depth == 0) { |
2585 | rightmost_no_delete: | 2872 | rightmost_no_delete: |
2586 | /* | 2873 | /* |
2587 | * In-inode extents. This is trivially handled, so do | 2874 | * Inline extents. This is trivially handled, so do |
2588 | * it up front. | 2875 | * it up front. |
2589 | */ | 2876 | */ |
2590 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | 2877 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, |
@@ -2638,7 +2925,7 @@ rightmost_no_delete: | |||
2638 | */ | 2925 | */ |
2639 | 2926 | ||
2640 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | 2927 | ret = ocfs2_remove_rightmost_path(inode, handle, path, |
2641 | dealloc); | 2928 | dealloc, et); |
2642 | if (ret) | 2929 | if (ret) |
2643 | mlog_errno(ret); | 2930 | mlog_errno(ret); |
2644 | goto out; | 2931 | goto out; |
@@ -2650,7 +2937,7 @@ rightmost_no_delete: | |||
2650 | */ | 2937 | */ |
2651 | try_rotate: | 2938 | try_rotate: |
2652 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | 2939 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, |
2653 | dealloc, &restart_path); | 2940 | dealloc, &restart_path, et); |
2654 | if (ret && ret != -EAGAIN) { | 2941 | if (ret && ret != -EAGAIN) { |
2655 | mlog_errno(ret); | 2942 | mlog_errno(ret); |
2656 | goto out; | 2943 | goto out; |
@@ -2662,7 +2949,7 @@ try_rotate: | |||
2662 | 2949 | ||
2663 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | 2950 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, |
2664 | tmp_path, dealloc, | 2951 | tmp_path, dealloc, |
2665 | &restart_path); | 2952 | &restart_path, et); |
2666 | if (ret && ret != -EAGAIN) { | 2953 | if (ret && ret != -EAGAIN) { |
2667 | mlog_errno(ret); | 2954 | mlog_errno(ret); |
2668 | goto out; | 2955 | goto out; |
@@ -2948,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
2948 | handle_t *handle, | 3235 | handle_t *handle, |
2949 | struct ocfs2_extent_rec *split_rec, | 3236 | struct ocfs2_extent_rec *split_rec, |
2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3237 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3238 | struct ocfs2_extent_tree *et, | ||
2951 | int index) | 3239 | int index) |
2952 | { | 3240 | { |
2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; | 3241 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
@@ -3068,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | 3356 | le16_to_cpu(el->l_next_free_rec) == 1) { |
3069 | 3357 | ||
3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | 3358 | ret = ocfs2_remove_rightmost_path(inode, handle, |
3071 | right_path, dealloc); | 3359 | right_path, |
3360 | dealloc, et); | ||
3072 | if (ret) { | 3361 | if (ret) { |
3073 | mlog_errno(ret); | 3362 | mlog_errno(ret); |
3074 | goto out; | 3363 | goto out; |
@@ -3095,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3095 | int split_index, | 3384 | int split_index, |
3096 | struct ocfs2_extent_rec *split_rec, | 3385 | struct ocfs2_extent_rec *split_rec, |
3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3386 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3098 | struct ocfs2_merge_ctxt *ctxt) | 3387 | struct ocfs2_merge_ctxt *ctxt, |
3388 | struct ocfs2_extent_tree *et) | ||
3099 | 3389 | ||
3100 | { | 3390 | { |
3101 | int ret = 0; | 3391 | int ret = 0; |
@@ -3113,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3113 | * illegal. | 3403 | * illegal. |
3114 | */ | 3404 | */ |
3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3405 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3116 | dealloc); | 3406 | dealloc, et); |
3117 | if (ret) { | 3407 | if (ret) { |
3118 | mlog_errno(ret); | 3408 | mlog_errno(ret); |
3119 | goto out; | 3409 | goto out; |
@@ -3156,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3446 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
3157 | 3447 | ||
3158 | /* The merge left us with an empty extent, remove it. */ | 3448 | /* The merge left us with an empty extent, remove it. */ |
3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 3449 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3450 | dealloc, et); | ||
3160 | if (ret) { | 3451 | if (ret) { |
3161 | mlog_errno(ret); | 3452 | mlog_errno(ret); |
3162 | goto out; | 3453 | goto out; |
@@ -3170,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3170 | */ | 3461 | */ |
3171 | ret = ocfs2_merge_rec_left(inode, path, | 3462 | ret = ocfs2_merge_rec_left(inode, path, |
3172 | handle, rec, | 3463 | handle, rec, |
3173 | dealloc, | 3464 | dealloc, et, |
3174 | split_index); | 3465 | split_index); |
3175 | 3466 | ||
3176 | if (ret) { | 3467 | if (ret) { |
@@ -3179,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3179 | } | 3470 | } |
3180 | 3471 | ||
3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3472 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3182 | dealloc); | 3473 | dealloc, et); |
3183 | /* | 3474 | /* |
3184 | * Error from this last rotate is not critical, so | 3475 | * Error from this last rotate is not critical, so |
3185 | * print but don't bubble it up. | 3476 | * print but don't bubble it up. |
@@ -3199,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3199 | ret = ocfs2_merge_rec_left(inode, | 3490 | ret = ocfs2_merge_rec_left(inode, |
3200 | path, | 3491 | path, |
3201 | handle, split_rec, | 3492 | handle, split_rec, |
3202 | dealloc, | 3493 | dealloc, et, |
3203 | split_index); | 3494 | split_index); |
3204 | if (ret) { | 3495 | if (ret) { |
3205 | mlog_errno(ret); | 3496 | mlog_errno(ret); |
@@ -3222,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3222 | * our leaf. Try to rotate it away. | 3513 | * our leaf. Try to rotate it away. |
3223 | */ | 3514 | */ |
3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3515 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3225 | dealloc); | 3516 | dealloc, et); |
3226 | if (ret) | 3517 | if (ret) |
3227 | mlog_errno(ret); | 3518 | mlog_errno(ret); |
3228 | ret = 0; | 3519 | ret = 0; |
@@ -3356,16 +3647,6 @@ rotate: | |||
3356 | ocfs2_rotate_leaf(el, insert_rec); | 3647 | ocfs2_rotate_leaf(el, insert_rec); |
3357 | } | 3648 | } |
3358 | 3649 | ||
3359 | static inline void ocfs2_update_dinode_clusters(struct inode *inode, | ||
3360 | struct ocfs2_dinode *di, | ||
3361 | u32 clusters) | ||
3362 | { | ||
3363 | le32_add_cpu(&di->i_clusters, clusters); | ||
3364 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
3365 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
3366 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
3367 | } | ||
3368 | |||
3369 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | 3650 | static void ocfs2_adjust_rightmost_records(struct inode *inode, |
3370 | handle_t *handle, | 3651 | handle_t *handle, |
3371 | struct ocfs2_path *path, | 3652 | struct ocfs2_path *path, |
@@ -3567,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode, | |||
3567 | } | 3848 | } |
3568 | 3849 | ||
3569 | /* | 3850 | /* |
3570 | * This function only does inserts on an allocation b-tree. For dinode | 3851 | * This function only does inserts on an allocation b-tree. For tree |
3571 | * lists, ocfs2_insert_at_leaf() is called directly. | 3852 | * depth = 0, ocfs2_insert_at_leaf() is called directly. |
3572 | * | 3853 | * |
3573 | * right_path is the path we want to do the actual insert | 3854 | * right_path is the path we want to do the actual insert |
3574 | * in. left_path should only be passed in if we need to update that | 3855 | * in. left_path should only be passed in if we need to update that |
@@ -3665,7 +3946,7 @@ out: | |||
3665 | 3946 | ||
3666 | static int ocfs2_do_insert_extent(struct inode *inode, | 3947 | static int ocfs2_do_insert_extent(struct inode *inode, |
3667 | handle_t *handle, | 3948 | handle_t *handle, |
3668 | struct buffer_head *di_bh, | 3949 | struct ocfs2_extent_tree *et, |
3669 | struct ocfs2_extent_rec *insert_rec, | 3950 | struct ocfs2_extent_rec *insert_rec, |
3670 | struct ocfs2_insert_type *type) | 3951 | struct ocfs2_insert_type *type) |
3671 | { | 3952 | { |
@@ -3673,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3673 | u32 cpos; | 3954 | u32 cpos; |
3674 | struct ocfs2_path *right_path = NULL; | 3955 | struct ocfs2_path *right_path = NULL; |
3675 | struct ocfs2_path *left_path = NULL; | 3956 | struct ocfs2_path *left_path = NULL; |
3676 | struct ocfs2_dinode *di; | ||
3677 | struct ocfs2_extent_list *el; | 3957 | struct ocfs2_extent_list *el; |
3678 | 3958 | ||
3679 | di = (struct ocfs2_dinode *) di_bh->b_data; | 3959 | el = et->et_root_el; |
3680 | el = &di->id2.i_list; | ||
3681 | 3960 | ||
3682 | ret = ocfs2_journal_access(handle, inode, di_bh, | 3961 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
3683 | OCFS2_JOURNAL_ACCESS_WRITE); | 3962 | OCFS2_JOURNAL_ACCESS_WRITE); |
3684 | if (ret) { | 3963 | if (ret) { |
3685 | mlog_errno(ret); | 3964 | mlog_errno(ret); |
@@ -3691,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3691 | goto out_update_clusters; | 3970 | goto out_update_clusters; |
3692 | } | 3971 | } |
3693 | 3972 | ||
3694 | right_path = ocfs2_new_inode_path(di_bh); | 3973 | right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
3695 | if (!right_path) { | 3974 | if (!right_path) { |
3696 | ret = -ENOMEM; | 3975 | ret = -ENOMEM; |
3697 | mlog_errno(ret); | 3976 | mlog_errno(ret); |
@@ -3741,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3741 | * ocfs2_rotate_tree_right() might have extended the | 4020 | * ocfs2_rotate_tree_right() might have extended the |
3742 | * transaction without re-journaling our tree root. | 4021 | * transaction without re-journaling our tree root. |
3743 | */ | 4022 | */ |
3744 | ret = ocfs2_journal_access(handle, inode, di_bh, | 4023 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
3745 | OCFS2_JOURNAL_ACCESS_WRITE); | 4024 | OCFS2_JOURNAL_ACCESS_WRITE); |
3746 | if (ret) { | 4025 | if (ret) { |
3747 | mlog_errno(ret); | 4026 | mlog_errno(ret); |
@@ -3766,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3766 | 4045 | ||
3767 | out_update_clusters: | 4046 | out_update_clusters: |
3768 | if (type->ins_split == SPLIT_NONE) | 4047 | if (type->ins_split == SPLIT_NONE) |
3769 | ocfs2_update_dinode_clusters(inode, di, | 4048 | ocfs2_et_update_clusters(inode, et, |
3770 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 4049 | le16_to_cpu(insert_rec->e_leaf_clusters)); |
3771 | 4050 | ||
3772 | ret = ocfs2_journal_dirty(handle, di_bh); | 4051 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); |
3773 | if (ret) | 4052 | if (ret) |
3774 | mlog_errno(ret); | 4053 | mlog_errno(ret); |
3775 | 4054 | ||
@@ -3899,7 +4178,8 @@ out: | |||
3899 | static void ocfs2_figure_contig_type(struct inode *inode, | 4178 | static void ocfs2_figure_contig_type(struct inode *inode, |
3900 | struct ocfs2_insert_type *insert, | 4179 | struct ocfs2_insert_type *insert, |
3901 | struct ocfs2_extent_list *el, | 4180 | struct ocfs2_extent_list *el, |
3902 | struct ocfs2_extent_rec *insert_rec) | 4181 | struct ocfs2_extent_rec *insert_rec, |
4182 | struct ocfs2_extent_tree *et) | ||
3903 | { | 4183 | { |
3904 | int i; | 4184 | int i; |
3905 | enum ocfs2_contig_type contig_type = CONTIG_NONE; | 4185 | enum ocfs2_contig_type contig_type = CONTIG_NONE; |
@@ -3915,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
3915 | } | 4195 | } |
3916 | } | 4196 | } |
3917 | insert->ins_contig = contig_type; | 4197 | insert->ins_contig = contig_type; |
4198 | |||
4199 | if (insert->ins_contig != CONTIG_NONE) { | ||
4200 | struct ocfs2_extent_rec *rec = | ||
4201 | &el->l_recs[insert->ins_contig_index]; | ||
4202 | unsigned int len = le16_to_cpu(rec->e_leaf_clusters) + | ||
4203 | le16_to_cpu(insert_rec->e_leaf_clusters); | ||
4204 | |||
4205 | /* | ||
4206 | * Caller might want us to limit the size of extents, don't | ||
4207 | * calculate contiguousness if we might exceed that limit. | ||
4208 | */ | ||
4209 | if (et->et_max_leaf_clusters && | ||
4210 | (len > et->et_max_leaf_clusters)) | ||
4211 | insert->ins_contig = CONTIG_NONE; | ||
4212 | } | ||
3918 | } | 4213 | } |
3919 | 4214 | ||
3920 | /* | 4215 | /* |
@@ -3923,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
3923 | * ocfs2_figure_appending_type() will figure out whether we'll have to | 4218 | * ocfs2_figure_appending_type() will figure out whether we'll have to |
3924 | * insert at the tail of the rightmost leaf. | 4219 | * insert at the tail of the rightmost leaf. |
3925 | * | 4220 | * |
3926 | * This should also work against the dinode list for tree's with 0 | 4221 | * This should also work against the root extent list for tree's with 0 |
3927 | * depth. If we consider the dinode list to be the rightmost leaf node | 4222 | * depth. If we consider the root extent list to be the rightmost leaf node |
3928 | * then the logic here makes sense. | 4223 | * then the logic here makes sense. |
3929 | */ | 4224 | */ |
3930 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, | 4225 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, |
@@ -3975,14 +4270,13 @@ set_tail_append: | |||
3975 | * structure. | 4270 | * structure. |
3976 | */ | 4271 | */ |
3977 | static int ocfs2_figure_insert_type(struct inode *inode, | 4272 | static int ocfs2_figure_insert_type(struct inode *inode, |
3978 | struct buffer_head *di_bh, | 4273 | struct ocfs2_extent_tree *et, |
3979 | struct buffer_head **last_eb_bh, | 4274 | struct buffer_head **last_eb_bh, |
3980 | struct ocfs2_extent_rec *insert_rec, | 4275 | struct ocfs2_extent_rec *insert_rec, |
3981 | int *free_records, | 4276 | int *free_records, |
3982 | struct ocfs2_insert_type *insert) | 4277 | struct ocfs2_insert_type *insert) |
3983 | { | 4278 | { |
3984 | int ret; | 4279 | int ret; |
3985 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3986 | struct ocfs2_extent_block *eb; | 4280 | struct ocfs2_extent_block *eb; |
3987 | struct ocfs2_extent_list *el; | 4281 | struct ocfs2_extent_list *el; |
3988 | struct ocfs2_path *path = NULL; | 4282 | struct ocfs2_path *path = NULL; |
@@ -3990,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
3990 | 4284 | ||
3991 | insert->ins_split = SPLIT_NONE; | 4285 | insert->ins_split = SPLIT_NONE; |
3992 | 4286 | ||
3993 | el = &di->id2.i_list; | 4287 | el = et->et_root_el; |
3994 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); | 4288 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); |
3995 | 4289 | ||
3996 | if (el->l_tree_depth) { | 4290 | if (el->l_tree_depth) { |
@@ -4000,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4000 | * ocfs2_figure_insert_type() and ocfs2_add_branch() | 4294 | * ocfs2_figure_insert_type() and ocfs2_add_branch() |
4001 | * may want it later. | 4295 | * may want it later. |
4002 | */ | 4296 | */ |
4003 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4297 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); |
4004 | le64_to_cpu(di->i_last_eb_blk), &bh, | ||
4005 | OCFS2_BH_CACHED, inode); | ||
4006 | if (ret) { | 4298 | if (ret) { |
4007 | mlog_exit(ret); | 4299 | mlog_exit(ret); |
4008 | goto out; | 4300 | goto out; |
@@ -4023,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4023 | le16_to_cpu(el->l_next_free_rec); | 4315 | le16_to_cpu(el->l_next_free_rec); |
4024 | 4316 | ||
4025 | if (!insert->ins_tree_depth) { | 4317 | if (!insert->ins_tree_depth) { |
4026 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4318 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
4027 | ocfs2_figure_appending_type(insert, el, insert_rec); | 4319 | ocfs2_figure_appending_type(insert, el, insert_rec); |
4028 | return 0; | 4320 | return 0; |
4029 | } | 4321 | } |
4030 | 4322 | ||
4031 | path = ocfs2_new_inode_path(di_bh); | 4323 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4032 | if (!path) { | 4324 | if (!path) { |
4033 | ret = -ENOMEM; | 4325 | ret = -ENOMEM; |
4034 | mlog_errno(ret); | 4326 | mlog_errno(ret); |
@@ -4057,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4057 | * into two types of appends: simple record append, or a | 4349 | * into two types of appends: simple record append, or a |
4058 | * rotate inside the tail leaf. | 4350 | * rotate inside the tail leaf. |
4059 | */ | 4351 | */ |
4060 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4352 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
4061 | 4353 | ||
4062 | /* | 4354 | /* |
4063 | * The insert code isn't quite ready to deal with all cases of | 4355 | * The insert code isn't quite ready to deal with all cases of |
@@ -4078,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4078 | * the case that we're doing a tail append, so maybe we can | 4370 | * the case that we're doing a tail append, so maybe we can |
4079 | * take advantage of that information somehow. | 4371 | * take advantage of that information somehow. |
4080 | */ | 4372 | */ |
4081 | if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { | 4373 | if (ocfs2_et_get_last_eb_blk(et) == |
4374 | path_leaf_bh(path)->b_blocknr) { | ||
4082 | /* | 4375 | /* |
4083 | * Ok, ocfs2_find_path() returned us the rightmost | 4376 | * Ok, ocfs2_find_path() returned us the rightmost |
4084 | * tree path. This might be an appending insert. There are | 4377 | * tree path. This might be an appending insert. There are |
@@ -4108,7 +4401,7 @@ out: | |||
4108 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 4401 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
4109 | handle_t *handle, | 4402 | handle_t *handle, |
4110 | struct inode *inode, | 4403 | struct inode *inode, |
4111 | struct buffer_head *fe_bh, | 4404 | struct ocfs2_extent_tree *et, |
4112 | u32 cpos, | 4405 | u32 cpos, |
4113 | u64 start_blk, | 4406 | u64 start_blk, |
4114 | u32 new_clusters, | 4407 | u32 new_clusters, |
@@ -4121,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4121 | struct ocfs2_insert_type insert = {0, }; | 4414 | struct ocfs2_insert_type insert = {0, }; |
4122 | struct ocfs2_extent_rec rec; | 4415 | struct ocfs2_extent_rec rec; |
4123 | 4416 | ||
4124 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
4125 | |||
4126 | mlog(0, "add %u clusters at position %u to inode %llu\n", | 4417 | mlog(0, "add %u clusters at position %u to inode %llu\n", |
4127 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4418 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
4128 | 4419 | ||
4129 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
4130 | (OCFS2_I(inode)->ip_clusters != cpos), | ||
4131 | "Device %s, asking for sparse allocation: inode %llu, " | ||
4132 | "cpos %u, clusters %u\n", | ||
4133 | osb->dev_str, | ||
4134 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, | ||
4135 | OCFS2_I(inode)->ip_clusters); | ||
4136 | |||
4137 | memset(&rec, 0, sizeof(rec)); | 4420 | memset(&rec, 0, sizeof(rec)); |
4138 | rec.e_cpos = cpu_to_le32(cpos); | 4421 | rec.e_cpos = cpu_to_le32(cpos); |
4139 | rec.e_blkno = cpu_to_le64(start_blk); | 4422 | rec.e_blkno = cpu_to_le64(start_blk); |
4140 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 4423 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
4141 | rec.e_flags = flags; | 4424 | rec.e_flags = flags; |
4425 | status = ocfs2_et_insert_check(inode, et, &rec); | ||
4426 | if (status) { | ||
4427 | mlog_errno(status); | ||
4428 | goto bail; | ||
4429 | } | ||
4142 | 4430 | ||
4143 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, | 4431 | status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, |
4144 | &free_records, &insert); | 4432 | &free_records, &insert); |
4145 | if (status < 0) { | 4433 | if (status < 0) { |
4146 | mlog_errno(status); | 4434 | mlog_errno(status); |
@@ -4154,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4154 | free_records, insert.ins_tree_depth); | 4442 | free_records, insert.ins_tree_depth); |
4155 | 4443 | ||
4156 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { | 4444 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { |
4157 | status = ocfs2_grow_tree(inode, handle, fe_bh, | 4445 | status = ocfs2_grow_tree(inode, handle, et, |
4158 | &insert.ins_tree_depth, &last_eb_bh, | 4446 | &insert.ins_tree_depth, &last_eb_bh, |
4159 | meta_ac); | 4447 | meta_ac); |
4160 | if (status) { | 4448 | if (status) { |
@@ -4164,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4164 | } | 4452 | } |
4165 | 4453 | ||
4166 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 4454 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
4167 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); | 4455 | status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); |
4168 | if (status < 0) | 4456 | if (status < 0) |
4169 | mlog_errno(status); | 4457 | mlog_errno(status); |
4170 | else | 4458 | else if (et->et_ops == &ocfs2_dinode_et_ops) |
4171 | ocfs2_extent_map_insert_rec(inode, &rec); | 4459 | ocfs2_extent_map_insert_rec(inode, &rec); |
4172 | 4460 | ||
4173 | bail: | 4461 | bail: |
4174 | if (last_eb_bh) | 4462 | brelse(last_eb_bh); |
4175 | brelse(last_eb_bh); | 4463 | |
4464 | mlog_exit(status); | ||
4465 | return status; | ||
4466 | } | ||
4467 | |||
4468 | /* | ||
4469 | * Allocate and add clusters into the extent b-tree. | ||
4470 | * The new clusters(clusters_to_add) will be inserted at logical_offset. | ||
4471 | * The extent b-tree's root is specified by et, and | ||
4472 | * it is not limited to the file storage. Any extent tree can use this | ||
4473 | * function if it implements the proper ocfs2_extent_tree. | ||
4474 | */ | ||
4475 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
4476 | struct inode *inode, | ||
4477 | u32 *logical_offset, | ||
4478 | u32 clusters_to_add, | ||
4479 | int mark_unwritten, | ||
4480 | struct ocfs2_extent_tree *et, | ||
4481 | handle_t *handle, | ||
4482 | struct ocfs2_alloc_context *data_ac, | ||
4483 | struct ocfs2_alloc_context *meta_ac, | ||
4484 | enum ocfs2_alloc_restarted *reason_ret) | ||
4485 | { | ||
4486 | int status = 0; | ||
4487 | int free_extents; | ||
4488 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
4489 | u32 bit_off, num_bits; | ||
4490 | u64 block; | ||
4491 | u8 flags = 0; | ||
4492 | |||
4493 | BUG_ON(!clusters_to_add); | ||
4494 | |||
4495 | if (mark_unwritten) | ||
4496 | flags = OCFS2_EXT_UNWRITTEN; | ||
4497 | |||
4498 | free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
4499 | if (free_extents < 0) { | ||
4500 | status = free_extents; | ||
4501 | mlog_errno(status); | ||
4502 | goto leave; | ||
4503 | } | ||
4504 | |||
4505 | /* there are two cases which could cause us to EAGAIN in the | ||
4506 | * we-need-more-metadata case: | ||
4507 | * 1) we haven't reserved *any* | ||
4508 | * 2) we are so fragmented, we've needed to add metadata too | ||
4509 | * many times. */ | ||
4510 | if (!free_extents && !meta_ac) { | ||
4511 | mlog(0, "we haven't reserved any metadata!\n"); | ||
4512 | status = -EAGAIN; | ||
4513 | reason = RESTART_META; | ||
4514 | goto leave; | ||
4515 | } else if ((!free_extents) | ||
4516 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
4517 | < ocfs2_extend_meta_needed(et->et_root_el))) { | ||
4518 | mlog(0, "filesystem is really fragmented...\n"); | ||
4519 | status = -EAGAIN; | ||
4520 | reason = RESTART_META; | ||
4521 | goto leave; | ||
4522 | } | ||
4523 | |||
4524 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
4525 | clusters_to_add, &bit_off, &num_bits); | ||
4526 | if (status < 0) { | ||
4527 | if (status != -ENOSPC) | ||
4528 | mlog_errno(status); | ||
4529 | goto leave; | ||
4530 | } | ||
4176 | 4531 | ||
4532 | BUG_ON(num_bits > clusters_to_add); | ||
4533 | |||
4534 | /* reserve our write early -- insert_extent may update the inode */ | ||
4535 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, | ||
4536 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4537 | if (status < 0) { | ||
4538 | mlog_errno(status); | ||
4539 | goto leave; | ||
4540 | } | ||
4541 | |||
4542 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
4543 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
4544 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
4545 | status = ocfs2_insert_extent(osb, handle, inode, et, | ||
4546 | *logical_offset, block, | ||
4547 | num_bits, flags, meta_ac); | ||
4548 | if (status < 0) { | ||
4549 | mlog_errno(status); | ||
4550 | goto leave; | ||
4551 | } | ||
4552 | |||
4553 | status = ocfs2_journal_dirty(handle, et->et_root_bh); | ||
4554 | if (status < 0) { | ||
4555 | mlog_errno(status); | ||
4556 | goto leave; | ||
4557 | } | ||
4558 | |||
4559 | clusters_to_add -= num_bits; | ||
4560 | *logical_offset += num_bits; | ||
4561 | |||
4562 | if (clusters_to_add) { | ||
4563 | mlog(0, "need to alloc once more, wanted = %u\n", | ||
4564 | clusters_to_add); | ||
4565 | status = -EAGAIN; | ||
4566 | reason = RESTART_TRANS; | ||
4567 | } | ||
4568 | |||
4569 | leave: | ||
4177 | mlog_exit(status); | 4570 | mlog_exit(status); |
4571 | if (reason_ret) | ||
4572 | *reason_ret = reason; | ||
4178 | return status; | 4573 | return status; |
4179 | } | 4574 | } |
4180 | 4575 | ||
@@ -4201,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb, | |||
4201 | static int ocfs2_split_and_insert(struct inode *inode, | 4596 | static int ocfs2_split_and_insert(struct inode *inode, |
4202 | handle_t *handle, | 4597 | handle_t *handle, |
4203 | struct ocfs2_path *path, | 4598 | struct ocfs2_path *path, |
4204 | struct buffer_head *di_bh, | 4599 | struct ocfs2_extent_tree *et, |
4205 | struct buffer_head **last_eb_bh, | 4600 | struct buffer_head **last_eb_bh, |
4206 | int split_index, | 4601 | int split_index, |
4207 | struct ocfs2_extent_rec *orig_split_rec, | 4602 | struct ocfs2_extent_rec *orig_split_rec, |
@@ -4215,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode, | |||
4215 | struct ocfs2_extent_rec split_rec = *orig_split_rec; | 4610 | struct ocfs2_extent_rec split_rec = *orig_split_rec; |
4216 | struct ocfs2_insert_type insert; | 4611 | struct ocfs2_insert_type insert; |
4217 | struct ocfs2_extent_block *eb; | 4612 | struct ocfs2_extent_block *eb; |
4218 | struct ocfs2_dinode *di; | ||
4219 | 4613 | ||
4220 | leftright: | 4614 | leftright: |
4221 | /* | 4615 | /* |
@@ -4224,8 +4618,7 @@ leftright: | |||
4224 | */ | 4618 | */ |
4225 | rec = path_leaf_el(path)->l_recs[split_index]; | 4619 | rec = path_leaf_el(path)->l_recs[split_index]; |
4226 | 4620 | ||
4227 | di = (struct ocfs2_dinode *)di_bh->b_data; | 4621 | rightmost_el = et->et_root_el; |
4228 | rightmost_el = &di->id2.i_list; | ||
4229 | 4622 | ||
4230 | depth = le16_to_cpu(rightmost_el->l_tree_depth); | 4623 | depth = le16_to_cpu(rightmost_el->l_tree_depth); |
4231 | if (depth) { | 4624 | if (depth) { |
@@ -4236,8 +4629,8 @@ leftright: | |||
4236 | 4629 | ||
4237 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4630 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
4238 | le16_to_cpu(rightmost_el->l_count)) { | 4631 | le16_to_cpu(rightmost_el->l_count)) { |
4239 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, | 4632 | ret = ocfs2_grow_tree(inode, handle, et, |
4240 | meta_ac); | 4633 | &depth, last_eb_bh, meta_ac); |
4241 | if (ret) { | 4634 | if (ret) { |
4242 | mlog_errno(ret); | 4635 | mlog_errno(ret); |
4243 | goto out; | 4636 | goto out; |
@@ -4274,8 +4667,7 @@ leftright: | |||
4274 | do_leftright = 1; | 4667 | do_leftright = 1; |
4275 | } | 4668 | } |
4276 | 4669 | ||
4277 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, | 4670 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
4278 | &insert); | ||
4279 | if (ret) { | 4671 | if (ret) { |
4280 | mlog_errno(ret); | 4672 | mlog_errno(ret); |
4281 | goto out; | 4673 | goto out; |
@@ -4317,8 +4709,9 @@ out: | |||
4317 | * of the tree is required. All other cases will degrade into a less | 4709 | * of the tree is required. All other cases will degrade into a less |
4318 | * optimal tree layout. | 4710 | * optimal tree layout. |
4319 | * | 4711 | * |
4320 | * last_eb_bh should be the rightmost leaf block for any inode with a | 4712 | * last_eb_bh should be the rightmost leaf block for any extent |
4321 | * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. | 4713 | * btree. Since a split may grow the tree or a merge might shrink it, |
4714 | * the caller cannot trust the contents of that buffer after this call. | ||
4322 | * | 4715 | * |
4323 | * This code is optimized for readability - several passes might be | 4716 | * This code is optimized for readability - several passes might be |
4324 | * made over certain portions of the tree. All of those blocks will | 4717 | * made over certain portions of the tree. All of those blocks will |
@@ -4326,7 +4719,7 @@ out: | |||
4326 | * extra overhead is not expressed in terms of disk reads. | 4719 | * extra overhead is not expressed in terms of disk reads. |
4327 | */ | 4720 | */ |
4328 | static int __ocfs2_mark_extent_written(struct inode *inode, | 4721 | static int __ocfs2_mark_extent_written(struct inode *inode, |
4329 | struct buffer_head *di_bh, | 4722 | struct ocfs2_extent_tree *et, |
4330 | handle_t *handle, | 4723 | handle_t *handle, |
4331 | struct ocfs2_path *path, | 4724 | struct ocfs2_path *path, |
4332 | int split_index, | 4725 | int split_index, |
@@ -4366,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4366 | */ | 4759 | */ |
4367 | if (path->p_tree_depth) { | 4760 | if (path->p_tree_depth) { |
4368 | struct ocfs2_extent_block *eb; | 4761 | struct ocfs2_extent_block *eb; |
4369 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4370 | 4762 | ||
4371 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4763 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
4372 | le64_to_cpu(di->i_last_eb_blk), | 4764 | &last_eb_bh); |
4373 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4374 | if (ret) { | 4765 | if (ret) { |
4375 | mlog_exit(ret); | 4766 | mlog_exit(ret); |
4376 | goto out; | 4767 | goto out; |
@@ -4403,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4403 | if (ctxt.c_split_covers_rec) | 4794 | if (ctxt.c_split_covers_rec) |
4404 | el->l_recs[split_index] = *split_rec; | 4795 | el->l_recs[split_index] = *split_rec; |
4405 | else | 4796 | else |
4406 | ret = ocfs2_split_and_insert(inode, handle, path, di_bh, | 4797 | ret = ocfs2_split_and_insert(inode, handle, path, et, |
4407 | &last_eb_bh, split_index, | 4798 | &last_eb_bh, split_index, |
4408 | split_rec, meta_ac); | 4799 | split_rec, meta_ac); |
4409 | if (ret) | 4800 | if (ret) |
@@ -4411,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4411 | } else { | 4802 | } else { |
4412 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | 4803 | ret = ocfs2_try_to_merge_extent(inode, handle, path, |
4413 | split_index, split_rec, | 4804 | split_index, split_rec, |
4414 | dealloc, &ctxt); | 4805 | dealloc, &ctxt, et); |
4415 | if (ret) | 4806 | if (ret) |
4416 | mlog_errno(ret); | 4807 | mlog_errno(ret); |
4417 | } | 4808 | } |
@@ -4429,7 +4820,8 @@ out: | |||
4429 | * | 4820 | * |
4430 | * The caller is responsible for passing down meta_ac if we'll need it. | 4821 | * The caller is responsible for passing down meta_ac if we'll need it. |
4431 | */ | 4822 | */ |
4432 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 4823 | int ocfs2_mark_extent_written(struct inode *inode, |
4824 | struct ocfs2_extent_tree *et, | ||
4433 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 4825 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
4434 | struct ocfs2_alloc_context *meta_ac, | 4826 | struct ocfs2_alloc_context *meta_ac, |
4435 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 4827 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
@@ -4455,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
4455 | /* | 4847 | /* |
4456 | * XXX: This should be fixed up so that we just re-insert the | 4848 | * XXX: This should be fixed up so that we just re-insert the |
4457 | * next extent records. | 4849 | * next extent records. |
4850 | * | ||
4851 | * XXX: This is a hack on the extent tree, maybe it should be | ||
4852 | * an op? | ||
4458 | */ | 4853 | */ |
4459 | ocfs2_extent_map_trunc(inode, 0); | 4854 | if (et->et_ops == &ocfs2_dinode_et_ops) |
4855 | ocfs2_extent_map_trunc(inode, 0); | ||
4460 | 4856 | ||
4461 | left_path = ocfs2_new_inode_path(di_bh); | 4857 | left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4462 | if (!left_path) { | 4858 | if (!left_path) { |
4463 | ret = -ENOMEM; | 4859 | ret = -ENOMEM; |
4464 | mlog_errno(ret); | 4860 | mlog_errno(ret); |
@@ -4489,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
4489 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | 4885 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; |
4490 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | 4886 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; |
4491 | 4887 | ||
4492 | ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, | 4888 | ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, |
4493 | index, &split_rec, meta_ac, dealloc); | 4889 | index, &split_rec, meta_ac, |
4890 | dealloc); | ||
4494 | if (ret) | 4891 | if (ret) |
4495 | mlog_errno(ret); | 4892 | mlog_errno(ret); |
4496 | 4893 | ||
@@ -4499,13 +4896,12 @@ out: | |||
4499 | return ret; | 4896 | return ret; |
4500 | } | 4897 | } |
4501 | 4898 | ||
4502 | static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | 4899 | static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, |
4503 | handle_t *handle, struct ocfs2_path *path, | 4900 | handle_t *handle, struct ocfs2_path *path, |
4504 | int index, u32 new_range, | 4901 | int index, u32 new_range, |
4505 | struct ocfs2_alloc_context *meta_ac) | 4902 | struct ocfs2_alloc_context *meta_ac) |
4506 | { | 4903 | { |
4507 | int ret, depth, credits = handle->h_buffer_credits; | 4904 | int ret, depth, credits = handle->h_buffer_credits; |
4508 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4509 | struct buffer_head *last_eb_bh = NULL; | 4905 | struct buffer_head *last_eb_bh = NULL; |
4510 | struct ocfs2_extent_block *eb; | 4906 | struct ocfs2_extent_block *eb; |
4511 | struct ocfs2_extent_list *rightmost_el, *el; | 4907 | struct ocfs2_extent_list *rightmost_el, *el; |
@@ -4522,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4522 | 4918 | ||
4523 | depth = path->p_tree_depth; | 4919 | depth = path->p_tree_depth; |
4524 | if (depth > 0) { | 4920 | if (depth > 0) { |
4525 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4921 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
4526 | le64_to_cpu(di->i_last_eb_blk), | 4922 | &last_eb_bh); |
4527 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4528 | if (ret < 0) { | 4923 | if (ret < 0) { |
4529 | mlog_errno(ret); | 4924 | mlog_errno(ret); |
4530 | goto out; | 4925 | goto out; |
@@ -4535,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4535 | } else | 4930 | } else |
4536 | rightmost_el = path_leaf_el(path); | 4931 | rightmost_el = path_leaf_el(path); |
4537 | 4932 | ||
4538 | credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); | 4933 | credits += path->p_tree_depth + |
4934 | ocfs2_extend_meta_needed(et->et_root_el); | ||
4539 | ret = ocfs2_extend_trans(handle, credits); | 4935 | ret = ocfs2_extend_trans(handle, credits); |
4540 | if (ret) { | 4936 | if (ret) { |
4541 | mlog_errno(ret); | 4937 | mlog_errno(ret); |
@@ -4544,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4544 | 4940 | ||
4545 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4941 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
4546 | le16_to_cpu(rightmost_el->l_count)) { | 4942 | le16_to_cpu(rightmost_el->l_count)) { |
4547 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, | 4943 | ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, |
4548 | meta_ac); | 4944 | meta_ac); |
4549 | if (ret) { | 4945 | if (ret) { |
4550 | mlog_errno(ret); | 4946 | mlog_errno(ret); |
@@ -4558,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4558 | insert.ins_split = SPLIT_RIGHT; | 4954 | insert.ins_split = SPLIT_RIGHT; |
4559 | insert.ins_tree_depth = depth; | 4955 | insert.ins_tree_depth = depth; |
4560 | 4956 | ||
4561 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); | 4957 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
4562 | if (ret) | 4958 | if (ret) |
4563 | mlog_errno(ret); | 4959 | mlog_errno(ret); |
4564 | 4960 | ||
@@ -4570,7 +4966,8 @@ out: | |||
4570 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | 4966 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, |
4571 | struct ocfs2_path *path, int index, | 4967 | struct ocfs2_path *path, int index, |
4572 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 4968 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
4573 | u32 cpos, u32 len) | 4969 | u32 cpos, u32 len, |
4970 | struct ocfs2_extent_tree *et) | ||
4574 | { | 4971 | { |
4575 | int ret; | 4972 | int ret; |
4576 | u32 left_cpos, rec_range, trunc_range; | 4973 | u32 left_cpos, rec_range, trunc_range; |
@@ -4582,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
4582 | struct ocfs2_extent_block *eb; | 4979 | struct ocfs2_extent_block *eb; |
4583 | 4980 | ||
4584 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { | 4981 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { |
4585 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 4982 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
4586 | if (ret) { | 4983 | if (ret) { |
4587 | mlog_errno(ret); | 4984 | mlog_errno(ret); |
4588 | goto out; | 4985 | goto out; |
@@ -4713,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
4713 | 5110 | ||
4714 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); | 5111 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); |
4715 | 5112 | ||
4716 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 5113 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
4717 | if (ret) { | 5114 | if (ret) { |
4718 | mlog_errno(ret); | 5115 | mlog_errno(ret); |
4719 | goto out; | 5116 | goto out; |
@@ -4724,7 +5121,8 @@ out: | |||
4724 | return ret; | 5121 | return ret; |
4725 | } | 5122 | } |
4726 | 5123 | ||
4727 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 5124 | int ocfs2_remove_extent(struct inode *inode, |
5125 | struct ocfs2_extent_tree *et, | ||
4728 | u32 cpos, u32 len, handle_t *handle, | 5126 | u32 cpos, u32 len, handle_t *handle, |
4729 | struct ocfs2_alloc_context *meta_ac, | 5127 | struct ocfs2_alloc_context *meta_ac, |
4730 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 5128 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
@@ -4733,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4733 | u32 rec_range, trunc_range; | 5131 | u32 rec_range, trunc_range; |
4734 | struct ocfs2_extent_rec *rec; | 5132 | struct ocfs2_extent_rec *rec; |
4735 | struct ocfs2_extent_list *el; | 5133 | struct ocfs2_extent_list *el; |
4736 | struct ocfs2_path *path; | 5134 | struct ocfs2_path *path = NULL; |
4737 | 5135 | ||
4738 | ocfs2_extent_map_trunc(inode, 0); | 5136 | ocfs2_extent_map_trunc(inode, 0); |
4739 | 5137 | ||
4740 | path = ocfs2_new_inode_path(di_bh); | 5138 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4741 | if (!path) { | 5139 | if (!path) { |
4742 | ret = -ENOMEM; | 5140 | ret = -ENOMEM; |
4743 | mlog_errno(ret); | 5141 | mlog_errno(ret); |
@@ -4790,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4790 | 5188 | ||
4791 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | 5189 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { |
4792 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5190 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
4793 | cpos, len); | 5191 | cpos, len, et); |
4794 | if (ret) { | 5192 | if (ret) { |
4795 | mlog_errno(ret); | 5193 | mlog_errno(ret); |
4796 | goto out; | 5194 | goto out; |
4797 | } | 5195 | } |
4798 | } else { | 5196 | } else { |
4799 | ret = ocfs2_split_tree(inode, di_bh, handle, path, index, | 5197 | ret = ocfs2_split_tree(inode, et, handle, path, index, |
4800 | trunc_range, meta_ac); | 5198 | trunc_range, meta_ac); |
4801 | if (ret) { | 5199 | if (ret) { |
4802 | mlog_errno(ret); | 5200 | mlog_errno(ret); |
@@ -4845,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4845 | } | 5243 | } |
4846 | 5244 | ||
4847 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5245 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
4848 | cpos, len); | 5246 | cpos, len, et); |
4849 | if (ret) { | 5247 | if (ret) { |
4850 | mlog_errno(ret); | 5248 | mlog_errno(ret); |
4851 | goto out; | 5249 | goto out; |
@@ -5188,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, | |||
5188 | goto bail; | 5586 | goto bail; |
5189 | } | 5587 | } |
5190 | 5588 | ||
5191 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 5589 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
5192 | OCFS2_BH_CACHED, inode); | ||
5193 | if (status < 0) { | 5590 | if (status < 0) { |
5194 | iput(inode); | 5591 | iput(inode); |
5195 | mlog_errno(status); | 5592 | mlog_errno(status); |
@@ -5264,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
5264 | bail: | 5661 | bail: |
5265 | if (tl_inode) | 5662 | if (tl_inode) |
5266 | iput(tl_inode); | 5663 | iput(tl_inode); |
5267 | if (tl_bh) | 5664 | brelse(tl_bh); |
5268 | brelse(tl_bh); | ||
5269 | 5665 | ||
5270 | if (status < 0 && (*tl_copy)) { | 5666 | if (status < 0 && (*tl_copy)) { |
5271 | kfree(*tl_copy); | 5667 | kfree(*tl_copy); |
@@ -6008,20 +6404,13 @@ bail: | |||
6008 | return status; | 6404 | return status; |
6009 | } | 6405 | } |
6010 | 6406 | ||
6011 | static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) | 6407 | static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) |
6012 | { | 6408 | { |
6013 | set_buffer_uptodate(bh); | 6409 | set_buffer_uptodate(bh); |
6014 | mark_buffer_dirty(bh); | 6410 | mark_buffer_dirty(bh); |
6015 | return 0; | 6411 | return 0; |
6016 | } | 6412 | } |
6017 | 6413 | ||
6018 | static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | ||
6019 | { | ||
6020 | set_buffer_uptodate(bh); | ||
6021 | mark_buffer_dirty(bh); | ||
6022 | return ocfs2_journal_dirty_data(handle, bh); | ||
6023 | } | ||
6024 | |||
6025 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | 6414 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, |
6026 | unsigned int from, unsigned int to, | 6415 | unsigned int from, unsigned int to, |
6027 | struct page *page, int zero, u64 *phys) | 6416 | struct page *page, int zero, u64 *phys) |
@@ -6040,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | |||
6040 | * here if they aren't - ocfs2_map_page_blocks() | 6429 | * here if they aren't - ocfs2_map_page_blocks() |
6041 | * might've skipped some | 6430 | * might've skipped some |
6042 | */ | 6431 | */ |
6043 | if (ocfs2_should_order_data(inode)) { | 6432 | ret = walk_page_buffers(handle, page_buffers(page), |
6044 | ret = walk_page_buffers(handle, | 6433 | from, to, &partial, |
6045 | page_buffers(page), | 6434 | ocfs2_zero_func); |
6046 | from, to, &partial, | 6435 | if (ret < 0) |
6047 | ocfs2_ordered_zero_func); | 6436 | mlog_errno(ret); |
6048 | if (ret < 0) | 6437 | else if (ocfs2_should_order_data(inode)) { |
6049 | mlog_errno(ret); | 6438 | ret = ocfs2_jbd2_file_inode(handle, inode); |
6050 | } else { | 6439 | #ifdef CONFIG_OCFS2_COMPAT_JBD |
6051 | ret = walk_page_buffers(handle, page_buffers(page), | 6440 | ret = walk_page_buffers(handle, page_buffers(page), |
6052 | from, to, &partial, | 6441 | from, to, &partial, |
6053 | ocfs2_writeback_zero_func); | 6442 | ocfs2_journal_dirty_data); |
6443 | #endif | ||
6054 | if (ret < 0) | 6444 | if (ret < 0) |
6055 | mlog_errno(ret); | 6445 | mlog_errno(ret); |
6056 | } | 6446 | } |
@@ -6215,20 +6605,29 @@ out: | |||
6215 | return ret; | 6605 | return ret; |
6216 | } | 6606 | } |
6217 | 6607 | ||
6218 | static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) | 6608 | static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode, |
6609 | struct ocfs2_dinode *di) | ||
6219 | { | 6610 | { |
6220 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; | 6611 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; |
6612 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
6221 | 6613 | ||
6222 | memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); | 6614 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) |
6615 | memset(&di->id2, 0, blocksize - | ||
6616 | offsetof(struct ocfs2_dinode, id2) - | ||
6617 | xattrsize); | ||
6618 | else | ||
6619 | memset(&di->id2, 0, blocksize - | ||
6620 | offsetof(struct ocfs2_dinode, id2)); | ||
6223 | } | 6621 | } |
6224 | 6622 | ||
6225 | void ocfs2_dinode_new_extent_list(struct inode *inode, | 6623 | void ocfs2_dinode_new_extent_list(struct inode *inode, |
6226 | struct ocfs2_dinode *di) | 6624 | struct ocfs2_dinode *di) |
6227 | { | 6625 | { |
6228 | ocfs2_zero_dinode_id2(inode, di); | 6626 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
6229 | di->id2.i_list.l_tree_depth = 0; | 6627 | di->id2.i_list.l_tree_depth = 0; |
6230 | di->id2.i_list.l_next_free_rec = 0; | 6628 | di->id2.i_list.l_next_free_rec = 0; |
6231 | di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); | 6629 | di->id2.i_list.l_count = cpu_to_le16( |
6630 | ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di)); | ||
6232 | } | 6631 | } |
6233 | 6632 | ||
6234 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | 6633 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) |
@@ -6245,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | |||
6245 | * We clear the entire i_data structure here so that all | 6644 | * We clear the entire i_data structure here so that all |
6246 | * fields can be properly initialized. | 6645 | * fields can be properly initialized. |
6247 | */ | 6646 | */ |
6248 | ocfs2_zero_dinode_id2(inode, di); | 6647 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
6249 | 6648 | ||
6250 | idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); | 6649 | idata->id_count = cpu_to_le16( |
6650 | ocfs2_max_inline_data_with_xattr(inode->i_sb, di)); | ||
6251 | } | 6651 | } |
6252 | 6652 | ||
6253 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, | 6653 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, |
@@ -6262,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6262 | struct ocfs2_alloc_context *data_ac = NULL; | 6662 | struct ocfs2_alloc_context *data_ac = NULL; |
6263 | struct page **pages = NULL; | 6663 | struct page **pages = NULL; |
6264 | loff_t end = osb->s_clustersize; | 6664 | loff_t end = osb->s_clustersize; |
6665 | struct ocfs2_extent_tree et; | ||
6265 | 6666 | ||
6266 | has_data = i_size_read(inode) ? 1 : 0; | 6667 | has_data = i_size_read(inode) ? 1 : 0; |
6267 | 6668 | ||
@@ -6361,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6361 | * this proves to be false, we could always re-build | 6762 | * this proves to be false, we could always re-build |
6362 | * the in-inode data from our pages. | 6763 | * the in-inode data from our pages. |
6363 | */ | 6764 | */ |
6364 | ret = ocfs2_insert_extent(osb, handle, inode, di_bh, | 6765 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
6766 | ret = ocfs2_insert_extent(osb, handle, inode, &et, | ||
6365 | 0, block, 1, 0, NULL); | 6767 | 0, block, 1, 0, NULL); |
6366 | if (ret) { | 6768 | if (ret) { |
6367 | mlog_errno(ret); | 6769 | mlog_errno(ret); |
@@ -6404,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
6404 | handle_t *handle = NULL; | 6806 | handle_t *handle = NULL; |
6405 | struct inode *tl_inode = osb->osb_tl_inode; | 6807 | struct inode *tl_inode = osb->osb_tl_inode; |
6406 | struct ocfs2_path *path = NULL; | 6808 | struct ocfs2_path *path = NULL; |
6809 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | ||
6407 | 6810 | ||
6408 | mlog_entry_void(); | 6811 | mlog_entry_void(); |
6409 | 6812 | ||
6410 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, | 6813 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, |
6411 | i_size_read(inode)); | 6814 | i_size_read(inode)); |
6412 | 6815 | ||
6413 | path = ocfs2_new_inode_path(fe_bh); | 6816 | path = ocfs2_new_path(fe_bh, &di->id2.i_list); |
6414 | if (!path) { | 6817 | if (!path) { |
6415 | status = -ENOMEM; | 6818 | status = -ENOMEM; |
6416 | mlog_errno(status); | 6819 | mlog_errno(status); |
@@ -6581,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
6581 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | 6984 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); |
6582 | 6985 | ||
6583 | if (fe->id2.i_list.l_tree_depth) { | 6986 | if (fe->id2.i_list.l_tree_depth) { |
6584 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 6987 | status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), |
6585 | &last_eb_bh, OCFS2_BH_CACHED, inode); | 6988 | &last_eb_bh); |
6586 | if (status < 0) { | 6989 | if (status < 0) { |
6587 | mlog_errno(status); | 6990 | mlog_errno(status); |
6588 | goto bail; | 6991 | goto bail; |
@@ -6695,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) | |||
6695 | mlog(ML_NOTICE, | 7098 | mlog(ML_NOTICE, |
6696 | "Truncate completion has non-empty dealloc context\n"); | 7099 | "Truncate completion has non-empty dealloc context\n"); |
6697 | 7100 | ||
6698 | if (tc->tc_last_eb_bh) | 7101 | brelse(tc->tc_last_eb_bh); |
6699 | brelse(tc->tc_last_eb_bh); | ||
6700 | 7102 | ||
6701 | kfree(tc); | 7103 | kfree(tc); |
6702 | } | 7104 | } |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd8011..70257c84cfbe 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -26,30 +26,102 @@ | |||
26 | #ifndef OCFS2_ALLOC_H | 26 | #ifndef OCFS2_ALLOC_H |
27 | #define OCFS2_ALLOC_H | 27 | #define OCFS2_ALLOC_H |
28 | 28 | ||
29 | |||
30 | /* | ||
31 | * For xattr tree leaf, we limit the leaf byte size to be 64K. | ||
32 | */ | ||
33 | #define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536 | ||
34 | |||
35 | /* | ||
36 | * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract | ||
37 | * the b-tree operations in ocfs2. Now all the b-tree operations are not | ||
38 | * limited to ocfs2_dinode only. Any data which need to allocate clusters | ||
39 | * to store can use b-tree. And it only needs to implement its ocfs2_extent_tree | ||
40 | * and operation. | ||
41 | * | ||
42 | * ocfs2_extent_tree becomes the first-class object for extent tree | ||
43 | * manipulation. Callers of the alloc.c code need to fill it via one of | ||
44 | * the ocfs2_init_*_extent_tree() operations below. | ||
45 | * | ||
46 | * ocfs2_extent_tree contains info for the root of the b-tree, it must have a | ||
47 | * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree | ||
48 | * functions. | ||
49 | * ocfs2_extent_tree_operations abstract the normal operations we do for | ||
50 | * the root of extent b-tree. | ||
51 | */ | ||
52 | struct ocfs2_extent_tree_operations; | ||
53 | struct ocfs2_extent_tree { | ||
54 | struct ocfs2_extent_tree_operations *et_ops; | ||
55 | struct buffer_head *et_root_bh; | ||
56 | struct ocfs2_extent_list *et_root_el; | ||
57 | void *et_object; | ||
58 | unsigned int et_max_leaf_clusters; | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the | ||
63 | * specified object buffer. | ||
64 | */ | ||
65 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
66 | struct inode *inode, | ||
67 | struct buffer_head *bh); | ||
68 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
69 | struct inode *inode, | ||
70 | struct buffer_head *bh); | ||
71 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
72 | struct inode *inode, | ||
73 | struct buffer_head *bh, | ||
74 | struct ocfs2_xattr_value_root *xv); | ||
75 | |||
29 | struct ocfs2_alloc_context; | 76 | struct ocfs2_alloc_context; |
30 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 77 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
31 | handle_t *handle, | 78 | handle_t *handle, |
32 | struct inode *inode, | 79 | struct inode *inode, |
33 | struct buffer_head *fe_bh, | 80 | struct ocfs2_extent_tree *et, |
34 | u32 cpos, | 81 | u32 cpos, |
35 | u64 start_blk, | 82 | u64 start_blk, |
36 | u32 new_clusters, | 83 | u32 new_clusters, |
37 | u8 flags, | 84 | u8 flags, |
38 | struct ocfs2_alloc_context *meta_ac); | 85 | struct ocfs2_alloc_context *meta_ac); |
86 | |||
87 | enum ocfs2_alloc_restarted { | ||
88 | RESTART_NONE = 0, | ||
89 | RESTART_TRANS, | ||
90 | RESTART_META | ||
91 | }; | ||
92 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
93 | struct inode *inode, | ||
94 | u32 *logical_offset, | ||
95 | u32 clusters_to_add, | ||
96 | int mark_unwritten, | ||
97 | struct ocfs2_extent_tree *et, | ||
98 | handle_t *handle, | ||
99 | struct ocfs2_alloc_context *data_ac, | ||
100 | struct ocfs2_alloc_context *meta_ac, | ||
101 | enum ocfs2_alloc_restarted *reason_ret); | ||
39 | struct ocfs2_cached_dealloc_ctxt; | 102 | struct ocfs2_cached_dealloc_ctxt; |
40 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 103 | int ocfs2_mark_extent_written(struct inode *inode, |
104 | struct ocfs2_extent_tree *et, | ||
41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 105 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
42 | struct ocfs2_alloc_context *meta_ac, | 106 | struct ocfs2_alloc_context *meta_ac, |
43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 107 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 108 | int ocfs2_remove_extent(struct inode *inode, |
109 | struct ocfs2_extent_tree *et, | ||
45 | u32 cpos, u32 len, handle_t *handle, | 110 | u32 cpos, u32 len, handle_t *handle, |
46 | struct ocfs2_alloc_context *meta_ac, | 111 | struct ocfs2_alloc_context *meta_ac, |
47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 112 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 113 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
49 | struct inode *inode, | 114 | struct inode *inode, |
50 | struct ocfs2_dinode *fe); | 115 | struct ocfs2_extent_tree *et); |
51 | /* how many new metadata chunks would an allocation need at maximum? */ | 116 | |
52 | static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | 117 | /* |
118 | * how many new metadata chunks would an allocation need at maximum? | ||
119 | * | ||
120 | * Please note that the caller must make sure that root_el is the root | ||
121 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
122 | * the result may be wrong. | ||
123 | */ | ||
124 | static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el) | ||
53 | { | 125 | { |
54 | /* | 126 | /* |
55 | * Rather than do all the work of determining how much we need | 127 | * Rather than do all the work of determining how much we need |
@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | |||
59 | * new tree_depth==0 extent_block, and one block at the new | 131 | * new tree_depth==0 extent_block, and one block at the new |
60 | * top-of-the tree. | 132 | * top-of-the tree. |
61 | */ | 133 | */ |
62 | return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; | 134 | return le16_to_cpu(root_el->l_tree_depth) + 2; |
63 | } | 135 | } |
64 | 136 | ||
65 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); | 137 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); |
@@ -146,4 +218,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, | |||
146 | return le16_to_cpu(rec->e_leaf_clusters); | 218 | return le16_to_cpu(rec->e_leaf_clusters); |
147 | } | 219 | } |
148 | 220 | ||
221 | /* | ||
222 | * This is only valid for leaf nodes, which are the only ones that can | ||
223 | * have empty extents anyway. | ||
224 | */ | ||
225 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
226 | { | ||
227 | return !rec->e_leaf_clusters; | ||
228 | } | ||
229 | |||
149 | #endif /* OCFS2_ALLOC_H */ | 230 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..c22543b33420 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
68 | goto bail; | 68 | goto bail; |
69 | } | 69 | } |
70 | 70 | ||
71 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 71 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
72 | OCFS2_I(inode)->ip_blkno, | ||
73 | &bh, OCFS2_BH_CACHED, inode); | ||
74 | if (status < 0) { | 72 | if (status < 0) { |
75 | mlog_errno(status); | 73 | mlog_errno(status); |
76 | goto bail; | 74 | goto bail; |
@@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
128 | err = 0; | 126 | err = 0; |
129 | 127 | ||
130 | bail: | 128 | bail: |
131 | if (bh) | 129 | brelse(bh); |
132 | brelse(bh); | ||
133 | 130 | ||
134 | mlog_exit(err); | 131 | mlog_exit(err); |
135 | return err; | 132 | return err; |
@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) | |||
261 | { | 258 | { |
262 | int ret; | 259 | int ret; |
263 | struct buffer_head *di_bh = NULL; | 260 | struct buffer_head *di_bh = NULL; |
264 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
265 | 261 | ||
266 | BUG_ON(!PageLocked(page)); | 262 | BUG_ON(!PageLocked(page)); |
267 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); | 263 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); |
268 | 264 | ||
269 | ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, | 265 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
270 | OCFS2_BH_CACHED, inode); | ||
271 | if (ret) { | 266 | if (ret) { |
272 | mlog_errno(ret); | 267 | mlog_errno(ret); |
273 | goto out; | 268 | goto out; |
@@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
485 | } | 480 | } |
486 | 481 | ||
487 | if (ocfs2_should_order_data(inode)) { | 482 | if (ocfs2_should_order_data(inode)) { |
483 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
484 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
488 | ret = walk_page_buffers(handle, | 485 | ret = walk_page_buffers(handle, |
489 | page_buffers(page), | 486 | page_buffers(page), |
490 | from, to, NULL, | 487 | from, to, NULL, |
491 | ocfs2_journal_dirty_data); | 488 | ocfs2_journal_dirty_data); |
492 | if (ret < 0) | 489 | #endif |
490 | if (ret < 0) | ||
493 | mlog_errno(ret); | 491 | mlog_errno(ret); |
494 | } | 492 | } |
495 | out: | 493 | out: |
@@ -594,7 +592,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
594 | goto bail; | 592 | goto bail; |
595 | } | 593 | } |
596 | 594 | ||
597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 595 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { |
598 | ocfs2_error(inode->i_sb, | 596 | ocfs2_error(inode->i_sb, |
599 | "Inode %llu has a hole at block %llu\n", | 597 | "Inode %llu has a hole at block %llu\n", |
600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 598 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | |||
669 | { | 667 | { |
670 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 668 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
671 | 669 | ||
672 | journal_invalidatepage(journal, page, offset); | 670 | jbd2_journal_invalidatepage(journal, page, offset); |
673 | } | 671 | } |
674 | 672 | ||
675 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 673 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
678 | 676 | ||
679 | if (!page_has_buffers(page)) | 677 | if (!page_has_buffers(page)) |
680 | return 0; | 678 | return 0; |
681 | return journal_try_to_free_buffers(journal, page, wait); | 679 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
682 | } | 680 | } |
683 | 681 | ||
684 | static ssize_t ocfs2_direct_IO(int rw, | 682 | static ssize_t ocfs2_direct_IO(int rw, |
@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode, | |||
1074 | tmppage = wc->w_pages[i]; | 1072 | tmppage = wc->w_pages[i]; |
1075 | 1073 | ||
1076 | if (page_has_buffers(tmppage)) { | 1074 | if (page_has_buffers(tmppage)) { |
1077 | if (ocfs2_should_order_data(inode)) | 1075 | if (ocfs2_should_order_data(inode)) { |
1076 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
1077 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
1078 | walk_page_buffers(wc->w_handle, | 1078 | walk_page_buffers(wc->w_handle, |
1079 | page_buffers(tmppage), | 1079 | page_buffers(tmppage), |
1080 | from, to, NULL, | 1080 | from, to, NULL, |
1081 | ocfs2_journal_dirty_data); | 1081 | ocfs2_journal_dirty_data); |
1082 | #endif | ||
1083 | } | ||
1082 | 1084 | ||
1083 | block_commit_write(tmppage, from, to); | 1085 | block_commit_write(tmppage, from, to); |
1084 | } | 1086 | } |
@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1242 | int ret, i, new, should_zero = 0; | 1244 | int ret, i, new, should_zero = 0; |
1243 | u64 v_blkno, p_blkno; | 1245 | u64 v_blkno, p_blkno; |
1244 | struct inode *inode = mapping->host; | 1246 | struct inode *inode = mapping->host; |
1247 | struct ocfs2_extent_tree et; | ||
1245 | 1248 | ||
1246 | new = phys == 0 ? 1 : 0; | 1249 | new = phys == 0 ? 1 : 0; |
1247 | if (new || unwritten) | 1250 | if (new || unwritten) |
@@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1255 | * any additional semaphores or cluster locks. | 1258 | * any additional semaphores or cluster locks. |
1256 | */ | 1259 | */ |
1257 | tmp_pos = cpos; | 1260 | tmp_pos = cpos; |
1258 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1261 | ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, |
1259 | &tmp_pos, 1, 0, wc->w_di_bh, | 1262 | &tmp_pos, 1, 0, wc->w_di_bh, |
1260 | wc->w_handle, data_ac, | 1263 | wc->w_handle, data_ac, |
1261 | meta_ac, NULL); | 1264 | meta_ac, NULL); |
1262 | /* | 1265 | /* |
1263 | * This shouldn't happen because we must have already | 1266 | * This shouldn't happen because we must have already |
1264 | * calculated the correct meta data allocation required. The | 1267 | * calculated the correct meta data allocation required. The |
@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1276 | goto out; | 1279 | goto out; |
1277 | } | 1280 | } |
1278 | } else if (unwritten) { | 1281 | } else if (unwritten) { |
1279 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | 1282 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); |
1283 | ret = ocfs2_mark_extent_written(inode, &et, | ||
1280 | wc->w_handle, cpos, 1, phys, | 1284 | wc->w_handle, cpos, 1, phys, |
1281 | meta_ac, &wc->w_dealloc); | 1285 | meta_ac, &wc->w_dealloc); |
1282 | if (ret < 0) { | 1286 | if (ret < 0) { |
@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1665 | struct ocfs2_alloc_context *data_ac = NULL; | 1669 | struct ocfs2_alloc_context *data_ac = NULL; |
1666 | struct ocfs2_alloc_context *meta_ac = NULL; | 1670 | struct ocfs2_alloc_context *meta_ac = NULL; |
1667 | handle_t *handle; | 1671 | handle_t *handle; |
1672 | struct ocfs2_extent_tree et; | ||
1668 | 1673 | ||
1669 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); | 1674 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); |
1670 | if (ret) { | 1675 | if (ret) { |
@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1712 | * ocfs2_lock_allocators(). It greatly over-estimates | 1717 | * ocfs2_lock_allocators(). It greatly over-estimates |
1713 | * the work to be done. | 1718 | * the work to be done. |
1714 | */ | 1719 | */ |
1715 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | 1720 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u," |
1716 | extents_to_split, &data_ac, &meta_ac); | 1721 | " clusters_to_add = %u, extents_to_split = %u\n", |
1722 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1723 | (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), | ||
1724 | clusters_to_alloc, extents_to_split); | ||
1725 | |||
1726 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); | ||
1727 | ret = ocfs2_lock_allocators(inode, &et, | ||
1728 | clusters_to_alloc, extents_to_split, | ||
1729 | &data_ac, &meta_ac); | ||
1717 | if (ret) { | 1730 | if (ret) { |
1718 | mlog_errno(ret); | 1731 | mlog_errno(ret); |
1719 | goto out; | 1732 | goto out; |
1720 | } | 1733 | } |
1721 | 1734 | ||
1722 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, | 1735 | credits = ocfs2_calc_extend_credits(inode->i_sb, |
1736 | &di->id2.i_list, | ||
1723 | clusters_to_alloc); | 1737 | clusters_to_alloc); |
1724 | 1738 | ||
1725 | } | 1739 | } |
@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
1905 | } | 1919 | } |
1906 | 1920 | ||
1907 | if (page_has_buffers(tmppage)) { | 1921 | if (page_has_buffers(tmppage)) { |
1908 | if (ocfs2_should_order_data(inode)) | 1922 | if (ocfs2_should_order_data(inode)) { |
1923 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
1924 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
1909 | walk_page_buffers(wc->w_handle, | 1925 | walk_page_buffers(wc->w_handle, |
1910 | page_buffers(tmppage), | 1926 | page_buffers(tmppage), |
1911 | from, to, NULL, | 1927 | from, to, NULL, |
1912 | ocfs2_journal_dirty_data); | 1928 | ocfs2_journal_dirty_data); |
1929 | #endif | ||
1930 | } | ||
1913 | block_commit_write(tmppage, from, to); | 1931 | block_commit_write(tmppage, from, to); |
1914 | } | 1932 | } |
1915 | } | 1933 | } |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index f136639f5b41..7e947c672469 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
66 | /* remove from dirty list before I/O. */ | 66 | /* remove from dirty list before I/O. */ |
67 | clear_buffer_dirty(bh); | 67 | clear_buffer_dirty(bh); |
68 | 68 | ||
69 | get_bh(bh); /* for end_buffer_write_sync() */ | 69 | get_bh(bh); /* for end_buffer_write_sync() */ |
70 | bh->b_end_io = end_buffer_write_sync; | 70 | bh->b_end_io = end_buffer_write_sync; |
71 | submit_bh(WRITE, bh); | 71 | submit_bh(WRITE, bh); |
72 | 72 | ||
@@ -88,22 +88,103 @@ out: | |||
88 | return ret; | 88 | return ret; |
89 | } | 89 | } |
90 | 90 | ||
91 | int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | 91 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
92 | struct buffer_head *bhs[], int flags, | 92 | unsigned int nr, struct buffer_head *bhs[]) |
93 | struct inode *inode) | 93 | { |
94 | int status = 0; | ||
95 | unsigned int i; | ||
96 | struct buffer_head *bh; | ||
97 | |||
98 | if (!nr) { | ||
99 | mlog(ML_BH_IO, "No buffers will be read!\n"); | ||
100 | goto bail; | ||
101 | } | ||
102 | |||
103 | for (i = 0 ; i < nr ; i++) { | ||
104 | if (bhs[i] == NULL) { | ||
105 | bhs[i] = sb_getblk(osb->sb, block++); | ||
106 | if (bhs[i] == NULL) { | ||
107 | status = -EIO; | ||
108 | mlog_errno(status); | ||
109 | goto bail; | ||
110 | } | ||
111 | } | ||
112 | bh = bhs[i]; | ||
113 | |||
114 | if (buffer_jbd(bh)) { | ||
115 | mlog(ML_ERROR, | ||
116 | "trying to sync read a jbd " | ||
117 | "managed bh (blocknr = %llu), skipping\n", | ||
118 | (unsigned long long)bh->b_blocknr); | ||
119 | continue; | ||
120 | } | ||
121 | |||
122 | if (buffer_dirty(bh)) { | ||
123 | /* This should probably be a BUG, or | ||
124 | * at least return an error. */ | ||
125 | mlog(ML_ERROR, | ||
126 | "trying to sync read a dirty " | ||
127 | "buffer! (blocknr = %llu), skipping\n", | ||
128 | (unsigned long long)bh->b_blocknr); | ||
129 | continue; | ||
130 | } | ||
131 | |||
132 | lock_buffer(bh); | ||
133 | if (buffer_jbd(bh)) { | ||
134 | mlog(ML_ERROR, | ||
135 | "block %llu had the JBD bit set " | ||
136 | "while I was in lock_buffer!", | ||
137 | (unsigned long long)bh->b_blocknr); | ||
138 | BUG(); | ||
139 | } | ||
140 | |||
141 | clear_buffer_uptodate(bh); | ||
142 | get_bh(bh); /* for end_buffer_read_sync() */ | ||
143 | bh->b_end_io = end_buffer_read_sync; | ||
144 | submit_bh(READ, bh); | ||
145 | } | ||
146 | |||
147 | for (i = nr; i > 0; i--) { | ||
148 | bh = bhs[i - 1]; | ||
149 | |||
150 | if (buffer_jbd(bh)) { | ||
151 | mlog(ML_ERROR, | ||
152 | "the journal got the buffer while it was " | ||
153 | "locked for io! (blocknr = %llu)\n", | ||
154 | (unsigned long long)bh->b_blocknr); | ||
155 | BUG(); | ||
156 | } | ||
157 | |||
158 | wait_on_buffer(bh); | ||
159 | if (!buffer_uptodate(bh)) { | ||
160 | /* Status won't be cleared from here on out, | ||
161 | * so we can safely record this and loop back | ||
162 | * to cleanup the other buffers. */ | ||
163 | status = -EIO; | ||
164 | put_bh(bh); | ||
165 | bhs[i - 1] = NULL; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | bail: | ||
170 | return status; | ||
171 | } | ||
172 | |||
173 | int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | ||
174 | struct buffer_head *bhs[], int flags) | ||
94 | { | 175 | { |
95 | int status = 0; | 176 | int status = 0; |
96 | struct super_block *sb; | ||
97 | int i, ignore_cache = 0; | 177 | int i, ignore_cache = 0; |
98 | struct buffer_head *bh; | 178 | struct buffer_head *bh; |
99 | 179 | ||
100 | mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", | 180 | mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", |
101 | (unsigned long long)block, nr, flags, inode); | 181 | inode, (unsigned long long)block, nr, flags); |
102 | 182 | ||
183 | BUG_ON(!inode); | ||
103 | BUG_ON((flags & OCFS2_BH_READAHEAD) && | 184 | BUG_ON((flags & OCFS2_BH_READAHEAD) && |
104 | (!inode || !(flags & OCFS2_BH_CACHED))); | 185 | (flags & OCFS2_BH_IGNORE_CACHE)); |
105 | 186 | ||
106 | if (osb == NULL || osb->sb == NULL || bhs == NULL) { | 187 | if (bhs == NULL) { |
107 | status = -EINVAL; | 188 | status = -EINVAL; |
108 | mlog_errno(status); | 189 | mlog_errno(status); |
109 | goto bail; | 190 | goto bail; |
@@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
122 | goto bail; | 203 | goto bail; |
123 | } | 204 | } |
124 | 205 | ||
125 | sb = osb->sb; | 206 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); |
126 | |||
127 | if (flags & OCFS2_BH_CACHED && !inode) | ||
128 | flags &= ~OCFS2_BH_CACHED; | ||
129 | |||
130 | if (inode) | ||
131 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | ||
132 | for (i = 0 ; i < nr ; i++) { | 207 | for (i = 0 ; i < nr ; i++) { |
133 | if (bhs[i] == NULL) { | 208 | if (bhs[i] == NULL) { |
134 | bhs[i] = sb_getblk(sb, block++); | 209 | bhs[i] = sb_getblk(inode->i_sb, block++); |
135 | if (bhs[i] == NULL) { | 210 | if (bhs[i] == NULL) { |
136 | if (inode) | 211 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
137 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
138 | status = -EIO; | 212 | status = -EIO; |
139 | mlog_errno(status); | 213 | mlog_errno(status); |
140 | goto bail; | 214 | goto bail; |
141 | } | 215 | } |
142 | } | 216 | } |
143 | bh = bhs[i]; | 217 | bh = bhs[i]; |
144 | ignore_cache = 0; | 218 | ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); |
145 | 219 | ||
146 | /* There are three read-ahead cases here which we need to | 220 | /* There are three read-ahead cases here which we need to |
147 | * be concerned with. All three assume a buffer has | 221 | * be concerned with. All three assume a buffer has |
@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
167 | * before our is-it-in-flight check. | 241 | * before our is-it-in-flight check. |
168 | */ | 242 | */ |
169 | 243 | ||
170 | if (flags & OCFS2_BH_CACHED && | 244 | if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { |
171 | !ocfs2_buffer_uptodate(inode, bh)) { | ||
172 | mlog(ML_UPTODATE, | 245 | mlog(ML_UPTODATE, |
173 | "bh (%llu), inode %llu not uptodate\n", | 246 | "bh (%llu), inode %llu not uptodate\n", |
174 | (unsigned long long)bh->b_blocknr, | 247 | (unsigned long long)bh->b_blocknr, |
175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 248 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
249 | /* We're using ignore_cache here to say | ||
250 | * "go to disk" */ | ||
176 | ignore_cache = 1; | 251 | ignore_cache = 1; |
177 | } | 252 | } |
178 | 253 | ||
179 | /* XXX: Can we ever get this and *not* have the cached | 254 | /* XXX: Can we ever get this and *not* have the cached |
180 | * flag set? */ | 255 | * flag set? */ |
181 | if (buffer_jbd(bh)) { | 256 | if (buffer_jbd(bh)) { |
182 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) | 257 | if (ignore_cache) |
183 | mlog(ML_BH_IO, "trying to sync read a jbd " | 258 | mlog(ML_BH_IO, "trying to sync read a jbd " |
184 | "managed bh (blocknr = %llu)\n", | 259 | "managed bh (blocknr = %llu)\n", |
185 | (unsigned long long)bh->b_blocknr); | 260 | (unsigned long long)bh->b_blocknr); |
186 | continue; | 261 | continue; |
187 | } | 262 | } |
188 | 263 | ||
189 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { | 264 | if (ignore_cache) { |
190 | if (buffer_dirty(bh)) { | 265 | if (buffer_dirty(bh)) { |
191 | /* This should probably be a BUG, or | 266 | /* This should probably be a BUG, or |
192 | * at least return an error. */ | 267 | * at least return an error. */ |
@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
221 | * previously read-ahead buffer may have | 296 | * previously read-ahead buffer may have |
222 | * completed I/O while we were waiting for the | 297 | * completed I/O while we were waiting for the |
223 | * buffer lock. */ | 298 | * buffer lock. */ |
224 | if ((flags & OCFS2_BH_CACHED) | 299 | if (!(flags & OCFS2_BH_IGNORE_CACHE) |
225 | && !(flags & OCFS2_BH_READAHEAD) | 300 | && !(flags & OCFS2_BH_READAHEAD) |
226 | && ocfs2_buffer_uptodate(inode, bh)) { | 301 | && ocfs2_buffer_uptodate(inode, bh)) { |
227 | unlock_buffer(bh); | 302 | unlock_buffer(bh); |
@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
265 | /* Always set the buffer in the cache, even if it was | 340 | /* Always set the buffer in the cache, even if it was |
266 | * a forced read, or read-ahead which hasn't yet | 341 | * a forced read, or read-ahead which hasn't yet |
267 | * completed. */ | 342 | * completed. */ |
268 | if (inode) | 343 | ocfs2_set_buffer_uptodate(inode, bh); |
269 | ocfs2_set_buffer_uptodate(inode, bh); | ||
270 | } | 344 | } |
271 | if (inode) | 345 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
272 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
273 | 346 | ||
274 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", | 347 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
275 | (unsigned long long)block, nr, | 348 | (unsigned long long)block, nr, |
276 | (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); | 349 | ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", |
350 | flags); | ||
277 | 351 | ||
278 | bail: | 352 | bail: |
279 | 353 | ||
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index c2e78614c3e5..75e1dcb1ade7 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -31,31 +31,29 @@ | |||
31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, | 31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, |
32 | int uptodate); | 32 | int uptodate); |
33 | 33 | ||
34 | static inline int ocfs2_read_block(struct ocfs2_super *osb, | 34 | static inline int ocfs2_read_block(struct inode *inode, |
35 | u64 off, | 35 | u64 off, |
36 | struct buffer_head **bh, | 36 | struct buffer_head **bh); |
37 | int flags, | ||
38 | struct inode *inode); | ||
39 | 37 | ||
40 | int ocfs2_write_block(struct ocfs2_super *osb, | 38 | int ocfs2_write_block(struct ocfs2_super *osb, |
41 | struct buffer_head *bh, | 39 | struct buffer_head *bh, |
42 | struct inode *inode); | 40 | struct inode *inode); |
43 | int ocfs2_read_blocks(struct ocfs2_super *osb, | 41 | int ocfs2_read_blocks(struct inode *inode, |
44 | u64 block, | 42 | u64 block, |
45 | int nr, | 43 | int nr, |
46 | struct buffer_head *bhs[], | 44 | struct buffer_head *bhs[], |
47 | int flags, | 45 | int flags); |
48 | struct inode *inode); | 46 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
47 | unsigned int nr, struct buffer_head *bhs[]); | ||
49 | 48 | ||
50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | 49 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, |
51 | struct buffer_head *bh); | 50 | struct buffer_head *bh); |
52 | 51 | ||
53 | #define OCFS2_BH_CACHED 1 | 52 | #define OCFS2_BH_IGNORE_CACHE 1 |
54 | #define OCFS2_BH_READAHEAD 8 | 53 | #define OCFS2_BH_READAHEAD 8 |
55 | 54 | ||
56 | static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | 55 | static inline int ocfs2_read_block(struct inode *inode, u64 off, |
57 | struct buffer_head **bh, int flags, | 56 | struct buffer_head **bh) |
58 | struct inode *inode) | ||
59 | { | 57 | { |
60 | int status = 0; | 58 | int status = 0; |
61 | 59 | ||
@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | |||
65 | goto bail; | 63 | goto bail; |
66 | } | 64 | } |
67 | 65 | ||
68 | status = ocfs2_read_blocks(osb, off, 1, bh, | 66 | status = ocfs2_read_blocks(inode, off, 1, bh, 0); |
69 | flags, inode); | ||
70 | 67 | ||
71 | bail: | 68 | bail: |
72 | return status; | 69 | return status; |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 23c732f27529..d8a0cb92cef6 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
109 | define_mask(CONN), | 109 | define_mask(CONN), |
110 | define_mask(QUORUM), | 110 | define_mask(QUORUM), |
111 | define_mask(EXPORT), | 111 | define_mask(EXPORT), |
112 | define_mask(XATTR), | ||
112 | define_mask(ERROR), | 113 | define_mask(ERROR), |
113 | define_mask(NOTICE), | 114 | define_mask(NOTICE), |
114 | define_mask(KTHREAD), | 115 | define_mask(KTHREAD), |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 597e064bb94f..57670c680471 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -112,6 +112,7 @@ | |||
112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ | 112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ |
113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ | 113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ |
114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ | 114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ |
115 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | ||
115 | /* bits that are infrequently given and frequently matched in the high word */ | 116 | /* bits that are infrequently given and frequently matched in the high word */ |
116 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 117 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
117 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 118 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) | |||
138 | " message id: %d\n" | 138 | " message id: %d\n" |
139 | " message type: %u\n" | 139 | " message type: %u\n" |
140 | " message key: 0x%08x\n" | 140 | " message key: 0x%08x\n" |
141 | " sock acquiry: %lu.%lu\n" | 141 | " sock acquiry: %lu.%ld\n" |
142 | " send start: %lu.%lu\n" | 142 | " send start: %lu.%ld\n" |
143 | " wait start: %lu.%lu\n", | 143 | " wait start: %lu.%ld\n", |
144 | nst, (unsigned long)nst->st_task->pid, | 144 | nst, (unsigned long)nst->st_task->pid, |
145 | (unsigned long)nst->st_task->tgid, | 145 | (unsigned long)nst->st_task->tgid, |
146 | nst->st_task->comm, nst->st_node, | 146 | nst->st_task->comm, nst->st_node, |
147 | nst->st_sc, nst->st_id, nst->st_msg_type, | 147 | nst->st_sc, nst->st_id, nst->st_msg_type, |
148 | nst->st_msg_key, | 148 | nst->st_msg_key, |
149 | nst->st_sock_time.tv_sec, | 149 | nst->st_sock_time.tv_sec, |
150 | (unsigned long)nst->st_sock_time.tv_usec, | 150 | (long)nst->st_sock_time.tv_usec, |
151 | nst->st_send_time.tv_sec, | 151 | nst->st_send_time.tv_sec, |
152 | (unsigned long)nst->st_send_time.tv_usec, | 152 | (long)nst->st_send_time.tv_usec, |
153 | nst->st_status_time.tv_sec, | 153 | nst->st_status_time.tv_sec, |
154 | nst->st_status_time.tv_usec); | 154 | (long)nst->st_status_time.tv_usec); |
155 | } | 155 | } |
156 | 156 | ||
157 | spin_unlock(&o2net_debug_lock); | 157 | spin_unlock(&o2net_debug_lock); |
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
276 | return sc; /* unused, just needs to be null when done */ | 276 | return sc; /* unused, just needs to be null when done */ |
277 | } | 277 | } |
278 | 278 | ||
279 | #define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec | 279 | #define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec |
280 | 280 | ||
281 | static int sc_seq_show(struct seq_file *seq, void *v) | 281 | static int sc_seq_show(struct seq_file *seq, void *v) |
282 | { | 282 | { |
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) | |||
309 | " remote node: %s\n" | 309 | " remote node: %s\n" |
310 | " page off: %zu\n" | 310 | " page off: %zu\n" |
311 | " handshake ok: %u\n" | 311 | " handshake ok: %u\n" |
312 | " timer: %lu.%lu\n" | 312 | " timer: %lu.%ld\n" |
313 | " data ready: %lu.%lu\n" | 313 | " data ready: %lu.%ld\n" |
314 | " advance start: %lu.%lu\n" | 314 | " advance start: %lu.%ld\n" |
315 | " advance stop: %lu.%lu\n" | 315 | " advance stop: %lu.%ld\n" |
316 | " func start: %lu.%lu\n" | 316 | " func start: %lu.%ld\n" |
317 | " func stop: %lu.%lu\n" | 317 | " func stop: %lu.%ld\n" |
318 | " func key: %u\n" | 318 | " func key: %u\n" |
319 | " func type: %u\n", | 319 | " func type: %u\n", |
320 | sc, | 320 | sc, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | |||
143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
144 | 144 | ||
145 | #ifdef CONFIG_DEBUG_FS | 145 | #ifdef CONFIG_DEBUG_FS |
146 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 146 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
147 | u32 msgkey, struct task_struct *task, u8 node) | 147 | u32 msgkey, struct task_struct *task, u8 node) |
148 | { | 148 | { |
149 | INIT_LIST_HEAD(&nst->st_net_debug_item); | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
150 | nst->st_task = task; | 150 | nst->st_task = task; |
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | |||
153 | nst->st_node = node; | 153 | nst->st_node = node; |
154 | } | 154 | } |
155 | 155 | ||
156 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | 156 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) |
157 | { | 157 | { |
158 | do_gettimeofday(&nst->st_sock_time); | 158 | do_gettimeofday(&nst->st_sock_time); |
159 | } | 159 | } |
160 | 160 | ||
161 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | 161 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) |
162 | { | 162 | { |
163 | do_gettimeofday(&nst->st_send_time); | 163 | do_gettimeofday(&nst->st_send_time); |
164 | } | 164 | } |
165 | 165 | ||
166 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | 166 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) |
167 | { | 167 | { |
168 | do_gettimeofday(&nst->st_status_time); | 168 | do_gettimeofday(&nst->st_status_time); |
169 | } | 169 | } |
170 | 170 | ||
171 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 171 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, |
172 | struct o2net_sock_container *sc) | 172 | struct o2net_sock_container *sc) |
173 | { | 173 | { |
174 | nst->st_sc = sc; | 174 | nst->st_sc = sc; |
175 | } | 175 | } |
176 | 176 | ||
177 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | 177 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) |
178 | { | 178 | { |
179 | nst->st_id = msg_id; | 179 | nst->st_id = msg_id; |
180 | } | 180 | } |
181 | |||
182 | #else /* CONFIG_DEBUG_FS */ | ||
183 | |||
184 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
185 | u32 msgkey, struct task_struct *task, u8 node) | ||
186 | { | ||
187 | } | ||
188 | |||
189 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
190 | { | ||
191 | } | ||
192 | |||
193 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
194 | { | ||
195 | } | ||
196 | |||
197 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
198 | { | ||
199 | } | ||
200 | |||
201 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
202 | struct o2net_sock_container *sc) | ||
203 | { | ||
204 | } | ||
205 | |||
206 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
207 | u32 msg_id) | ||
208 | { | ||
209 | } | ||
210 | |||
181 | #endif /* CONFIG_DEBUG_FS */ | 211 | #endif /* CONFIG_DEBUG_FS */ |
182 | 212 | ||
183 | static inline int o2net_reconnect_delay(void) | 213 | static inline int o2net_reconnect_delay(void) |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -224,42 +224,10 @@ struct o2net_send_tracking { | |||
224 | struct timeval st_send_time; | 224 | struct timeval st_send_time; |
225 | struct timeval st_status_time; | 225 | struct timeval st_status_time; |
226 | }; | 226 | }; |
227 | |||
228 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
229 | u32 msgkey, struct task_struct *task, u8 node); | ||
230 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); | ||
231 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst); | ||
232 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst); | ||
233 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
234 | struct o2net_sock_container *sc); | ||
235 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); | ||
236 | |||
237 | #else | 227 | #else |
238 | struct o2net_send_tracking { | 228 | struct o2net_send_tracking { |
239 | u32 dummy; | 229 | u32 dummy; |
240 | }; | 230 | }; |
241 | |||
242 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
243 | u32 msgkey, struct task_struct *task, u8 node) | ||
244 | { | ||
245 | } | ||
246 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
247 | { | ||
248 | } | ||
249 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
250 | { | ||
251 | } | ||
252 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
253 | { | ||
254 | } | ||
255 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
256 | struct o2net_sock_container *sc) | ||
257 | { | ||
258 | } | ||
259 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
260 | u32 msg_id) | ||
261 | { | ||
262 | } | ||
263 | #endif /* CONFIG_DEBUG_FS */ | 231 | #endif /* CONFIG_DEBUG_FS */ |
264 | 232 | ||
265 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..026e6eb85187 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
82 | struct ocfs2_alloc_context *meta_ac, | 82 | struct ocfs2_alloc_context *meta_ac, |
83 | struct buffer_head **new_bh); | 83 | struct buffer_head **new_bh); |
84 | 84 | ||
85 | static struct buffer_head *ocfs2_bread(struct inode *inode, | ||
86 | int block, int *err, int reada) | ||
87 | { | ||
88 | struct buffer_head *bh = NULL; | ||
89 | int tmperr; | ||
90 | u64 p_blkno; | ||
91 | int readflags = 0; | ||
92 | |||
93 | if (reada) | ||
94 | readflags |= OCFS2_BH_READAHEAD; | ||
95 | |||
96 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
97 | i_size_read(inode)) { | ||
98 | BUG_ON(!reada); | ||
99 | return NULL; | ||
100 | } | ||
101 | |||
102 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
103 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
104 | NULL); | ||
105 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
106 | if (tmperr < 0) { | ||
107 | mlog_errno(tmperr); | ||
108 | goto fail; | ||
109 | } | ||
110 | |||
111 | tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); | ||
112 | if (tmperr < 0) | ||
113 | goto fail; | ||
114 | |||
115 | tmperr = 0; | ||
116 | |||
117 | *err = 0; | ||
118 | return bh; | ||
119 | |||
120 | fail: | ||
121 | brelse(bh); | ||
122 | bh = NULL; | ||
123 | |||
124 | *err = -EIO; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
85 | /* | 128 | /* |
86 | * bh passed here can be an inode block or a dir data block, depending | 129 | * bh passed here can be an inode block or a dir data block, depending |
87 | * on the inode inline data flag. | 130 | * on the inode inline data flag. |
@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, | |||
188 | struct ocfs2_dinode *di; | 231 | struct ocfs2_dinode *di; |
189 | struct ocfs2_inline_data *data; | 232 | struct ocfs2_inline_data *data; |
190 | 233 | ||
191 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 234 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
192 | &di_bh, OCFS2_BH_CACHED, dir); | ||
193 | if (ret) { | 235 | if (ret) { |
194 | mlog_errno(ret); | 236 | mlog_errno(ret); |
195 | goto out; | 237 | goto out; |
@@ -260,14 +302,13 @@ restart: | |||
260 | } | 302 | } |
261 | if ((bh = bh_use[ra_ptr++]) == NULL) | 303 | if ((bh = bh_use[ra_ptr++]) == NULL) |
262 | goto next; | 304 | goto next; |
263 | wait_on_buffer(bh); | 305 | if (ocfs2_read_block(dir, block, &bh)) { |
264 | if (!buffer_uptodate(bh)) { | 306 | /* read error, skip block & hope for the best. |
265 | /* read error, skip block & hope for the best */ | 307 | * ocfs2_read_block() has released the bh. */ |
266 | ocfs2_error(dir->i_sb, "reading directory %llu, " | 308 | ocfs2_error(dir->i_sb, "reading directory %llu, " |
267 | "offset %lu\n", | 309 | "offset %lu\n", |
268 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 310 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
269 | block); | 311 | block); |
270 | brelse(bh); | ||
271 | goto next; | 312 | goto next; |
272 | } | 313 | } |
273 | i = ocfs2_search_dirblock(bh, dir, name, namelen, | 314 | i = ocfs2_search_dirblock(bh, dir, name, namelen, |
@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, | |||
417 | struct ocfs2_dinode *di; | 458 | struct ocfs2_dinode *di; |
418 | struct ocfs2_inline_data *data; | 459 | struct ocfs2_inline_data *data; |
419 | 460 | ||
420 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 461 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
421 | &di_bh, OCFS2_BH_CACHED, dir); | ||
422 | if (ret) { | 462 | if (ret) { |
423 | mlog_errno(ret); | 463 | mlog_errno(ret); |
424 | goto out; | 464 | goto out; |
@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, | |||
596 | struct ocfs2_inline_data *data; | 636 | struct ocfs2_inline_data *data; |
597 | struct ocfs2_dir_entry *de; | 637 | struct ocfs2_dir_entry *de; |
598 | 638 | ||
599 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 639 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
600 | &di_bh, OCFS2_BH_CACHED, inode); | ||
601 | if (ret) { | 640 | if (ret) { |
602 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", | 641 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", |
603 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 642 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
@@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
716 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); | 755 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); |
717 | i > 0; i--) { | 756 | i > 0; i--) { |
718 | tmp = ocfs2_bread(inode, ++blk, &err, 1); | 757 | tmp = ocfs2_bread(inode, ++blk, &err, 1); |
719 | if (tmp) | 758 | brelse(tmp); |
720 | brelse(tmp); | ||
721 | } | 759 | } |
722 | last_ra_blk = blk; | 760 | last_ra_blk = blk; |
723 | ra_sectors = 8; | 761 | ra_sectors = 8; |
@@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name, | |||
899 | leave: | 937 | leave: |
900 | if (status < 0) { | 938 | if (status < 0) { |
901 | *dirent = NULL; | 939 | *dirent = NULL; |
902 | if (*dirent_bh) { | 940 | brelse(*dirent_bh); |
903 | brelse(*dirent_bh); | 941 | *dirent_bh = NULL; |
904 | *dirent_bh = NULL; | ||
905 | } | ||
906 | } | 942 | } |
907 | 943 | ||
908 | mlog_exit(status); | 944 | mlog_exit(status); |
@@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
951 | 987 | ||
952 | ret = 0; | 988 | ret = 0; |
953 | bail: | 989 | bail: |
954 | if (dirent_bh) | 990 | brelse(dirent_bh); |
955 | brelse(dirent_bh); | ||
956 | 991 | ||
957 | mlog_exit(ret); | 992 | mlog_exit(ret); |
958 | return ret; | 993 | return ret; |
@@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1127 | 1162 | ||
1128 | status = 0; | 1163 | status = 0; |
1129 | bail: | 1164 | bail: |
1130 | if (new_bh) | 1165 | brelse(new_bh); |
1131 | brelse(new_bh); | ||
1132 | 1166 | ||
1133 | mlog_exit(status); | 1167 | mlog_exit(status); |
1134 | return status; | 1168 | return status; |
@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1192 | struct buffer_head *dirdata_bh = NULL; | 1226 | struct buffer_head *dirdata_bh = NULL; |
1193 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1227 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1194 | handle_t *handle; | 1228 | handle_t *handle; |
1229 | struct ocfs2_extent_tree et; | ||
1230 | |||
1231 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | ||
1195 | 1232 | ||
1196 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 1233 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
1197 | 1234 | ||
@@ -1300,19 +1337,24 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1300 | di->i_size = cpu_to_le64(sb->s_blocksize); | 1337 | di->i_size = cpu_to_le64(sb->s_blocksize); |
1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 1338 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 1339 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
1303 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1304 | 1340 | ||
1305 | /* | 1341 | /* |
1306 | * This should never fail as our extent list is empty and all | 1342 | * This should never fail as our extent list is empty and all |
1307 | * related blocks have been journaled already. | 1343 | * related blocks have been journaled already. |
1308 | */ | 1344 | */ |
1309 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0, | 1345 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, |
1310 | NULL); | 1346 | 0, NULL); |
1311 | if (ret) { | 1347 | if (ret) { |
1312 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
1313 | goto out; | 1349 | goto out_commit; |
1314 | } | 1350 | } |
1315 | 1351 | ||
1352 | /* | ||
1353 | * Set i_blocks after the extent insert for the most up to | ||
1354 | * date ip_clusters value. | ||
1355 | */ | ||
1356 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1357 | |||
1316 | ret = ocfs2_journal_dirty(handle, di_bh); | 1358 | ret = ocfs2_journal_dirty(handle, di_bh); |
1317 | if (ret) { | 1359 | if (ret) { |
1318 | mlog_errno(ret); | 1360 | mlog_errno(ret); |
@@ -1332,11 +1374,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1332 | } | 1374 | } |
1333 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); | 1375 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); |
1334 | 1376 | ||
1335 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno, | 1377 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, |
1336 | len, 0, NULL); | 1378 | blkno, len, 0, NULL); |
1337 | if (ret) { | 1379 | if (ret) { |
1338 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
1339 | goto out; | 1381 | goto out_commit; |
1340 | } | 1382 | } |
1341 | } | 1383 | } |
1342 | 1384 | ||
@@ -1378,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
1378 | if (extend) { | 1420 | if (extend) { |
1379 | u32 offset = OCFS2_I(dir)->ip_clusters; | 1421 | u32 offset = OCFS2_I(dir)->ip_clusters; |
1380 | 1422 | ||
1381 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, | 1423 | status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, |
1382 | 1, 0, parent_fe_bh, handle, | 1424 | 1, 0, parent_fe_bh, handle, |
1383 | data_ac, meta_ac, NULL); | 1425 | data_ac, meta_ac, NULL); |
1384 | BUG_ON(status == -EAGAIN); | 1426 | BUG_ON(status == -EAGAIN); |
1385 | if (status < 0) { | 1427 | if (status < 0) { |
1386 | mlog_errno(status); | 1428 | mlog_errno(status); |
@@ -1425,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1425 | int credits, num_free_extents, drop_alloc_sem = 0; | 1467 | int credits, num_free_extents, drop_alloc_sem = 0; |
1426 | loff_t dir_i_size; | 1468 | loff_t dir_i_size; |
1427 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 1469 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
1470 | struct ocfs2_extent_list *el = &fe->id2.i_list; | ||
1428 | struct ocfs2_alloc_context *data_ac = NULL; | 1471 | struct ocfs2_alloc_context *data_ac = NULL; |
1429 | struct ocfs2_alloc_context *meta_ac = NULL; | 1472 | struct ocfs2_alloc_context *meta_ac = NULL; |
1430 | handle_t *handle = NULL; | 1473 | handle_t *handle = NULL; |
1431 | struct buffer_head *new_bh = NULL; | 1474 | struct buffer_head *new_bh = NULL; |
1432 | struct ocfs2_dir_entry * de; | 1475 | struct ocfs2_dir_entry * de; |
1433 | struct super_block *sb = osb->sb; | 1476 | struct super_block *sb = osb->sb; |
1477 | struct ocfs2_extent_tree et; | ||
1434 | 1478 | ||
1435 | mlog_entry_void(); | 1479 | mlog_entry_void(); |
1436 | 1480 | ||
@@ -1474,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1474 | spin_lock(&OCFS2_I(dir)->ip_lock); | 1518 | spin_lock(&OCFS2_I(dir)->ip_lock); |
1475 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { | 1519 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { |
1476 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1520 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
1477 | num_free_extents = ocfs2_num_free_extents(osb, dir, fe); | 1521 | ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); |
1522 | num_free_extents = ocfs2_num_free_extents(osb, dir, &et); | ||
1478 | if (num_free_extents < 0) { | 1523 | if (num_free_extents < 0) { |
1479 | status = num_free_extents; | 1524 | status = num_free_extents; |
1480 | mlog_errno(status); | 1525 | mlog_errno(status); |
@@ -1482,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1482 | } | 1527 | } |
1483 | 1528 | ||
1484 | if (!num_free_extents) { | 1529 | if (!num_free_extents) { |
1485 | status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); | 1530 | status = ocfs2_reserve_new_metadata(osb, el, &meta_ac); |
1486 | if (status < 0) { | 1531 | if (status < 0) { |
1487 | if (status != -ENOSPC) | 1532 | if (status != -ENOSPC) |
1488 | mlog_errno(status); | 1533 | mlog_errno(status); |
@@ -1497,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1497 | goto bail; | 1542 | goto bail; |
1498 | } | 1543 | } |
1499 | 1544 | ||
1500 | credits = ocfs2_calc_extend_credits(sb, fe, 1); | 1545 | credits = ocfs2_calc_extend_credits(sb, el, 1); |
1501 | } else { | 1546 | } else { |
1502 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1547 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
1503 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; | 1548 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; |
@@ -1563,8 +1608,7 @@ bail: | |||
1563 | if (meta_ac) | 1608 | if (meta_ac) |
1564 | ocfs2_free_alloc_context(meta_ac); | 1609 | ocfs2_free_alloc_context(meta_ac); |
1565 | 1610 | ||
1566 | if (new_bh) | 1611 | brelse(new_bh); |
1567 | brelse(new_bh); | ||
1568 | 1612 | ||
1569 | mlog_exit(status); | 1613 | mlog_exit(status); |
1570 | return status; | 1614 | return status; |
@@ -1691,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, | |||
1691 | 1735 | ||
1692 | status = 0; | 1736 | status = 0; |
1693 | bail: | 1737 | bail: |
1694 | if (bh) | 1738 | brelse(bh); |
1695 | brelse(bh); | ||
1696 | 1739 | ||
1697 | mlog_exit(status); | 1740 | mlog_exit(status); |
1698 | return status; | 1741 | return status; |
@@ -1751,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
1751 | *ret_de_bh = bh; | 1794 | *ret_de_bh = bh; |
1752 | bh = NULL; | 1795 | bh = NULL; |
1753 | out: | 1796 | out: |
1754 | if (bh) | 1797 | brelse(bh); |
1755 | brelse(bh); | ||
1756 | return ret; | 1798 | return ret; |
1757 | } | 1799 | } |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index eae3d643a5e4..ec684426034b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
2024 | } else { | 2024 | } else { |
2025 | /* Boo, we have to go to disk. */ | 2025 | /* Boo, we have to go to disk. */ |
2026 | /* read bh, cast, ocfs2_refresh_inode */ | 2026 | /* read bh, cast, ocfs2_refresh_inode */ |
2027 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, | 2027 | status = ocfs2_read_block(inode, oi->ip_blkno, bh); |
2028 | bh, OCFS2_BH_CACHED, inode); | ||
2029 | if (status < 0) { | 2028 | if (status < 0) { |
2030 | mlog_errno(status); | 2029 | mlog_errno(status); |
2031 | goto bail_refresh; | 2030 | goto bail_refresh; |
@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
2086 | return 0; | 2085 | return 0; |
2087 | } | 2086 | } |
2088 | 2087 | ||
2089 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 2088 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); |
2090 | OCFS2_I(inode)->ip_blkno, | ||
2091 | ret_bh, | ||
2092 | OCFS2_BH_CACHED, | ||
2093 | inode); | ||
2094 | if (status < 0) | 2089 | if (status < 0) |
2095 | mlog_errno(status); | 2090 | mlog_errno(status); |
2096 | 2091 | ||
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326fe..2baedac58234 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/fiemap.h> | ||
28 | 29 | ||
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 30 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
30 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
33 | 34 | ||
34 | #include "alloc.h" | 35 | #include "alloc.h" |
36 | #include "dlmglue.h" | ||
35 | #include "extent_map.h" | 37 | #include "extent_map.h" |
36 | #include "inode.h" | 38 | #include "inode.h" |
37 | #include "super.h" | 39 | #include "super.h" |
@@ -282,6 +284,50 @@ out: | |||
282 | kfree(new_emi); | 284 | kfree(new_emi); |
283 | } | 285 | } |
284 | 286 | ||
287 | static int ocfs2_last_eb_is_empty(struct inode *inode, | ||
288 | struct ocfs2_dinode *di) | ||
289 | { | ||
290 | int ret, next_free; | ||
291 | u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); | ||
292 | struct buffer_head *eb_bh = NULL; | ||
293 | struct ocfs2_extent_block *eb; | ||
294 | struct ocfs2_extent_list *el; | ||
295 | |||
296 | ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); | ||
297 | if (ret) { | ||
298 | mlog_errno(ret); | ||
299 | goto out; | ||
300 | } | ||
301 | |||
302 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
303 | el = &eb->h_list; | ||
304 | |||
305 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
306 | ret = -EROFS; | ||
307 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
308 | goto out; | ||
309 | } | ||
310 | |||
311 | if (el->l_tree_depth) { | ||
312 | ocfs2_error(inode->i_sb, | ||
313 | "Inode %lu has non zero tree depth in " | ||
314 | "leaf block %llu\n", inode->i_ino, | ||
315 | (unsigned long long)eb_bh->b_blocknr); | ||
316 | ret = -EROFS; | ||
317 | goto out; | ||
318 | } | ||
319 | |||
320 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
321 | |||
322 | if (next_free == 0 || | ||
323 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) | ||
324 | ret = 1; | ||
325 | |||
326 | out: | ||
327 | brelse(eb_bh); | ||
328 | return ret; | ||
329 | } | ||
330 | |||
285 | /* | 331 | /* |
286 | * Return the 1st index within el which contains an extent start | 332 | * Return the 1st index within el which contains an extent start |
287 | * larger than v_cluster. | 333 | * larger than v_cluster. |
@@ -335,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, | |||
335 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | 381 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) |
336 | goto no_more_extents; | 382 | goto no_more_extents; |
337 | 383 | ||
338 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 384 | ret = ocfs2_read_block(inode, |
339 | le64_to_cpu(eb->h_next_leaf_blk), | 385 | le64_to_cpu(eb->h_next_leaf_blk), |
340 | &next_eb_bh, OCFS2_BH_CACHED, inode); | 386 | &next_eb_bh); |
341 | if (ret) { | 387 | if (ret) { |
342 | mlog_errno(ret); | 388 | mlog_errno(ret); |
343 | goto out; | 389 | goto out; |
@@ -373,42 +419,28 @@ out: | |||
373 | return ret; | 419 | return ret; |
374 | } | 420 | } |
375 | 421 | ||
376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 422 | static int ocfs2_get_clusters_nocache(struct inode *inode, |
377 | u32 *p_cluster, u32 *num_clusters, | 423 | struct buffer_head *di_bh, |
378 | unsigned int *extent_flags) | 424 | u32 v_cluster, unsigned int *hole_len, |
425 | struct ocfs2_extent_rec *ret_rec, | ||
426 | unsigned int *is_last) | ||
379 | { | 427 | { |
380 | int ret, i; | 428 | int i, ret, tree_height, len; |
381 | unsigned int flags = 0; | ||
382 | struct buffer_head *di_bh = NULL; | ||
383 | struct buffer_head *eb_bh = NULL; | ||
384 | struct ocfs2_dinode *di; | 429 | struct ocfs2_dinode *di; |
385 | struct ocfs2_extent_block *eb; | 430 | struct ocfs2_extent_block *uninitialized_var(eb); |
386 | struct ocfs2_extent_list *el; | 431 | struct ocfs2_extent_list *el; |
387 | struct ocfs2_extent_rec *rec; | 432 | struct ocfs2_extent_rec *rec; |
388 | u32 coff; | 433 | struct buffer_head *eb_bh = NULL; |
389 | |||
390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
391 | ret = -ERANGE; | ||
392 | mlog_errno(ret); | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
397 | num_clusters, extent_flags); | ||
398 | if (ret == 0) | ||
399 | goto out; | ||
400 | 434 | ||
401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 435 | memset(ret_rec, 0, sizeof(*ret_rec)); |
402 | &di_bh, OCFS2_BH_CACHED, inode); | 436 | if (is_last) |
403 | if (ret) { | 437 | *is_last = 0; |
404 | mlog_errno(ret); | ||
405 | goto out; | ||
406 | } | ||
407 | 438 | ||
408 | di = (struct ocfs2_dinode *) di_bh->b_data; | 439 | di = (struct ocfs2_dinode *) di_bh->b_data; |
409 | el = &di->id2.i_list; | 440 | el = &di->id2.i_list; |
441 | tree_height = le16_to_cpu(el->l_tree_depth); | ||
410 | 442 | ||
411 | if (el->l_tree_depth) { | 443 | if (tree_height > 0) { |
412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 444 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
413 | if (ret) { | 445 | if (ret) { |
414 | mlog_errno(ret); | 446 | mlog_errno(ret); |
@@ -431,46 +463,202 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
431 | i = ocfs2_search_extent_list(el, v_cluster); | 463 | i = ocfs2_search_extent_list(el, v_cluster); |
432 | if (i == -1) { | 464 | if (i == -1) { |
433 | /* | 465 | /* |
434 | * A hole was found. Return some canned values that | 466 | * Holes can be larger than the maximum size of an |
435 | * callers can key on. If asked for, num_clusters will | 467 | * extent, so we return their lengths in a separate |
436 | * be populated with the size of the hole. | 468 | * field. |
437 | */ | 469 | */ |
438 | *p_cluster = 0; | 470 | if (hole_len) { |
439 | if (num_clusters) { | ||
440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 471 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
441 | v_cluster, | 472 | v_cluster, &len); |
442 | num_clusters); | ||
443 | if (ret) { | 473 | if (ret) { |
444 | mlog_errno(ret); | 474 | mlog_errno(ret); |
445 | goto out; | 475 | goto out; |
446 | } | 476 | } |
477 | |||
478 | *hole_len = len; | ||
479 | } | ||
480 | goto out_hole; | ||
481 | } | ||
482 | |||
483 | rec = &el->l_recs[i]; | ||
484 | |||
485 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | ||
486 | |||
487 | if (!rec->e_blkno) { | ||
488 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
489 | "record (%u, %u, 0)", inode->i_ino, | ||
490 | le32_to_cpu(rec->e_cpos), | ||
491 | ocfs2_rec_clusters(el, rec)); | ||
492 | ret = -EROFS; | ||
493 | goto out; | ||
494 | } | ||
495 | |||
496 | *ret_rec = *rec; | ||
497 | |||
498 | /* | ||
499 | * Checking for last extent is potentially expensive - we | ||
500 | * might have to look at the next leaf over to see if it's | ||
501 | * empty. | ||
502 | * | ||
503 | * The first two checks are to see whether the caller even | ||
504 | * cares for this information, and if the extent is at least | ||
505 | * the last in its list. | ||
506 | * | ||
507 | * If those hold true, then the extent is last if any of the | ||
508 | * additional conditions hold true: | ||
509 | * - Extent list is in-inode | ||
510 | * - Extent list is right-most | ||
511 | * - Extent list is 2nd to rightmost, with empty right-most | ||
512 | */ | ||
513 | if (is_last) { | ||
514 | if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
515 | if (tree_height == 0) | ||
516 | *is_last = 1; | ||
517 | else if (eb->h_blkno == di->i_last_eb_blk) | ||
518 | *is_last = 1; | ||
519 | else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { | ||
520 | ret = ocfs2_last_eb_is_empty(inode, di); | ||
521 | if (ret < 0) { | ||
522 | mlog_errno(ret); | ||
523 | goto out; | ||
524 | } | ||
525 | if (ret == 1) | ||
526 | *is_last = 1; | ||
527 | } | ||
528 | } | ||
529 | } | ||
530 | |||
531 | out_hole: | ||
532 | ret = 0; | ||
533 | out: | ||
534 | brelse(eb_bh); | ||
535 | return ret; | ||
536 | } | ||
537 | |||
538 | static void ocfs2_relative_extent_offsets(struct super_block *sb, | ||
539 | u32 v_cluster, | ||
540 | struct ocfs2_extent_rec *rec, | ||
541 | u32 *p_cluster, u32 *num_clusters) | ||
542 | |||
543 | { | ||
544 | u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); | ||
545 | |||
546 | *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); | ||
547 | *p_cluster = *p_cluster + coff; | ||
548 | |||
549 | if (num_clusters) | ||
550 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; | ||
551 | } | ||
552 | |||
553 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
554 | u32 *p_cluster, u32 *num_clusters, | ||
555 | struct ocfs2_extent_list *el) | ||
556 | { | ||
557 | int ret = 0, i; | ||
558 | struct buffer_head *eb_bh = NULL; | ||
559 | struct ocfs2_extent_block *eb; | ||
560 | struct ocfs2_extent_rec *rec; | ||
561 | u32 coff; | ||
562 | |||
563 | if (el->l_tree_depth) { | ||
564 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | ||
565 | if (ret) { | ||
566 | mlog_errno(ret); | ||
567 | goto out; | ||
447 | } | 568 | } |
569 | |||
570 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
571 | el = &eb->h_list; | ||
572 | |||
573 | if (el->l_tree_depth) { | ||
574 | ocfs2_error(inode->i_sb, | ||
575 | "Inode %lu has non zero tree depth in " | ||
576 | "xattr leaf block %llu\n", inode->i_ino, | ||
577 | (unsigned long long)eb_bh->b_blocknr); | ||
578 | ret = -EROFS; | ||
579 | goto out; | ||
580 | } | ||
581 | } | ||
582 | |||
583 | i = ocfs2_search_extent_list(el, v_cluster); | ||
584 | if (i == -1) { | ||
585 | ret = -EROFS; | ||
586 | mlog_errno(ret); | ||
587 | goto out; | ||
448 | } else { | 588 | } else { |
449 | rec = &el->l_recs[i]; | 589 | rec = &el->l_recs[i]; |
450 | |||
451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 590 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
452 | 591 | ||
453 | if (!rec->e_blkno) { | 592 | if (!rec->e_blkno) { |
454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 593 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
455 | "record (%u, %u, 0)", inode->i_ino, | 594 | "record (%u, %u, 0) in xattr", inode->i_ino, |
456 | le32_to_cpu(rec->e_cpos), | 595 | le32_to_cpu(rec->e_cpos), |
457 | ocfs2_rec_clusters(el, rec)); | 596 | ocfs2_rec_clusters(el, rec)); |
458 | ret = -EROFS; | 597 | ret = -EROFS; |
459 | goto out; | 598 | goto out; |
460 | } | 599 | } |
461 | |||
462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 600 | coff = v_cluster - le32_to_cpu(rec->e_cpos); |
463 | |||
464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 601 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, |
465 | le64_to_cpu(rec->e_blkno)); | 602 | le64_to_cpu(rec->e_blkno)); |
466 | *p_cluster = *p_cluster + coff; | 603 | *p_cluster = *p_cluster + coff; |
467 | |||
468 | if (num_clusters) | 604 | if (num_clusters) |
469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 605 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
606 | } | ||
607 | out: | ||
608 | if (eb_bh) | ||
609 | brelse(eb_bh); | ||
610 | return ret; | ||
611 | } | ||
612 | |||
613 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | ||
614 | u32 *p_cluster, u32 *num_clusters, | ||
615 | unsigned int *extent_flags) | ||
616 | { | ||
617 | int ret; | ||
618 | unsigned int uninitialized_var(hole_len), flags = 0; | ||
619 | struct buffer_head *di_bh = NULL; | ||
620 | struct ocfs2_extent_rec rec; | ||
621 | |||
622 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
623 | ret = -ERANGE; | ||
624 | mlog_errno(ret); | ||
625 | goto out; | ||
626 | } | ||
627 | |||
628 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
629 | num_clusters, extent_flags); | ||
630 | if (ret == 0) | ||
631 | goto out; | ||
632 | |||
633 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); | ||
634 | if (ret) { | ||
635 | mlog_errno(ret); | ||
636 | goto out; | ||
637 | } | ||
470 | 638 | ||
471 | flags = rec->e_flags; | 639 | ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, |
640 | &rec, NULL); | ||
641 | if (ret) { | ||
642 | mlog_errno(ret); | ||
643 | goto out; | ||
644 | } | ||
645 | |||
646 | if (rec.e_blkno == 0ULL) { | ||
647 | /* | ||
648 | * A hole was found. Return some canned values that | ||
649 | * callers can key on. If asked for, num_clusters will | ||
650 | * be populated with the size of the hole. | ||
651 | */ | ||
652 | *p_cluster = 0; | ||
653 | if (num_clusters) { | ||
654 | *num_clusters = hole_len; | ||
655 | } | ||
656 | } else { | ||
657 | ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, | ||
658 | p_cluster, num_clusters); | ||
659 | flags = rec.e_flags; | ||
472 | 660 | ||
473 | ocfs2_extent_map_insert_rec(inode, rec); | 661 | ocfs2_extent_map_insert_rec(inode, &rec); |
474 | } | 662 | } |
475 | 663 | ||
476 | if (extent_flags) | 664 | if (extent_flags) |
@@ -478,7 +666,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
478 | 666 | ||
479 | out: | 667 | out: |
480 | brelse(di_bh); | 668 | brelse(di_bh); |
481 | brelse(eb_bh); | ||
482 | return ret; | 669 | return ret; |
483 | } | 670 | } |
484 | 671 | ||
@@ -521,3 +708,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
521 | out: | 708 | out: |
522 | return ret; | 709 | return ret; |
523 | } | 710 | } |
711 | |||
712 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | ||
713 | struct fiemap_extent_info *fieinfo, | ||
714 | u64 map_start) | ||
715 | { | ||
716 | int ret; | ||
717 | unsigned int id_count; | ||
718 | struct ocfs2_dinode *di; | ||
719 | u64 phys; | ||
720 | u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; | ||
721 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
722 | |||
723 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
724 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
725 | |||
726 | if (map_start < id_count) { | ||
727 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | ||
728 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
729 | |||
730 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | ||
731 | flags); | ||
732 | if (ret < 0) | ||
733 | return ret; | ||
734 | } | ||
735 | |||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
740 | |||
741 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
742 | u64 map_start, u64 map_len) | ||
743 | { | ||
744 | int ret, is_last; | ||
745 | u32 mapping_end, cpos; | ||
746 | unsigned int hole_size; | ||
747 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
748 | u64 len_bytes, phys_bytes, virt_bytes; | ||
749 | struct buffer_head *di_bh = NULL; | ||
750 | struct ocfs2_extent_rec rec; | ||
751 | |||
752 | ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); | ||
753 | if (ret) | ||
754 | return ret; | ||
755 | |||
756 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
757 | if (ret) { | ||
758 | mlog_errno(ret); | ||
759 | goto out; | ||
760 | } | ||
761 | |||
762 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
763 | |||
764 | /* | ||
765 | * Handle inline-data separately. | ||
766 | */ | ||
767 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
768 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | ||
769 | goto out_unlock; | ||
770 | } | ||
771 | |||
772 | cpos = map_start >> osb->s_clustersize_bits; | ||
773 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | ||
774 | map_start + map_len); | ||
775 | mapping_end -= cpos; | ||
776 | is_last = 0; | ||
777 | while (cpos < mapping_end && !is_last) { | ||
778 | u32 fe_flags; | ||
779 | |||
780 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, | ||
781 | &hole_size, &rec, &is_last); | ||
782 | if (ret) { | ||
783 | mlog_errno(ret); | ||
784 | goto out; | ||
785 | } | ||
786 | |||
787 | if (rec.e_blkno == 0ULL) { | ||
788 | cpos += hole_size; | ||
789 | continue; | ||
790 | } | ||
791 | |||
792 | fe_flags = 0; | ||
793 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | ||
794 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
795 | if (is_last) | ||
796 | fe_flags |= FIEMAP_EXTENT_LAST; | ||
797 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | ||
798 | phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; | ||
799 | virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; | ||
800 | |||
801 | ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, | ||
802 | len_bytes, fe_flags); | ||
803 | if (ret) | ||
804 | break; | ||
805 | |||
806 | cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); | ||
807 | } | ||
808 | |||
809 | if (ret > 0) | ||
810 | ret = 0; | ||
811 | |||
812 | out_unlock: | ||
813 | brelse(di_bh); | ||
814 | |||
815 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
816 | |||
817 | ocfs2_inode_unlock(inode, 0); | ||
818 | out: | ||
819 | |||
820 | return ret; | ||
821 | } | ||
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a22..1c4aa8b06f34 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
@@ -50,4 +50,11 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | |||
50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
51 | u64 *ret_count, unsigned int *extent_flags); | 51 | u64 *ret_count, unsigned int *extent_flags); |
52 | 52 | ||
53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
54 | u64 map_start, u64 map_len); | ||
55 | |||
56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
57 | u32 *p_cluster, u32 *num_clusters, | ||
58 | struct ocfs2_extent_list *el); | ||
59 | |||
53 | #endif /* _EXTENT_MAP_H */ | 60 | #endif /* _EXTENT_MAP_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3daa..8d3225a78073 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include "mmap.h" | 55 | #include "mmap.h" |
56 | #include "suballoc.h" | 56 | #include "suballoc.h" |
57 | #include "super.h" | 57 | #include "super.h" |
58 | #include "xattr.h" | ||
58 | 59 | ||
59 | #include "buffer_head_io.h" | 60 | #include "buffer_head_io.h" |
60 | 61 | ||
@@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file, | |||
184 | goto bail; | 185 | goto bail; |
185 | 186 | ||
186 | journal = osb->journal->j_journal; | 187 | journal = osb->journal->j_journal; |
187 | err = journal_force_commit(journal); | 188 | err = jbd2_journal_force_commit(journal); |
188 | 189 | ||
189 | bail: | 190 | bail: |
190 | mlog_exit(err); | 191 | mlog_exit(err); |
@@ -488,7 +489,7 @@ bail: | |||
488 | } | 489 | } |
489 | 490 | ||
490 | /* | 491 | /* |
491 | * extend allocation only here. | 492 | * extend file allocation only here. |
492 | * we'll update all the disk stuff, and oip->alloc_size | 493 | * we'll update all the disk stuff, and oip->alloc_size |
493 | * | 494 | * |
494 | * expect stuff to be locked, a transaction started and enough data / | 495 | * expect stuff to be locked, a transaction started and enough data / |
@@ -497,189 +498,25 @@ bail: | |||
497 | * Will return -EAGAIN, and a reason if a restart is needed. | 498 | * Will return -EAGAIN, and a reason if a restart is needed. |
498 | * If passed in, *reason will always be set, even in error. | 499 | * If passed in, *reason will always be set, even in error. |
499 | */ | 500 | */ |
500 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 501 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
501 | struct inode *inode, | 502 | struct inode *inode, |
502 | u32 *logical_offset, | 503 | u32 *logical_offset, |
503 | u32 clusters_to_add, | 504 | u32 clusters_to_add, |
504 | int mark_unwritten, | 505 | int mark_unwritten, |
505 | struct buffer_head *fe_bh, | 506 | struct buffer_head *fe_bh, |
506 | handle_t *handle, | 507 | handle_t *handle, |
507 | struct ocfs2_alloc_context *data_ac, | 508 | struct ocfs2_alloc_context *data_ac, |
508 | struct ocfs2_alloc_context *meta_ac, | 509 | struct ocfs2_alloc_context *meta_ac, |
509 | enum ocfs2_alloc_restarted *reason_ret) | 510 | enum ocfs2_alloc_restarted *reason_ret) |
510 | { | 511 | { |
511 | int status = 0; | 512 | int ret; |
512 | int free_extents; | 513 | struct ocfs2_extent_tree et; |
513 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
514 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
515 | u32 bit_off, num_bits; | ||
516 | u64 block; | ||
517 | u8 flags = 0; | ||
518 | |||
519 | BUG_ON(!clusters_to_add); | ||
520 | |||
521 | if (mark_unwritten) | ||
522 | flags = OCFS2_EXT_UNWRITTEN; | ||
523 | |||
524 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | ||
525 | if (free_extents < 0) { | ||
526 | status = free_extents; | ||
527 | mlog_errno(status); | ||
528 | goto leave; | ||
529 | } | ||
530 | |||
531 | /* there are two cases which could cause us to EAGAIN in the | ||
532 | * we-need-more-metadata case: | ||
533 | * 1) we haven't reserved *any* | ||
534 | * 2) we are so fragmented, we've needed to add metadata too | ||
535 | * many times. */ | ||
536 | if (!free_extents && !meta_ac) { | ||
537 | mlog(0, "we haven't reserved any metadata!\n"); | ||
538 | status = -EAGAIN; | ||
539 | reason = RESTART_META; | ||
540 | goto leave; | ||
541 | } else if ((!free_extents) | ||
542 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
543 | < ocfs2_extend_meta_needed(fe))) { | ||
544 | mlog(0, "filesystem is really fragmented...\n"); | ||
545 | status = -EAGAIN; | ||
546 | reason = RESTART_META; | ||
547 | goto leave; | ||
548 | } | ||
549 | |||
550 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
551 | clusters_to_add, &bit_off, &num_bits); | ||
552 | if (status < 0) { | ||
553 | if (status != -ENOSPC) | ||
554 | mlog_errno(status); | ||
555 | goto leave; | ||
556 | } | ||
557 | |||
558 | BUG_ON(num_bits > clusters_to_add); | ||
559 | |||
560 | /* reserve our write early -- insert_extent may update the inode */ | ||
561 | status = ocfs2_journal_access(handle, inode, fe_bh, | ||
562 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
563 | if (status < 0) { | ||
564 | mlog_errno(status); | ||
565 | goto leave; | ||
566 | } | ||
567 | |||
568 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
569 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
570 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
571 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | ||
572 | *logical_offset, block, num_bits, | ||
573 | flags, meta_ac); | ||
574 | if (status < 0) { | ||
575 | mlog_errno(status); | ||
576 | goto leave; | ||
577 | } | ||
578 | |||
579 | status = ocfs2_journal_dirty(handle, fe_bh); | ||
580 | if (status < 0) { | ||
581 | mlog_errno(status); | ||
582 | goto leave; | ||
583 | } | ||
584 | |||
585 | clusters_to_add -= num_bits; | ||
586 | *logical_offset += num_bits; | ||
587 | |||
588 | if (clusters_to_add) { | ||
589 | mlog(0, "need to alloc once more, clusters = %u, wanted = " | ||
590 | "%u\n", fe->i_clusters, clusters_to_add); | ||
591 | status = -EAGAIN; | ||
592 | reason = RESTART_TRANS; | ||
593 | } | ||
594 | |||
595 | leave: | ||
596 | mlog_exit(status); | ||
597 | if (reason_ret) | ||
598 | *reason_ret = reason; | ||
599 | return status; | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * For a given allocation, determine which allocators will need to be | ||
604 | * accessed, and lock them, reserving the appropriate number of bits. | ||
605 | * | ||
606 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
607 | * and ocfs2_allocate_unwritten_extents(). | ||
608 | * | ||
609 | * File systems which don't support holes call this from | ||
610 | * ocfs2_extend_allocation(). | ||
611 | */ | ||
612 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
613 | u32 clusters_to_add, u32 extents_to_split, | ||
614 | struct ocfs2_alloc_context **data_ac, | ||
615 | struct ocfs2_alloc_context **meta_ac) | ||
616 | { | ||
617 | int ret = 0, num_free_extents; | ||
618 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
619 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
620 | |||
621 | *meta_ac = NULL; | ||
622 | if (data_ac) | ||
623 | *data_ac = NULL; | ||
624 | |||
625 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
626 | |||
627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | ||
628 | "clusters_to_add = %u, extents_to_split = %u\n", | ||
629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), | ||
630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | ||
631 | |||
632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | ||
633 | if (num_free_extents < 0) { | ||
634 | ret = num_free_extents; | ||
635 | mlog_errno(ret); | ||
636 | goto out; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * Sparse allocation file systems need to be more conservative | ||
641 | * with reserving room for expansion - the actual allocation | ||
642 | * happens while we've got a journal handle open so re-taking | ||
643 | * a cluster lock (because we ran out of room for another | ||
644 | * extent) will violate ordering rules. | ||
645 | * | ||
646 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
647 | * anyway. | ||
648 | * | ||
649 | * Always lock for any unwritten extents - we might want to | ||
650 | * add blocks during a split. | ||
651 | */ | ||
652 | if (!num_free_extents || | ||
653 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
654 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | ||
655 | if (ret < 0) { | ||
656 | if (ret != -ENOSPC) | ||
657 | mlog_errno(ret); | ||
658 | goto out; | ||
659 | } | ||
660 | } | ||
661 | |||
662 | if (clusters_to_add == 0) | ||
663 | goto out; | ||
664 | |||
665 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
666 | if (ret < 0) { | ||
667 | if (ret != -ENOSPC) | ||
668 | mlog_errno(ret); | ||
669 | goto out; | ||
670 | } | ||
671 | |||
672 | out: | ||
673 | if (ret) { | ||
674 | if (*meta_ac) { | ||
675 | ocfs2_free_alloc_context(*meta_ac); | ||
676 | *meta_ac = NULL; | ||
677 | } | ||
678 | 514 | ||
679 | /* | 515 | ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); |
680 | * We cannot have an error and a non null *data_ac. | 516 | ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, |
681 | */ | 517 | clusters_to_add, mark_unwritten, |
682 | } | 518 | &et, handle, |
519 | data_ac, meta_ac, reason_ret); | ||
683 | 520 | ||
684 | return ret; | 521 | return ret; |
685 | } | 522 | } |
@@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
698 | struct ocfs2_alloc_context *meta_ac = NULL; | 535 | struct ocfs2_alloc_context *meta_ac = NULL; |
699 | enum ocfs2_alloc_restarted why; | 536 | enum ocfs2_alloc_restarted why; |
700 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 537 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
538 | struct ocfs2_extent_tree et; | ||
701 | 539 | ||
702 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); | 540 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); |
703 | 541 | ||
@@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
707 | */ | 545 | */ |
708 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); | 546 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
709 | 547 | ||
710 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 548 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
711 | OCFS2_BH_CACHED, inode); | ||
712 | if (status < 0) { | 549 | if (status < 0) { |
713 | mlog_errno(status); | 550 | mlog_errno(status); |
714 | goto leave; | 551 | goto leave; |
@@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
724 | restart_all: | 561 | restart_all: |
725 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 562 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
726 | 563 | ||
727 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, | 564 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
728 | &meta_ac); | 565 | "clusters_to_add = %u\n", |
566 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
567 | (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), | ||
568 | clusters_to_add); | ||
569 | ocfs2_init_dinode_extent_tree(&et, inode, bh); | ||
570 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
571 | &data_ac, &meta_ac); | ||
729 | if (status) { | 572 | if (status) { |
730 | mlog_errno(status); | 573 | mlog_errno(status); |
731 | goto leave; | 574 | goto leave; |
732 | } | 575 | } |
733 | 576 | ||
734 | credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); | 577 | credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, |
578 | clusters_to_add); | ||
735 | handle = ocfs2_start_trans(osb, credits); | 579 | handle = ocfs2_start_trans(osb, credits); |
736 | if (IS_ERR(handle)) { | 580 | if (IS_ERR(handle)) { |
737 | status = PTR_ERR(handle); | 581 | status = PTR_ERR(handle); |
@@ -753,16 +597,16 @@ restarted_transaction: | |||
753 | 597 | ||
754 | prev_clusters = OCFS2_I(inode)->ip_clusters; | 598 | prev_clusters = OCFS2_I(inode)->ip_clusters; |
755 | 599 | ||
756 | status = ocfs2_do_extend_allocation(osb, | 600 | status = ocfs2_add_inode_data(osb, |
757 | inode, | 601 | inode, |
758 | &logical_start, | 602 | &logical_start, |
759 | clusters_to_add, | 603 | clusters_to_add, |
760 | mark_unwritten, | 604 | mark_unwritten, |
761 | bh, | 605 | bh, |
762 | handle, | 606 | handle, |
763 | data_ac, | 607 | data_ac, |
764 | meta_ac, | 608 | meta_ac, |
765 | &why); | 609 | &why); |
766 | if ((status < 0) && (status != -EAGAIN)) { | 610 | if ((status < 0) && (status != -EAGAIN)) { |
767 | if (status != -ENOSPC) | 611 | if (status != -ENOSPC) |
768 | mlog_errno(status); | 612 | mlog_errno(status); |
@@ -789,7 +633,7 @@ restarted_transaction: | |||
789 | mlog(0, "restarting transaction.\n"); | 633 | mlog(0, "restarting transaction.\n"); |
790 | /* TODO: This can be more intelligent. */ | 634 | /* TODO: This can be more intelligent. */ |
791 | credits = ocfs2_calc_extend_credits(osb->sb, | 635 | credits = ocfs2_calc_extend_credits(osb->sb, |
792 | fe, | 636 | &fe->id2.i_list, |
793 | clusters_to_add); | 637 | clusters_to_add); |
794 | status = ocfs2_extend_trans(handle, credits); | 638 | status = ocfs2_extend_trans(handle, credits); |
795 | if (status < 0) { | 639 | if (status < 0) { |
@@ -826,10 +670,8 @@ leave: | |||
826 | restart_func = 0; | 670 | restart_func = 0; |
827 | goto restart_all; | 671 | goto restart_all; |
828 | } | 672 | } |
829 | if (bh) { | 673 | brelse(bh); |
830 | brelse(bh); | 674 | bh = NULL; |
831 | bh = NULL; | ||
832 | } | ||
833 | 675 | ||
834 | mlog_exit(status); | 676 | mlog_exit(status); |
835 | return status; | 677 | return status; |
@@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1096 | goto bail_unlock; | 938 | goto bail_unlock; |
1097 | } | 939 | } |
1098 | 940 | ||
1099 | if (i_size_read(inode) > attr->ia_size) | 941 | if (i_size_read(inode) > attr->ia_size) { |
942 | if (ocfs2_should_order_data(inode)) { | ||
943 | status = ocfs2_begin_ordered_truncate(inode, | ||
944 | attr->ia_size); | ||
945 | if (status) | ||
946 | goto bail_unlock; | ||
947 | } | ||
1100 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); | 948 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); |
1101 | else | 949 | } else |
1102 | status = ocfs2_extend_file(inode, bh, attr->ia_size); | 950 | status = ocfs2_extend_file(inode, bh, attr->ia_size); |
1103 | if (status < 0) { | 951 | if (status < 0) { |
1104 | if (status != -ENOSPC) | 952 | if (status != -ENOSPC) |
@@ -1140,8 +988,7 @@ bail_unlock_rw: | |||
1140 | if (size_change) | 988 | if (size_change) |
1141 | ocfs2_rw_unlock(inode, 1); | 989 | ocfs2_rw_unlock(inode, 1); |
1142 | bail: | 990 | bail: |
1143 | if (bh) | 991 | brelse(bh); |
1144 | brelse(bh); | ||
1145 | 992 | ||
1146 | mlog_exit(status); | 993 | mlog_exit(status); |
1147 | return status; | 994 | return status; |
@@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1284 | struct buffer_head *bh = NULL; | 1131 | struct buffer_head *bh = NULL; |
1285 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1132 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1286 | 1133 | ||
1287 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1134 | ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); |
1288 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1289 | if (ret < 0) { | 1135 | if (ret < 0) { |
1290 | mlog_errno(ret); | 1136 | mlog_errno(ret); |
1291 | goto out; | 1137 | goto out; |
@@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, | |||
1311 | struct buffer_head *di_bh = NULL; | 1157 | struct buffer_head *di_bh = NULL; |
1312 | 1158 | ||
1313 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1159 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
1314 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1160 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, |
1315 | OCFS2_I(inode)->ip_blkno, &di_bh, | 1161 | &di_bh); |
1316 | OCFS2_BH_CACHED, inode); | ||
1317 | if (ret) { | 1162 | if (ret) { |
1318 | mlog_errno(ret); | 1163 | mlog_errno(ret); |
1319 | goto out; | 1164 | goto out; |
@@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
1394 | handle_t *handle; | 1239 | handle_t *handle; |
1395 | struct ocfs2_alloc_context *meta_ac = NULL; | 1240 | struct ocfs2_alloc_context *meta_ac = NULL; |
1396 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1241 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1242 | struct ocfs2_extent_tree et; | ||
1397 | 1243 | ||
1398 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | 1244 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
1245 | |||
1246 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
1399 | if (ret) { | 1247 | if (ret) { |
1400 | mlog_errno(ret); | 1248 | mlog_errno(ret); |
1401 | return ret; | 1249 | return ret; |
@@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
1425 | goto out; | 1273 | goto out; |
1426 | } | 1274 | } |
1427 | 1275 | ||
1428 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | 1276 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, |
1429 | dealloc); | 1277 | dealloc); |
1430 | if (ret) { | 1278 | if (ret) { |
1431 | mlog_errno(ret); | 1279 | mlog_errno(ret); |
@@ -2040,7 +1888,7 @@ out_dio: | |||
2040 | */ | 1888 | */ |
2041 | if (old_size != i_size_read(inode) || | 1889 | if (old_size != i_size_read(inode) || |
2042 | old_clusters != OCFS2_I(inode)->ip_clusters) { | 1890 | old_clusters != OCFS2_I(inode)->ip_clusters) { |
2043 | ret = journal_force_commit(osb->journal->j_journal); | 1891 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
2044 | if (ret < 0) | 1892 | if (ret < 0) |
2045 | written = ret; | 1893 | written = ret; |
2046 | } | 1894 | } |
@@ -2227,7 +2075,12 @@ const struct inode_operations ocfs2_file_iops = { | |||
2227 | .setattr = ocfs2_setattr, | 2075 | .setattr = ocfs2_setattr, |
2228 | .getattr = ocfs2_getattr, | 2076 | .getattr = ocfs2_getattr, |
2229 | .permission = ocfs2_permission, | 2077 | .permission = ocfs2_permission, |
2078 | .setxattr = generic_setxattr, | ||
2079 | .getxattr = generic_getxattr, | ||
2080 | .listxattr = ocfs2_listxattr, | ||
2081 | .removexattr = generic_removexattr, | ||
2230 | .fallocate = ocfs2_fallocate, | 2082 | .fallocate = ocfs2_fallocate, |
2083 | .fiemap = ocfs2_fiemap, | ||
2231 | }; | 2084 | }; |
2232 | 2085 | ||
2233 | const struct inode_operations ocfs2_special_file_iops = { | 2086 | const struct inode_operations ocfs2_special_file_iops = { |
@@ -2236,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
2236 | .permission = ocfs2_permission, | 2089 | .permission = ocfs2_permission, |
2237 | }; | 2090 | }; |
2238 | 2091 | ||
2092 | /* | ||
2093 | * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with | ||
2094 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! | ||
2095 | */ | ||
2239 | const struct file_operations ocfs2_fops = { | 2096 | const struct file_operations ocfs2_fops = { |
2240 | .llseek = generic_file_llseek, | 2097 | .llseek = generic_file_llseek, |
2241 | .read = do_sync_read, | 2098 | .read = do_sync_read, |
@@ -2250,6 +2107,7 @@ const struct file_operations ocfs2_fops = { | |||
2250 | #ifdef CONFIG_COMPAT | 2107 | #ifdef CONFIG_COMPAT |
2251 | .compat_ioctl = ocfs2_compat_ioctl, | 2108 | .compat_ioctl = ocfs2_compat_ioctl, |
2252 | #endif | 2109 | #endif |
2110 | .lock = ocfs2_lock, | ||
2253 | .flock = ocfs2_flock, | 2111 | .flock = ocfs2_flock, |
2254 | .splice_read = ocfs2_file_splice_read, | 2112 | .splice_read = ocfs2_file_splice_read, |
2255 | .splice_write = ocfs2_file_splice_write, | 2113 | .splice_write = ocfs2_file_splice_write, |
@@ -2266,5 +2124,51 @@ const struct file_operations ocfs2_dops = { | |||
2266 | #ifdef CONFIG_COMPAT | 2124 | #ifdef CONFIG_COMPAT |
2267 | .compat_ioctl = ocfs2_compat_ioctl, | 2125 | .compat_ioctl = ocfs2_compat_ioctl, |
2268 | #endif | 2126 | #endif |
2127 | .lock = ocfs2_lock, | ||
2128 | .flock = ocfs2_flock, | ||
2129 | }; | ||
2130 | |||
2131 | /* | ||
2132 | * POSIX-lockless variants of our file_operations. | ||
2133 | * | ||
2134 | * These will be used if the underlying cluster stack does not support | ||
2135 | * posix file locking, if the user passes the "localflocks" mount | ||
2136 | * option, or if we have a local-only fs. | ||
2137 | * | ||
2138 | * ocfs2_flock is in here because all stacks handle UNIX file locks, | ||
2139 | * so we still want it in the case of no stack support for | ||
2140 | * plocks. Internally, it will do the right thing when asked to ignore | ||
2141 | * the cluster. | ||
2142 | */ | ||
2143 | const struct file_operations ocfs2_fops_no_plocks = { | ||
2144 | .llseek = generic_file_llseek, | ||
2145 | .read = do_sync_read, | ||
2146 | .write = do_sync_write, | ||
2147 | .mmap = ocfs2_mmap, | ||
2148 | .fsync = ocfs2_sync_file, | ||
2149 | .release = ocfs2_file_release, | ||
2150 | .open = ocfs2_file_open, | ||
2151 | .aio_read = ocfs2_file_aio_read, | ||
2152 | .aio_write = ocfs2_file_aio_write, | ||
2153 | .unlocked_ioctl = ocfs2_ioctl, | ||
2154 | #ifdef CONFIG_COMPAT | ||
2155 | .compat_ioctl = ocfs2_compat_ioctl, | ||
2156 | #endif | ||
2157 | .flock = ocfs2_flock, | ||
2158 | .splice_read = ocfs2_file_splice_read, | ||
2159 | .splice_write = ocfs2_file_splice_write, | ||
2160 | }; | ||
2161 | |||
2162 | const struct file_operations ocfs2_dops_no_plocks = { | ||
2163 | .llseek = generic_file_llseek, | ||
2164 | .read = generic_read_dir, | ||
2165 | .readdir = ocfs2_readdir, | ||
2166 | .fsync = ocfs2_sync_file, | ||
2167 | .release = ocfs2_dir_release, | ||
2168 | .open = ocfs2_dir_open, | ||
2169 | .unlocked_ioctl = ocfs2_ioctl, | ||
2170 | #ifdef CONFIG_COMPAT | ||
2171 | .compat_ioctl = ocfs2_compat_ioctl, | ||
2172 | #endif | ||
2269 | .flock = ocfs2_flock, | 2173 | .flock = ocfs2_flock, |
2270 | }; | 2174 | }; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1e27b4d017ea..e92382cbca5f 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -28,9 +28,12 @@ | |||
28 | 28 | ||
29 | extern const struct file_operations ocfs2_fops; | 29 | extern const struct file_operations ocfs2_fops; |
30 | extern const struct file_operations ocfs2_dops; | 30 | extern const struct file_operations ocfs2_dops; |
31 | extern const struct file_operations ocfs2_fops_no_plocks; | ||
32 | extern const struct file_operations ocfs2_dops_no_plocks; | ||
31 | extern const struct inode_operations ocfs2_file_iops; | 33 | extern const struct inode_operations ocfs2_file_iops; |
32 | extern const struct inode_operations ocfs2_special_file_iops; | 34 | extern const struct inode_operations ocfs2_special_file_iops; |
33 | struct ocfs2_alloc_context; | 35 | struct ocfs2_alloc_context; |
36 | enum ocfs2_alloc_restarted; | ||
34 | 37 | ||
35 | struct ocfs2_file_private { | 38 | struct ocfs2_file_private { |
36 | struct file *fp_file; | 39 | struct file *fp_file; |
@@ -38,27 +41,18 @@ struct ocfs2_file_private { | |||
38 | struct ocfs2_lock_res fp_flock; | 41 | struct ocfs2_lock_res fp_flock; |
39 | }; | 42 | }; |
40 | 43 | ||
41 | enum ocfs2_alloc_restarted { | 44 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
42 | RESTART_NONE = 0, | 45 | struct inode *inode, |
43 | RESTART_TRANS, | 46 | u32 *logical_offset, |
44 | RESTART_META | 47 | u32 clusters_to_add, |
45 | }; | 48 | int mark_unwritten, |
46 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 49 | struct buffer_head *fe_bh, |
47 | struct inode *inode, | 50 | handle_t *handle, |
48 | u32 *logical_offset, | 51 | struct ocfs2_alloc_context *data_ac, |
49 | u32 clusters_to_add, | 52 | struct ocfs2_alloc_context *meta_ac, |
50 | int mark_unwritten, | 53 | enum ocfs2_alloc_restarted *reason_ret); |
51 | struct buffer_head *fe_bh, | ||
52 | handle_t *handle, | ||
53 | struct ocfs2_alloc_context *data_ac, | ||
54 | struct ocfs2_alloc_context *meta_ac, | ||
55 | enum ocfs2_alloc_restarted *reason_ret); | ||
56 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, | 54 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, |
57 | u64 zero_to); | 55 | u64 zero_to); |
58 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
59 | u32 clusters_to_add, u32 extents_to_split, | ||
60 | struct ocfs2_alloc_context **data_ac, | ||
61 | struct ocfs2_alloc_context **meta_ac); | ||
62 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 56 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
63 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | 57 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, |
64 | struct kstat *stat); | 58 | struct kstat *stat); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7e9e4c79aec7..4903688f72a9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include "symlink.h" | 49 | #include "symlink.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "xattr.h" | ||
52 | 53 | ||
53 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
54 | 55 | ||
@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
219 | struct super_block *sb; | 220 | struct super_block *sb; |
220 | struct ocfs2_super *osb; | 221 | struct ocfs2_super *osb; |
221 | int status = -EINVAL; | 222 | int status = -EINVAL; |
223 | int use_plocks = 1; | ||
222 | 224 | ||
223 | mlog_entry("(0x%p, size:%llu)\n", inode, | 225 | mlog_entry("(0x%p, size:%llu)\n", inode, |
224 | (unsigned long long)le64_to_cpu(fe->i_size)); | 226 | (unsigned long long)le64_to_cpu(fe->i_size)); |
@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
226 | sb = inode->i_sb; | 228 | sb = inode->i_sb; |
227 | osb = OCFS2_SB(sb); | 229 | osb = OCFS2_SB(sb); |
228 | 230 | ||
231 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
232 | ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) | ||
233 | use_plocks = 0; | ||
234 | |||
229 | /* this means that read_inode cannot create a superblock inode | 235 | /* this means that read_inode cannot create a superblock inode |
230 | * today. change if needed. */ | 236 | * today. change if needed. */ |
231 | if (!OCFS2_IS_VALID_DINODE(fe) || | 237 | if (!OCFS2_IS_VALID_DINODE(fe) || |
@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
295 | 301 | ||
296 | switch (inode->i_mode & S_IFMT) { | 302 | switch (inode->i_mode & S_IFMT) { |
297 | case S_IFREG: | 303 | case S_IFREG: |
298 | inode->i_fop = &ocfs2_fops; | 304 | if (use_plocks) |
305 | inode->i_fop = &ocfs2_fops; | ||
306 | else | ||
307 | inode->i_fop = &ocfs2_fops_no_plocks; | ||
299 | inode->i_op = &ocfs2_file_iops; | 308 | inode->i_op = &ocfs2_file_iops; |
300 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 309 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
301 | break; | 310 | break; |
302 | case S_IFDIR: | 311 | case S_IFDIR: |
303 | inode->i_op = &ocfs2_dir_iops; | 312 | inode->i_op = &ocfs2_dir_iops; |
304 | inode->i_fop = &ocfs2_dops; | 313 | if (use_plocks) |
314 | inode->i_fop = &ocfs2_dops; | ||
315 | else | ||
316 | inode->i_fop = &ocfs2_dops_no_plocks; | ||
305 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 317 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
306 | break; | 318 | break; |
307 | case S_IFLNK: | 319 | case S_IFLNK: |
@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
448 | } | 460 | } |
449 | } | 461 | } |
450 | 462 | ||
451 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, | 463 | if (can_lock) |
452 | can_lock ? inode : NULL); | 464 | status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, |
465 | OCFS2_BH_IGNORE_CACHE); | ||
466 | else | ||
467 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | ||
453 | if (status < 0) { | 468 | if (status < 0) { |
454 | mlog_errno(status); | 469 | mlog_errno(status); |
455 | goto bail; | 470 | goto bail; |
@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
522 | * data and fast symlinks. | 537 | * data and fast symlinks. |
523 | */ | 538 | */ |
524 | if (fe->i_clusters) { | 539 | if (fe->i_clusters) { |
540 | if (ocfs2_should_order_data(inode)) | ||
541 | ocfs2_begin_ordered_truncate(inode, 0); | ||
542 | |||
525 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 543 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
526 | if (IS_ERR(handle)) { | 544 | if (IS_ERR(handle)) { |
527 | status = PTR_ERR(handle); | 545 | status = PTR_ERR(handle); |
@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
730 | goto bail_unlock_dir; | 748 | goto bail_unlock_dir; |
731 | } | 749 | } |
732 | 750 | ||
751 | /*Free extended attribute resources associated with this inode.*/ | ||
752 | status = ocfs2_xattr_remove(inode, di_bh); | ||
753 | if (status < 0) { | ||
754 | mlog_errno(status); | ||
755 | goto bail_unlock_dir; | ||
756 | } | ||
757 | |||
733 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, | 758 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, |
734 | orphan_dir_bh); | 759 | orphan_dir_bh); |
735 | if (status < 0) | 760 | if (status < 0) |
@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1081 | oi->ip_last_trans = 0; | 1106 | oi->ip_last_trans = 0; |
1082 | oi->ip_dir_start_lookup = 0; | 1107 | oi->ip_dir_start_lookup = 0; |
1083 | oi->ip_blkno = 0ULL; | 1108 | oi->ip_blkno = 0ULL; |
1109 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, | ||
1110 | &oi->ip_jinode); | ||
1084 | 1111 | ||
1085 | bail: | 1112 | bail: |
1086 | mlog_exit_void(); | 1113 | mlog_exit_void(); |
@@ -1107,58 +1134,6 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1107 | } | 1134 | } |
1108 | 1135 | ||
1109 | /* | 1136 | /* |
1110 | * TODO: this should probably be merged into ocfs2_get_block | ||
1111 | * | ||
1112 | * However, you now need to pay attention to the cont_prepare_write() | ||
1113 | * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much | ||
1114 | * expects never to extend). | ||
1115 | */ | ||
1116 | struct buffer_head *ocfs2_bread(struct inode *inode, | ||
1117 | int block, int *err, int reada) | ||
1118 | { | ||
1119 | struct buffer_head *bh = NULL; | ||
1120 | int tmperr; | ||
1121 | u64 p_blkno; | ||
1122 | int readflags = OCFS2_BH_CACHED; | ||
1123 | |||
1124 | if (reada) | ||
1125 | readflags |= OCFS2_BH_READAHEAD; | ||
1126 | |||
1127 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
1128 | i_size_read(inode)) { | ||
1129 | BUG_ON(!reada); | ||
1130 | return NULL; | ||
1131 | } | ||
1132 | |||
1133 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
1134 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
1135 | NULL); | ||
1136 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
1137 | if (tmperr < 0) { | ||
1138 | mlog_errno(tmperr); | ||
1139 | goto fail; | ||
1140 | } | ||
1141 | |||
1142 | tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh, | ||
1143 | readflags, inode); | ||
1144 | if (tmperr < 0) | ||
1145 | goto fail; | ||
1146 | |||
1147 | tmperr = 0; | ||
1148 | |||
1149 | *err = 0; | ||
1150 | return bh; | ||
1151 | |||
1152 | fail: | ||
1153 | if (bh) { | ||
1154 | brelse(bh); | ||
1155 | bh = NULL; | ||
1156 | } | ||
1157 | *err = -EIO; | ||
1158 | return NULL; | ||
1159 | } | ||
1160 | |||
1161 | /* | ||
1162 | * This is called from our getattr. | 1137 | * This is called from our getattr. |
1163 | */ | 1138 | */ |
1164 | int ocfs2_inode_revalidate(struct dentry *dentry) | 1139 | int ocfs2_inode_revalidate(struct dentry *dentry) |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 390a85596aa0..2f37af9bcc4a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -40,6 +40,9 @@ struct ocfs2_inode_info | |||
40 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ |
41 | struct rw_semaphore ip_alloc_sem; | 41 | struct rw_semaphore ip_alloc_sem; |
42 | 42 | ||
43 | /* protects extended attribute changes on this inode */ | ||
44 | struct rw_semaphore ip_xattr_sem; | ||
45 | |||
43 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
44 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
45 | u32 ip_open_count; | 48 | u32 ip_open_count; |
@@ -68,6 +71,7 @@ struct ocfs2_inode_info | |||
68 | struct ocfs2_extent_map ip_extent_map; | 71 | struct ocfs2_extent_map ip_extent_map; |
69 | 72 | ||
70 | struct inode vfs_inode; | 73 | struct inode vfs_inode; |
74 | struct jbd2_inode ip_jinode; | ||
71 | }; | 75 | }; |
72 | 76 | ||
73 | /* | 77 | /* |
@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache; | |||
113 | 117 | ||
114 | extern const struct address_space_operations ocfs2_aops; | 118 | extern const struct address_space_operations ocfs2_aops; |
115 | 119 | ||
116 | struct buffer_head *ocfs2_bread(struct inode *inode, int block, | ||
117 | int *err, int reada); | ||
118 | void ocfs2_clear_inode(struct inode *inode); | 120 | void ocfs2_clear_inode(struct inode *inode); |
119 | void ocfs2_delete_inode(struct inode *inode); | 121 | void ocfs2_delete_inode(struct inode *inode); |
120 | void ocfs2_drop_inode(struct inode *inode); | 122 | void ocfs2_drop_inode(struct inode *inode); |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7b142f0ce995..9fcd36dcc9a0 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -102,8 +102,7 @@ bail_unlock: | |||
102 | bail: | 102 | bail: |
103 | mutex_unlock(&inode->i_mutex); | 103 | mutex_unlock(&inode->i_mutex); |
104 | 104 | ||
105 | if (bh) | 105 | brelse(bh); |
106 | brelse(bh); | ||
107 | 106 | ||
108 | mlog_exit(status); | 107 | mlog_exit(status); |
109 | return status; | 108 | return status; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..81e40677eecb 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
215 | goto finally; | 215 | goto finally; |
216 | } | 216 | } |
217 | 217 | ||
218 | journal_lock_updates(journal->j_journal); | 218 | jbd2_journal_lock_updates(journal->j_journal); |
219 | status = journal_flush(journal->j_journal); | 219 | status = jbd2_journal_flush(journal->j_journal); |
220 | journal_unlock_updates(journal->j_journal); | 220 | jbd2_journal_unlock_updates(journal->j_journal); |
221 | if (status < 0) { | 221 | if (status < 0) { |
222 | up_write(&journal->j_trans_barrier); | 222 | up_write(&journal->j_trans_barrier); |
223 | mlog_errno(status); | 223 | mlog_errno(status); |
@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
264 | 264 | ||
265 | down_read(&osb->journal->j_trans_barrier); | 265 | down_read(&osb->journal->j_trans_barrier); |
266 | 266 | ||
267 | handle = journal_start(journal, max_buffs); | 267 | handle = jbd2_journal_start(journal, max_buffs); |
268 | if (IS_ERR(handle)) { | 268 | if (IS_ERR(handle)) { |
269 | up_read(&osb->journal->j_trans_barrier); | 269 | up_read(&osb->journal->j_trans_barrier); |
270 | 270 | ||
@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
290 | 290 | ||
291 | BUG_ON(!handle); | 291 | BUG_ON(!handle); |
292 | 292 | ||
293 | ret = journal_stop(handle); | 293 | ret = jbd2_journal_stop(handle); |
294 | if (ret < 0) | 294 | if (ret < 0) |
295 | mlog_errno(ret); | 295 | mlog_errno(ret); |
296 | 296 | ||
@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
304 | * transaction. extend_trans will either extend the current handle by | 304 | * transaction. extend_trans will either extend the current handle by |
305 | * nblocks, or commit it and start a new one with nblocks credits. | 305 | * nblocks, or commit it and start a new one with nblocks credits. |
306 | * | 306 | * |
307 | * This might call journal_restart() which will commit dirty buffers | 307 | * This might call jbd2_journal_restart() which will commit dirty buffers |
308 | * and then restart the transaction. Before calling | 308 | * and then restart the transaction. Before calling |
309 | * ocfs2_extend_trans(), any changed blocks should have been | 309 | * ocfs2_extend_trans(), any changed blocks should have been |
310 | * dirtied. After calling it, all blocks which need to be changed must | 310 | * dirtied. After calling it, all blocks which need to be changed must |
@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
332 | #ifdef CONFIG_OCFS2_DEBUG_FS | 332 | #ifdef CONFIG_OCFS2_DEBUG_FS |
333 | status = 1; | 333 | status = 1; |
334 | #else | 334 | #else |
335 | status = journal_extend(handle, nblocks); | 335 | status = jbd2_journal_extend(handle, nblocks); |
336 | if (status < 0) { | 336 | if (status < 0) { |
337 | mlog_errno(status); | 337 | mlog_errno(status); |
338 | goto bail; | 338 | goto bail; |
@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
340 | #endif | 340 | #endif |
341 | 341 | ||
342 | if (status > 0) { | 342 | if (status > 0) { |
343 | mlog(0, "journal_extend failed, trying journal_restart\n"); | 343 | mlog(0, |
344 | status = journal_restart(handle, nblocks); | 344 | "jbd2_journal_extend failed, trying " |
345 | "jbd2_journal_restart\n"); | ||
346 | status = jbd2_journal_restart(handle, nblocks); | ||
345 | if (status < 0) { | 347 | if (status < 0) { |
346 | mlog_errno(status); | 348 | mlog_errno(status); |
347 | goto bail; | 349 | goto bail; |
@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle, | |||
393 | switch (type) { | 395 | switch (type) { |
394 | case OCFS2_JOURNAL_ACCESS_CREATE: | 396 | case OCFS2_JOURNAL_ACCESS_CREATE: |
395 | case OCFS2_JOURNAL_ACCESS_WRITE: | 397 | case OCFS2_JOURNAL_ACCESS_WRITE: |
396 | status = journal_get_write_access(handle, bh); | 398 | status = jbd2_journal_get_write_access(handle, bh); |
397 | break; | 399 | break; |
398 | 400 | ||
399 | case OCFS2_JOURNAL_ACCESS_UNDO: | 401 | case OCFS2_JOURNAL_ACCESS_UNDO: |
400 | status = journal_get_undo_access(handle, bh); | 402 | status = jbd2_journal_get_undo_access(handle, bh); |
401 | break; | 403 | break; |
402 | 404 | ||
403 | default: | 405 | default: |
@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
422 | mlog_entry("(bh->b_blocknr=%llu)\n", | 424 | mlog_entry("(bh->b_blocknr=%llu)\n", |
423 | (unsigned long long)bh->b_blocknr); | 425 | (unsigned long long)bh->b_blocknr); |
424 | 426 | ||
425 | status = journal_dirty_metadata(handle, bh); | 427 | status = jbd2_journal_dirty_metadata(handle, bh); |
426 | if (status < 0) | 428 | if (status < 0) |
427 | mlog(ML_ERROR, "Could not dirty metadata buffer. " | 429 | mlog(ML_ERROR, "Could not dirty metadata buffer. " |
428 | "(bh->b_blocknr=%llu)\n", | 430 | "(bh->b_blocknr=%llu)\n", |
@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
432 | return status; | 434 | return status; |
433 | } | 435 | } |
434 | 436 | ||
437 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
435 | int ocfs2_journal_dirty_data(handle_t *handle, | 438 | int ocfs2_journal_dirty_data(handle_t *handle, |
436 | struct buffer_head *bh) | 439 | struct buffer_head *bh) |
437 | { | 440 | { |
@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
443 | 446 | ||
444 | return err; | 447 | return err; |
445 | } | 448 | } |
449 | #endif | ||
446 | 450 | ||
447 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) | 451 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) |
448 | 452 | ||
449 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 453 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
450 | { | 454 | { |
@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) | |||
457 | spin_lock(&journal->j_state_lock); | 461 | spin_lock(&journal->j_state_lock); |
458 | journal->j_commit_interval = commit_interval; | 462 | journal->j_commit_interval = commit_interval; |
459 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 463 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
460 | journal->j_flags |= JFS_BARRIER; | 464 | journal->j_flags |= JBD2_BARRIER; |
461 | else | 465 | else |
462 | journal->j_flags &= ~JFS_BARRIER; | 466 | journal->j_flags &= ~JBD2_BARRIER; |
463 | spin_unlock(&journal->j_state_lock); | 467 | spin_unlock(&journal->j_state_lock); |
464 | } | 468 | } |
465 | 469 | ||
@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
524 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); | 528 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); |
525 | 529 | ||
526 | /* call the kernels journal init function now */ | 530 | /* call the kernels journal init function now */ |
527 | j_journal = journal_init_inode(inode); | 531 | j_journal = jbd2_journal_init_inode(inode); |
528 | if (j_journal == NULL) { | 532 | if (j_journal == NULL) { |
529 | mlog(ML_ERROR, "Linux journal layer error\n"); | 533 | mlog(ML_ERROR, "Linux journal layer error\n"); |
530 | status = -EINVAL; | 534 | status = -EINVAL; |
531 | goto done; | 535 | goto done; |
532 | } | 536 | } |
533 | 537 | ||
534 | mlog(0, "Returned from journal_init_inode\n"); | 538 | mlog(0, "Returned from jbd2_journal_init_inode\n"); |
535 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); | 539 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); |
536 | 540 | ||
537 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & | 541 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & |
@@ -550,8 +554,7 @@ done: | |||
550 | if (status < 0) { | 554 | if (status < 0) { |
551 | if (inode_lock) | 555 | if (inode_lock) |
552 | ocfs2_inode_unlock(inode, 1); | 556 | ocfs2_inode_unlock(inode, 1); |
553 | if (bh != NULL) | 557 | brelse(bh); |
554 | brelse(bh); | ||
555 | if (inode) { | 558 | if (inode) { |
556 | OCFS2_I(inode)->ip_open_count--; | 559 | OCFS2_I(inode)->ip_open_count--; |
557 | iput(inode); | 560 | iput(inode); |
@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
639 | if (journal->j_state != OCFS2_JOURNAL_LOADED) | 642 | if (journal->j_state != OCFS2_JOURNAL_LOADED) |
640 | goto done; | 643 | goto done; |
641 | 644 | ||
642 | /* need to inc inode use count as journal_destroy will iput. */ | 645 | /* need to inc inode use count - jbd2_journal_destroy will iput. */ |
643 | if (!igrab(inode)) | 646 | if (!igrab(inode)) |
644 | BUG(); | 647 | BUG(); |
645 | 648 | ||
@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
668 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); | 671 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); |
669 | 672 | ||
670 | if (ocfs2_mount_local(osb)) { | 673 | if (ocfs2_mount_local(osb)) { |
671 | journal_lock_updates(journal->j_journal); | 674 | jbd2_journal_lock_updates(journal->j_journal); |
672 | status = journal_flush(journal->j_journal); | 675 | status = jbd2_journal_flush(journal->j_journal); |
673 | journal_unlock_updates(journal->j_journal); | 676 | jbd2_journal_unlock_updates(journal->j_journal); |
674 | if (status < 0) | 677 | if (status < 0) |
675 | mlog_errno(status); | 678 | mlog_errno(status); |
676 | } | 679 | } |
@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
686 | } | 689 | } |
687 | 690 | ||
688 | /* Shutdown the kernel journal system */ | 691 | /* Shutdown the kernel journal system */ |
689 | journal_destroy(journal->j_journal); | 692 | jbd2_journal_destroy(journal->j_journal); |
690 | 693 | ||
691 | OCFS2_I(inode)->ip_open_count--; | 694 | OCFS2_I(inode)->ip_open_count--; |
692 | 695 | ||
@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb, | |||
711 | { | 714 | { |
712 | int olderr; | 715 | int olderr; |
713 | 716 | ||
714 | olderr = journal_errno(journal); | 717 | olderr = jbd2_journal_errno(journal); |
715 | if (olderr) { | 718 | if (olderr) { |
716 | mlog(ML_ERROR, "File system error %d recorded in " | 719 | mlog(ML_ERROR, "File system error %d recorded in " |
717 | "journal %u.\n", olderr, slot); | 720 | "journal %u.\n", olderr, slot); |
718 | mlog(ML_ERROR, "File system on device %s needs checking.\n", | 721 | mlog(ML_ERROR, "File system on device %s needs checking.\n", |
719 | sb->s_id); | 722 | sb->s_id); |
720 | 723 | ||
721 | journal_ack_err(journal); | 724 | jbd2_journal_ack_err(journal); |
722 | journal_clear_err(journal); | 725 | jbd2_journal_clear_err(journal); |
723 | } | 726 | } |
724 | } | 727 | } |
725 | 728 | ||
@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) | |||
734 | 737 | ||
735 | osb = journal->j_osb; | 738 | osb = journal->j_osb; |
736 | 739 | ||
737 | status = journal_load(journal->j_journal); | 740 | status = jbd2_journal_load(journal->j_journal); |
738 | if (status < 0) { | 741 | if (status < 0) { |
739 | mlog(ML_ERROR, "Failed to load journal!\n"); | 742 | mlog(ML_ERROR, "Failed to load journal!\n"); |
740 | goto done; | 743 | goto done; |
@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | |||
778 | 781 | ||
779 | BUG_ON(!journal); | 782 | BUG_ON(!journal); |
780 | 783 | ||
781 | status = journal_wipe(journal->j_journal, full); | 784 | status = jbd2_journal_wipe(journal->j_journal, full); |
782 | if (status < 0) { | 785 | if (status < 0) { |
783 | mlog_errno(status); | 786 | mlog_errno(status); |
784 | goto bail; | 787 | goto bail; |
@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
847 | 850 | ||
848 | /* We are reading journal data which should not | 851 | /* We are reading journal data which should not |
849 | * be put in the uptodate cache */ | 852 | * be put in the uptodate cache */ |
850 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | 853 | status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb), |
851 | p_blkno, p_blocks, bhs, 0, | 854 | p_blkno, p_blocks, bhs); |
852 | NULL); | ||
853 | if (status < 0) { | 855 | if (status < 0) { |
854 | mlog_errno(status); | 856 | mlog_errno(status); |
855 | goto bail; | 857 | goto bail; |
@@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
865 | 867 | ||
866 | bail: | 868 | bail: |
867 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) | 869 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) |
868 | if (bhs[i]) | 870 | brelse(bhs[i]); |
869 | brelse(bhs[i]); | ||
870 | mlog_exit(status); | 871 | mlog_exit(status); |
871 | return status; | 872 | return status; |
872 | } | 873 | } |
@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, | |||
1133 | } | 1134 | } |
1134 | SET_INODE_JOURNAL(inode); | 1135 | SET_INODE_JOURNAL(inode); |
1135 | 1136 | ||
1136 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); | 1137 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, |
1138 | OCFS2_BH_IGNORE_CACHE); | ||
1137 | if (status < 0) { | 1139 | if (status < 0) { |
1138 | mlog_errno(status); | 1140 | mlog_errno(status); |
1139 | goto bail; | 1141 | goto bail; |
@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1229 | } | 1231 | } |
1230 | 1232 | ||
1231 | mlog(0, "calling journal_init_inode\n"); | 1233 | mlog(0, "calling journal_init_inode\n"); |
1232 | journal = journal_init_inode(inode); | 1234 | journal = jbd2_journal_init_inode(inode); |
1233 | if (journal == NULL) { | 1235 | if (journal == NULL) { |
1234 | mlog(ML_ERROR, "Linux journal layer error\n"); | 1236 | mlog(ML_ERROR, "Linux journal layer error\n"); |
1235 | status = -EIO; | 1237 | status = -EIO; |
1236 | goto done; | 1238 | goto done; |
1237 | } | 1239 | } |
1238 | 1240 | ||
1239 | status = journal_load(journal); | 1241 | status = jbd2_journal_load(journal); |
1240 | if (status < 0) { | 1242 | if (status < 0) { |
1241 | mlog_errno(status); | 1243 | mlog_errno(status); |
1242 | if (!igrab(inode)) | 1244 | if (!igrab(inode)) |
1243 | BUG(); | 1245 | BUG(); |
1244 | journal_destroy(journal); | 1246 | jbd2_journal_destroy(journal); |
1245 | goto done; | 1247 | goto done; |
1246 | } | 1248 | } |
1247 | 1249 | ||
@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1249 | 1251 | ||
1250 | /* wipe the journal */ | 1252 | /* wipe the journal */ |
1251 | mlog(0, "flushing the journal.\n"); | 1253 | mlog(0, "flushing the journal.\n"); |
1252 | journal_lock_updates(journal); | 1254 | jbd2_journal_lock_updates(journal); |
1253 | status = journal_flush(journal); | 1255 | status = jbd2_journal_flush(journal); |
1254 | journal_unlock_updates(journal); | 1256 | jbd2_journal_unlock_updates(journal); |
1255 | if (status < 0) | 1257 | if (status < 0) |
1256 | mlog_errno(status); | 1258 | mlog_errno(status); |
1257 | 1259 | ||
@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1272 | if (!igrab(inode)) | 1274 | if (!igrab(inode)) |
1273 | BUG(); | 1275 | BUG(); |
1274 | 1276 | ||
1275 | journal_destroy(journal); | 1277 | jbd2_journal_destroy(journal); |
1276 | 1278 | ||
1277 | done: | 1279 | done: |
1278 | /* drop the lock on this nodes journal */ | 1280 | /* drop the lock on this nodes journal */ |
@@ -1282,8 +1284,7 @@ done: | |||
1282 | if (inode) | 1284 | if (inode) |
1283 | iput(inode); | 1285 | iput(inode); |
1284 | 1286 | ||
1285 | if (bh) | 1287 | brelse(bh); |
1286 | brelse(bh); | ||
1287 | 1288 | ||
1288 | mlog_exit(status); | 1289 | mlog_exit(status); |
1289 | return status; | 1290 | return status; |
@@ -1418,13 +1419,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1418 | { | 1419 | { |
1419 | unsigned int node_num; | 1420 | unsigned int node_num; |
1420 | int status, i; | 1421 | int status, i; |
1422 | u32 gen; | ||
1421 | struct buffer_head *bh = NULL; | 1423 | struct buffer_head *bh = NULL; |
1422 | struct ocfs2_dinode *di; | 1424 | struct ocfs2_dinode *di; |
1423 | 1425 | ||
1424 | /* This is called with the super block cluster lock, so we | 1426 | /* This is called with the super block cluster lock, so we |
1425 | * know that the slot map can't change underneath us. */ | 1427 | * know that the slot map can't change underneath us. */ |
1426 | 1428 | ||
1427 | spin_lock(&osb->osb_lock); | ||
1428 | for (i = 0; i < osb->max_slots; i++) { | 1429 | for (i = 0; i < osb->max_slots; i++) { |
1429 | /* Read journal inode to get the recovery generation */ | 1430 | /* Read journal inode to get the recovery generation */ |
1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); | 1431 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); |
@@ -1433,23 +1434,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1433 | goto bail; | 1434 | goto bail; |
1434 | } | 1435 | } |
1435 | di = (struct ocfs2_dinode *)bh->b_data; | 1436 | di = (struct ocfs2_dinode *)bh->b_data; |
1436 | osb->slot_recovery_generations[i] = | 1437 | gen = ocfs2_get_recovery_generation(di); |
1437 | ocfs2_get_recovery_generation(di); | ||
1438 | brelse(bh); | 1438 | brelse(bh); |
1439 | bh = NULL; | 1439 | bh = NULL; |
1440 | 1440 | ||
1441 | spin_lock(&osb->osb_lock); | ||
1442 | osb->slot_recovery_generations[i] = gen; | ||
1443 | |||
1441 | mlog(0, "Slot %u recovery generation is %u\n", i, | 1444 | mlog(0, "Slot %u recovery generation is %u\n", i, |
1442 | osb->slot_recovery_generations[i]); | 1445 | osb->slot_recovery_generations[i]); |
1443 | 1446 | ||
1444 | if (i == osb->slot_num) | 1447 | if (i == osb->slot_num) { |
1448 | spin_unlock(&osb->osb_lock); | ||
1445 | continue; | 1449 | continue; |
1450 | } | ||
1446 | 1451 | ||
1447 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | 1452 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); |
1448 | if (status == -ENOENT) | 1453 | if (status == -ENOENT) { |
1454 | spin_unlock(&osb->osb_lock); | ||
1449 | continue; | 1455 | continue; |
1456 | } | ||
1450 | 1457 | ||
1451 | if (__ocfs2_recovery_map_test(osb, node_num)) | 1458 | if (__ocfs2_recovery_map_test(osb, node_num)) { |
1459 | spin_unlock(&osb->osb_lock); | ||
1452 | continue; | 1460 | continue; |
1461 | } | ||
1453 | spin_unlock(&osb->osb_lock); | 1462 | spin_unlock(&osb->osb_lock); |
1454 | 1463 | ||
1455 | /* Ok, we have a slot occupied by another node which | 1464 | /* Ok, we have a slot occupied by another node which |
@@ -1465,10 +1474,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1465 | mlog_errno(status); | 1474 | mlog_errno(status); |
1466 | goto bail; | 1475 | goto bail; |
1467 | } | 1476 | } |
1468 | |||
1469 | spin_lock(&osb->osb_lock); | ||
1470 | } | 1477 | } |
1471 | spin_unlock(&osb->osb_lock); | ||
1472 | 1478 | ||
1473 | status = 0; | 1479 | status = 0; |
1474 | bail: | 1480 | bail: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 2178ebffa05f..d4d14e9a3cea 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -27,7 +27,12 @@ | |||
27 | #define OCFS2_JOURNAL_H | 27 | #define OCFS2_JOURNAL_H |
28 | 28 | ||
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/jbd.h> | 30 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
31 | # include <linux/jbd2.h> | ||
32 | #else | ||
33 | # include <linux/jbd.h> | ||
34 | # include "ocfs2_jbd_compat.h" | ||
35 | #endif | ||
31 | 36 | ||
32 | enum ocfs2_journal_state { | 37 | enum ocfs2_journal_state { |
33 | OCFS2_JOURNAL_FREE = 0, | 38 | OCFS2_JOURNAL_FREE = 0, |
@@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) | |||
215 | * buffer. Will have to call ocfs2_journal_dirty once | 220 | * buffer. Will have to call ocfs2_journal_dirty once |
216 | * we've actually dirtied it. Type is one of . or . | 221 | * we've actually dirtied it. Type is one of . or . |
217 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. | 222 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. |
218 | * ocfs2_journal_dirty_data - Indicate that a data buffer should go out before | 223 | * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before |
219 | * the current handle commits. | 224 | * the current handle commits. |
220 | */ | 225 | */ |
221 | 226 | ||
222 | /* You must always start_trans with a number of buffs > 0, but it's | 227 | /* You must always start_trans with a number of buffs > 0, but it's |
@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle, | |||
268 | */ | 273 | */ |
269 | int ocfs2_journal_dirty(handle_t *handle, | 274 | int ocfs2_journal_dirty(handle_t *handle, |
270 | struct buffer_head *bh); | 275 | struct buffer_head *bh); |
276 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
271 | int ocfs2_journal_dirty_data(handle_t *handle, | 277 | int ocfs2_journal_dirty_data(handle_t *handle, |
272 | struct buffer_head *bh); | 278 | struct buffer_head *bh); |
279 | #endif | ||
273 | 280 | ||
274 | /* | 281 | /* |
275 | * Credit Macros: | 282 | * Credit Macros: |
@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
283 | /* simple file updates like chmod, etc. */ | 290 | /* simple file updates like chmod, etc. */ |
284 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 291 | #define OCFS2_INODE_UPDATE_CREDITS 1 |
285 | 292 | ||
293 | /* extended attribute block update */ | ||
294 | #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 | ||
295 | |||
286 | /* group extend. inode update and last group update. */ | 296 | /* group extend. inode update and last group update. */ |
287 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 297 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) |
288 | 298 | ||
@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
340 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ | 350 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ |
341 | + OCFS2_UNLINK_CREDITS) | 351 | + OCFS2_UNLINK_CREDITS) |
342 | 352 | ||
353 | /* global bitmap dinode, group desc., relinked group, | ||
354 | * suballocator dinode, group desc., relinked group, | ||
355 | * dinode, xattr block */ | ||
356 | #define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \ | ||
357 | + OCFS2_INODE_UPDATE_CREDITS \ | ||
358 | + OCFS2_XATTR_BLOCK_UPDATE_CREDITS) | ||
359 | |||
360 | /* | ||
361 | * Please note that the caller must make sure that root_el is the root | ||
362 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
363 | * the result may be wrong. | ||
364 | */ | ||
343 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, | 365 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, |
344 | struct ocfs2_dinode *fe, | 366 | struct ocfs2_extent_list *root_el, |
345 | u32 bits_wanted) | 367 | u32 bits_wanted) |
346 | { | 368 | { |
347 | int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks; | 369 | int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks; |
348 | 370 | ||
349 | /* bitmap dinode, group desc. + relinked group. */ | 371 | /* bitmap dinode, group desc. + relinked group. */ |
350 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; | 372 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; |
@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
355 | * however many metadata chunks needed * a remaining suballoc | 377 | * however many metadata chunks needed * a remaining suballoc |
356 | * alloc. */ | 378 | * alloc. */ |
357 | sysfile_bitmap_blocks = 1 + | 379 | sysfile_bitmap_blocks = 1 + |
358 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe); | 380 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el); |
359 | 381 | ||
360 | /* this does not include *new* metadata blocks, which are | 382 | /* this does not include *new* metadata blocks, which are |
361 | * accounted for in sysfile_bitmap_blocks. fe + | 383 | * accounted for in sysfile_bitmap_blocks. root_el + |
362 | * prev. last_eb_blk + blocks along edge of tree. | 384 | * prev. last_eb_blk + blocks along edge of tree. |
363 | * calc_symlink_credits passes because we just need 1 | 385 | * calc_symlink_credits passes because we just need 1 |
364 | * credit for the dinode there. */ | 386 | * credit for the dinode there. */ |
365 | dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth); | 387 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
366 | 388 | ||
367 | return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks; | 389 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; |
368 | } | 390 | } |
369 | 391 | ||
370 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 392 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, | |||
415 | return credits; | 437 | return credits; |
416 | } | 438 | } |
417 | 439 | ||
440 | static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) | ||
441 | { | ||
442 | return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode); | ||
443 | } | ||
444 | |||
445 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | ||
446 | loff_t new_size) | ||
447 | { | ||
448 | return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, | ||
449 | new_size); | ||
450 | } | ||
451 | |||
418 | #endif /* OCFS2_JOURNAL_H */ | 452 | #endif /* OCFS2_JOURNAL_H */ |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 28e492e4ec88..687b28713c32 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
31 | #include <linux/debugfs.h> | ||
31 | 32 | ||
32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 33 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
@@ -47,8 +48,6 @@ | |||
47 | 48 | ||
48 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) | 49 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) |
49 | 50 | ||
50 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb); | ||
51 | |||
52 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); | 51 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); |
53 | 52 | ||
54 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
@@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 74 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 75 | struct inode *local_alloc_inode); |
77 | 76 | ||
78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 77 | #ifdef CONFIG_OCFS2_FS_STATS |
78 | |||
79 | static int ocfs2_la_debug_open(struct inode *inode, struct file *file) | ||
80 | { | ||
81 | file->private_data = inode->i_private; | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE | ||
86 | #define LA_DEBUG_VER 1 | ||
87 | static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, | ||
88 | size_t count, loff_t *ppos) | ||
89 | { | ||
90 | static DEFINE_MUTEX(la_debug_mutex); | ||
91 | struct ocfs2_super *osb = file->private_data; | ||
92 | int written, ret; | ||
93 | char *buf = osb->local_alloc_debug_buf; | ||
94 | |||
95 | mutex_lock(&la_debug_mutex); | ||
96 | memset(buf, 0, LA_DEBUG_BUF_SZ); | ||
97 | |||
98 | written = snprintf(buf, LA_DEBUG_BUF_SZ, | ||
99 | "0x%x\t0x%llx\t%u\t%u\t0x%x\n", | ||
100 | LA_DEBUG_VER, | ||
101 | (unsigned long long)osb->la_last_gd, | ||
102 | osb->local_alloc_default_bits, | ||
103 | osb->local_alloc_bits, osb->local_alloc_state); | ||
104 | |||
105 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); | ||
106 | |||
107 | mutex_unlock(&la_debug_mutex); | ||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | static const struct file_operations ocfs2_la_debug_fops = { | ||
112 | .open = ocfs2_la_debug_open, | ||
113 | .read = ocfs2_la_debug_read, | ||
114 | }; | ||
115 | |||
116 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
117 | { | ||
118 | osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); | ||
119 | if (!osb->local_alloc_debug_buf) | ||
120 | return; | ||
121 | |||
122 | osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", | ||
123 | S_IFREG|S_IRUSR, | ||
124 | osb->osb_debug_root, | ||
125 | osb, | ||
126 | &ocfs2_la_debug_fops); | ||
127 | if (!osb->local_alloc_debug) { | ||
128 | kfree(osb->local_alloc_debug_buf); | ||
129 | osb->local_alloc_debug_buf = NULL; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
134 | { | ||
135 | if (osb->local_alloc_debug) | ||
136 | debugfs_remove(osb->local_alloc_debug); | ||
137 | |||
138 | if (osb->local_alloc_debug_buf) | ||
139 | kfree(osb->local_alloc_debug_buf); | ||
140 | |||
141 | osb->local_alloc_debug_buf = NULL; | ||
142 | osb->local_alloc_debug = NULL; | ||
143 | } | ||
144 | #else /* CONFIG_OCFS2_FS_STATS */ | ||
145 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
146 | { | ||
147 | return; | ||
148 | } | ||
149 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
150 | { | ||
151 | return; | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | ||
79 | { | 156 | { |
80 | BUG_ON(osb->s_clustersize_bits > 20); | 157 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || |
158 | osb->local_alloc_state == OCFS2_LA_ENABLED); | ||
159 | } | ||
81 | 160 | ||
82 | /* Size local alloc windows by the megabyte */ | 161 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, |
83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | 162 | unsigned int num_clusters) |
163 | { | ||
164 | spin_lock(&osb->osb_lock); | ||
165 | if (osb->local_alloc_state == OCFS2_LA_DISABLED || | ||
166 | osb->local_alloc_state == OCFS2_LA_THROTTLED) | ||
167 | if (num_clusters >= osb->local_alloc_default_bits) { | ||
168 | cancel_delayed_work(&osb->la_enable_wq); | ||
169 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
170 | } | ||
171 | spin_unlock(&osb->osb_lock); | ||
172 | } | ||
173 | |||
174 | void ocfs2_la_enable_worker(struct work_struct *work) | ||
175 | { | ||
176 | struct ocfs2_super *osb = | ||
177 | container_of(work, struct ocfs2_super, | ||
178 | la_enable_wq.work); | ||
179 | spin_lock(&osb->osb_lock); | ||
180 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
181 | spin_unlock(&osb->osb_lock); | ||
84 | } | 182 | } |
85 | 183 | ||
86 | /* | 184 | /* |
87 | * Tell us whether a given allocation should use the local alloc | 185 | * Tell us whether a given allocation should use the local alloc |
88 | * file. Otherwise, it has to go to the main bitmap. | 186 | * file. Otherwise, it has to go to the main bitmap. |
187 | * | ||
188 | * This function does semi-dirty reads of local alloc size and state! | ||
189 | * This is ok however, as the values are re-checked once under mutex. | ||
89 | */ | 190 | */ |
90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 191 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
91 | { | 192 | { |
92 | int la_bits = ocfs2_local_alloc_window_bits(osb); | ||
93 | int ret = 0; | 193 | int ret = 0; |
194 | int la_bits; | ||
195 | |||
196 | spin_lock(&osb->osb_lock); | ||
197 | la_bits = osb->local_alloc_bits; | ||
94 | 198 | ||
95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 199 | if (!ocfs2_la_state_enabled(osb)) |
96 | goto bail; | 200 | goto bail; |
97 | 201 | ||
98 | /* la_bits should be at least twice the size (in clusters) of | 202 | /* la_bits should be at least twice the size (in clusters) of |
@@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | |||
106 | bail: | 210 | bail: |
107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | 211 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", |
108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | 212 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); |
213 | spin_unlock(&osb->osb_lock); | ||
109 | return ret; | 214 | return ret; |
110 | } | 215 | } |
111 | 216 | ||
@@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
120 | 225 | ||
121 | mlog_entry_void(); | 226 | mlog_entry_void(); |
122 | 227 | ||
123 | if (osb->local_alloc_size == 0) | 228 | ocfs2_init_la_debug(osb); |
229 | |||
230 | if (osb->local_alloc_bits == 0) | ||
124 | goto bail; | 231 | goto bail; |
125 | 232 | ||
126 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | 233 | if (osb->local_alloc_bits >= osb->bitmap_cpg) { |
127 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | 234 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " |
128 | "than max possible %u. Using defaults.\n", | 235 | "than max possible %u. Using defaults.\n", |
129 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | 236 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); |
130 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 237 | osb->local_alloc_bits = |
238 | ocfs2_megabytes_to_clusters(osb->sb, | ||
239 | OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); | ||
131 | } | 240 | } |
132 | 241 | ||
133 | /* read the alloc off disk */ | 242 | /* read the alloc off disk */ |
@@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
139 | goto bail; | 248 | goto bail; |
140 | } | 249 | } |
141 | 250 | ||
142 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 251 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
143 | &alloc_bh, 0, inode); | 252 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
144 | if (status < 0) { | 253 | if (status < 0) { |
145 | mlog_errno(status); | 254 | mlog_errno(status); |
146 | goto bail; | 255 | goto bail; |
@@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
185 | 294 | ||
186 | bail: | 295 | bail: |
187 | if (status < 0) | 296 | if (status < 0) |
188 | if (alloc_bh) | 297 | brelse(alloc_bh); |
189 | brelse(alloc_bh); | ||
190 | if (inode) | 298 | if (inode) |
191 | iput(inode); | 299 | iput(inode); |
192 | 300 | ||
193 | mlog(0, "Local alloc window bits = %d\n", | 301 | if (status < 0) |
194 | ocfs2_local_alloc_window_bits(osb)); | 302 | ocfs2_shutdown_la_debug(osb); |
303 | |||
304 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); | ||
195 | 305 | ||
196 | mlog_exit(status); | 306 | mlog_exit(status); |
197 | return status; | 307 | return status; |
@@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
217 | 327 | ||
218 | mlog_entry_void(); | 328 | mlog_entry_void(); |
219 | 329 | ||
330 | cancel_delayed_work(&osb->la_enable_wq); | ||
331 | flush_workqueue(ocfs2_wq); | ||
332 | |||
333 | ocfs2_shutdown_la_debug(osb); | ||
334 | |||
220 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | 335 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) |
221 | goto out; | 336 | goto out; |
222 | 337 | ||
@@ -295,8 +410,7 @@ out_commit: | |||
295 | ocfs2_commit_trans(osb, handle); | 410 | ocfs2_commit_trans(osb, handle); |
296 | 411 | ||
297 | out_unlock: | 412 | out_unlock: |
298 | if (main_bm_bh) | 413 | brelse(main_bm_bh); |
299 | brelse(main_bm_bh); | ||
300 | 414 | ||
301 | ocfs2_inode_unlock(main_bm_inode, 1); | 415 | ocfs2_inode_unlock(main_bm_inode, 1); |
302 | 416 | ||
@@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | |||
345 | 459 | ||
346 | mutex_lock(&inode->i_mutex); | 460 | mutex_lock(&inode->i_mutex); |
347 | 461 | ||
348 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 462 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
349 | &alloc_bh, 0, inode); | 463 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
350 | if (status < 0) { | 464 | if (status < 0) { |
351 | mlog_errno(status); | 465 | mlog_errno(status); |
352 | goto bail; | 466 | goto bail; |
@@ -372,8 +486,7 @@ bail: | |||
372 | *alloc_copy = NULL; | 486 | *alloc_copy = NULL; |
373 | } | 487 | } |
374 | 488 | ||
375 | if (alloc_bh) | 489 | brelse(alloc_bh); |
376 | brelse(alloc_bh); | ||
377 | 490 | ||
378 | if (inode) { | 491 | if (inode) { |
379 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
@@ -441,8 +554,7 @@ out_unlock: | |||
441 | out_mutex: | 554 | out_mutex: |
442 | mutex_unlock(&main_bm_inode->i_mutex); | 555 | mutex_unlock(&main_bm_inode->i_mutex); |
443 | 556 | ||
444 | if (main_bm_bh) | 557 | brelse(main_bm_bh); |
445 | brelse(main_bm_bh); | ||
446 | 558 | ||
447 | iput(main_bm_inode); | 559 | iput(main_bm_inode); |
448 | 560 | ||
@@ -453,8 +565,48 @@ out: | |||
453 | return status; | 565 | return status; |
454 | } | 566 | } |
455 | 567 | ||
568 | /* Check to see if the local alloc window is within ac->ac_max_block */ | ||
569 | static int ocfs2_local_alloc_in_range(struct inode *inode, | ||
570 | struct ocfs2_alloc_context *ac, | ||
571 | u32 bits_wanted) | ||
572 | { | ||
573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
574 | struct ocfs2_dinode *alloc; | ||
575 | struct ocfs2_local_alloc *la; | ||
576 | int start; | ||
577 | u64 block_off; | ||
578 | |||
579 | if (!ac->ac_max_block) | ||
580 | return 1; | ||
581 | |||
582 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | ||
583 | la = OCFS2_LOCAL_ALLOC(alloc); | ||
584 | |||
585 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | ||
586 | if (start == -1) { | ||
587 | mlog_errno(-ENOSPC); | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | /* | ||
592 | * Converting (bm_off + start + bits_wanted) to blocks gives us | ||
593 | * the blkno just past our actual allocation. This is perfect | ||
594 | * to compare with ac_max_block. | ||
595 | */ | ||
596 | block_off = ocfs2_clusters_to_blocks(inode->i_sb, | ||
597 | le32_to_cpu(la->la_bm_off) + | ||
598 | start + bits_wanted); | ||
599 | mlog(0, "Checking %llu against %llu\n", | ||
600 | (unsigned long long)block_off, | ||
601 | (unsigned long long)ac->ac_max_block); | ||
602 | if (block_off > ac->ac_max_block) | ||
603 | return 0; | ||
604 | |||
605 | return 1; | ||
606 | } | ||
607 | |||
456 | /* | 608 | /* |
457 | * make sure we've got at least bitswanted contiguous bits in the | 609 | * make sure we've got at least bits_wanted contiguous bits in the |
458 | * local alloc. You lose them when you drop i_mutex. | 610 | * local alloc. You lose them when you drop i_mutex. |
459 | * | 611 | * |
460 | * We will add ourselves to the transaction passed in, but may start | 612 | * We will add ourselves to the transaction passed in, but may start |
@@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
485 | 637 | ||
486 | mutex_lock(&local_alloc_inode->i_mutex); | 638 | mutex_lock(&local_alloc_inode->i_mutex); |
487 | 639 | ||
488 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) { | 640 | /* |
489 | status = -ENOSPC; | 641 | * We must double check state and allocator bits because |
490 | goto bail; | 642 | * another process may have changed them while holding i_mutex. |
491 | } | 643 | */ |
492 | 644 | spin_lock(&osb->osb_lock); | |
493 | if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { | 645 | if (!ocfs2_la_state_enabled(osb) || |
494 | mlog(0, "Asking for more than my max window size!\n"); | 646 | (bits_wanted > osb->local_alloc_bits)) { |
647 | spin_unlock(&osb->osb_lock); | ||
495 | status = -ENOSPC; | 648 | status = -ENOSPC; |
496 | goto bail; | 649 | goto bail; |
497 | } | 650 | } |
651 | spin_unlock(&osb->osb_lock); | ||
498 | 652 | ||
499 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 653 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
500 | 654 | ||
@@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
522 | mlog_errno(status); | 676 | mlog_errno(status); |
523 | goto bail; | 677 | goto bail; |
524 | } | 678 | } |
679 | |||
680 | /* | ||
681 | * Under certain conditions, the window slide code | ||
682 | * might have reduced the number of bits available or | ||
683 | * disabled the the local alloc entirely. Re-check | ||
684 | * here and return -ENOSPC if necessary. | ||
685 | */ | ||
686 | status = -ENOSPC; | ||
687 | if (!ocfs2_la_state_enabled(osb)) | ||
688 | goto bail; | ||
689 | |||
690 | free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - | ||
691 | le32_to_cpu(alloc->id1.bitmap1.i_used); | ||
692 | if (bits_wanted > free_bits) | ||
693 | goto bail; | ||
694 | } | ||
695 | |||
696 | if (ac->ac_max_block) | ||
697 | mlog(0, "Calling in_range for max block %llu\n", | ||
698 | (unsigned long long)ac->ac_max_block); | ||
699 | |||
700 | if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, | ||
701 | bits_wanted)) { | ||
702 | /* | ||
703 | * The window is outside ac->ac_max_block. | ||
704 | * This errno tells the caller to keep localalloc enabled | ||
705 | * but to get the allocation from the main bitmap. | ||
706 | */ | ||
707 | status = -EFBIG; | ||
708 | goto bail; | ||
525 | } | 709 | } |
526 | 710 | ||
527 | ac->ac_inode = local_alloc_inode; | 711 | ac->ac_inode = local_alloc_inode; |
@@ -789,6 +973,85 @@ bail: | |||
789 | return status; | 973 | return status; |
790 | } | 974 | } |
791 | 975 | ||
976 | enum ocfs2_la_event { | ||
977 | OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ | ||
978 | OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has | ||
979 | * enough bits theoretically | ||
980 | * free, but a contiguous | ||
981 | * allocation could not be | ||
982 | * found. */ | ||
983 | OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have | ||
984 | * enough bits free to satisfy | ||
985 | * our request. */ | ||
986 | }; | ||
987 | #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) | ||
988 | /* | ||
989 | * Given an event, calculate the size of our next local alloc window. | ||
990 | * | ||
991 | * This should always be called under i_mutex of the local alloc inode | ||
992 | * so that local alloc disabling doesn't race with processes trying to | ||
993 | * use the allocator. | ||
994 | * | ||
995 | * Returns the state which the local alloc was left in. This value can | ||
996 | * be ignored by some paths. | ||
997 | */ | ||
998 | static int ocfs2_recalc_la_window(struct ocfs2_super *osb, | ||
999 | enum ocfs2_la_event event) | ||
1000 | { | ||
1001 | unsigned int bits; | ||
1002 | int state; | ||
1003 | |||
1004 | spin_lock(&osb->osb_lock); | ||
1005 | if (osb->local_alloc_state == OCFS2_LA_DISABLED) { | ||
1006 | WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); | ||
1007 | goto out_unlock; | ||
1008 | } | ||
1009 | |||
1010 | /* | ||
1011 | * ENOSPC and fragmentation are treated similarly for now. | ||
1012 | */ | ||
1013 | if (event == OCFS2_LA_EVENT_ENOSPC || | ||
1014 | event == OCFS2_LA_EVENT_FRAGMENTED) { | ||
1015 | /* | ||
1016 | * We ran out of contiguous space in the primary | ||
1017 | * bitmap. Drastically reduce the number of bits used | ||
1018 | * by local alloc until we have to disable it. | ||
1019 | */ | ||
1020 | bits = osb->local_alloc_bits >> 1; | ||
1021 | if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { | ||
1022 | /* | ||
1023 | * By setting state to THROTTLED, we'll keep | ||
1024 | * the number of local alloc bits used down | ||
1025 | * until an event occurs which would give us | ||
1026 | * reason to assume the bitmap situation might | ||
1027 | * have changed. | ||
1028 | */ | ||
1029 | osb->local_alloc_state = OCFS2_LA_THROTTLED; | ||
1030 | osb->local_alloc_bits = bits; | ||
1031 | } else { | ||
1032 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
1033 | } | ||
1034 | queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, | ||
1035 | OCFS2_LA_ENABLE_INTERVAL); | ||
1036 | goto out_unlock; | ||
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * Don't increase the size of the local alloc window until we | ||
1041 | * know we might be able to fulfill the request. Otherwise, we | ||
1042 | * risk bouncing around the global bitmap during periods of | ||
1043 | * low space. | ||
1044 | */ | ||
1045 | if (osb->local_alloc_state != OCFS2_LA_THROTTLED) | ||
1046 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
1047 | |||
1048 | out_unlock: | ||
1049 | state = osb->local_alloc_state; | ||
1050 | spin_unlock(&osb->osb_lock); | ||
1051 | |||
1052 | return state; | ||
1053 | } | ||
1054 | |||
792 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | 1055 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, |
793 | struct ocfs2_alloc_context **ac, | 1056 | struct ocfs2_alloc_context **ac, |
794 | struct inode **bitmap_inode, | 1057 | struct inode **bitmap_inode, |
@@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
803 | goto bail; | 1066 | goto bail; |
804 | } | 1067 | } |
805 | 1068 | ||
806 | (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); | 1069 | retry_enospc: |
1070 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | ||
807 | 1071 | ||
808 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 1072 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
1073 | if (status == -ENOSPC) { | ||
1074 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | ||
1075 | OCFS2_LA_DISABLED) | ||
1076 | goto bail; | ||
1077 | |||
1078 | ocfs2_free_ac_resource(*ac); | ||
1079 | memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); | ||
1080 | goto retry_enospc; | ||
1081 | } | ||
809 | if (status < 0) { | 1082 | if (status < 0) { |
810 | if (status != -ENOSPC) | 1083 | mlog_errno(status); |
811 | mlog_errno(status); | ||
812 | goto bail; | 1084 | goto bail; |
813 | } | 1085 | } |
814 | 1086 | ||
@@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
849 | "one\n"); | 1121 | "one\n"); |
850 | 1122 | ||
851 | mlog(0, "Allocating %u clusters for a new window.\n", | 1123 | mlog(0, "Allocating %u clusters for a new window.\n", |
852 | ocfs2_local_alloc_window_bits(osb)); | 1124 | osb->local_alloc_bits); |
853 | 1125 | ||
854 | /* Instruct the allocation code to try the most recently used | 1126 | /* Instruct the allocation code to try the most recently used |
855 | * cluster group. We'll re-record the group used this pass | 1127 | * cluster group. We'll re-record the group used this pass |
@@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
859 | /* we used the generic suballoc reserve function, but we set | 1131 | /* we used the generic suballoc reserve function, but we set |
860 | * everything up nicely, so there's no reason why we can't use | 1132 | * everything up nicely, so there's no reason why we can't use |
861 | * the more specific cluster api to claim bits. */ | 1133 | * the more specific cluster api to claim bits. */ |
862 | status = ocfs2_claim_clusters(osb, handle, ac, | 1134 | status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, |
863 | ocfs2_local_alloc_window_bits(osb), | ||
864 | &cluster_off, &cluster_count); | 1135 | &cluster_off, &cluster_count); |
1136 | if (status == -ENOSPC) { | ||
1137 | retry_enospc: | ||
1138 | /* | ||
1139 | * Note: We could also try syncing the journal here to | ||
1140 | * allow use of any free bits which the current | ||
1141 | * transaction can't give us access to. --Mark | ||
1142 | */ | ||
1143 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == | ||
1144 | OCFS2_LA_DISABLED) | ||
1145 | goto bail; | ||
1146 | |||
1147 | status = ocfs2_claim_clusters(osb, handle, ac, | ||
1148 | osb->local_alloc_bits, | ||
1149 | &cluster_off, | ||
1150 | &cluster_count); | ||
1151 | if (status == -ENOSPC) | ||
1152 | goto retry_enospc; | ||
1153 | /* | ||
1154 | * We only shrunk the *minimum* number of bits in our | ||
1155 | * request - it's entirely possible that the allocator | ||
1156 | * might give us more than we asked for. | ||
1157 | */ | ||
1158 | if (status == 0) { | ||
1159 | spin_lock(&osb->osb_lock); | ||
1160 | osb->local_alloc_bits = cluster_count; | ||
1161 | spin_unlock(&osb->osb_lock); | ||
1162 | } | ||
1163 | } | ||
865 | if (status < 0) { | 1164 | if (status < 0) { |
866 | if (status != -ENOSPC) | 1165 | if (status != -ENOSPC) |
867 | mlog_errno(status); | 1166 | mlog_errno(status); |
@@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |||
905 | 1204 | ||
906 | mlog_entry_void(); | 1205 | mlog_entry_void(); |
907 | 1206 | ||
1207 | ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); | ||
1208 | |||
908 | /* This will lock the main bitmap for us. */ | 1209 | /* This will lock the main bitmap for us. */ |
909 | status = ocfs2_local_alloc_reserve_for_window(osb, | 1210 | status = ocfs2_local_alloc_reserve_for_window(osb, |
910 | &ac, | 1211 | &ac, |
@@ -976,8 +1277,7 @@ bail: | |||
976 | if (handle) | 1277 | if (handle) |
977 | ocfs2_commit_trans(osb, handle); | 1278 | ocfs2_commit_trans(osb, handle); |
978 | 1279 | ||
979 | if (main_bm_bh) | 1280 | brelse(main_bm_bh); |
980 | brelse(main_bm_bh); | ||
981 | 1281 | ||
982 | if (main_bm_inode) | 1282 | if (main_bm_inode) |
983 | iput(main_bm_inode); | 1283 | iput(main_bm_inode); |
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 3f76631e110c..ac5ea9f86653 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h | |||
@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
52 | u32 *bit_off, | 52 | u32 *bit_off, |
53 | u32 *num_bits); | 53 | u32 *num_bits); |
54 | 54 | ||
55 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | ||
56 | unsigned int num_clusters); | ||
57 | void ocfs2_la_enable_worker(struct work_struct *work); | ||
58 | |||
55 | #endif /* OCFS2_LOCALALLOC_H */ | 59 | #endif /* OCFS2_LOCALALLOC_H */ |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 203f87143877..544ac6245175 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
@@ -24,6 +24,7 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/fcntl.h> | ||
27 | 28 | ||
28 | #define MLOG_MASK_PREFIX ML_INODE | 29 | #define MLOG_MASK_PREFIX ML_INODE |
29 | #include <cluster/masklog.h> | 30 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | 33 | ||
33 | #include "dlmglue.h" | 34 | #include "dlmglue.h" |
34 | #include "file.h" | 35 | #include "file.h" |
36 | #include "inode.h" | ||
35 | #include "locks.h" | 37 | #include "locks.h" |
36 | 38 | ||
37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | 39 | static int ocfs2_do_flock(struct file *file, struct inode *inode, |
@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
123 | else | 125 | else |
124 | return ocfs2_do_flock(file, inode, cmd, fl); | 126 | return ocfs2_do_flock(file, inode, cmd, fl); |
125 | } | 127 | } |
128 | |||
129 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) | ||
130 | { | ||
131 | struct inode *inode = file->f_mapping->host; | ||
132 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
133 | |||
134 | if (!(fl->fl_flags & FL_POSIX)) | ||
135 | return -ENOLCK; | ||
136 | if (__mandatory_lock(inode)) | ||
137 | return -ENOLCK; | ||
138 | |||
139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); | ||
140 | } | ||
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h index 9743ef2324ec..496d488b271f 100644 --- a/fs/ocfs2/locks.h +++ b/fs/ocfs2/locks.h | |||
@@ -27,5 +27,6 @@ | |||
27 | #define OCFS2_LOCKS_H | 27 | #define OCFS2_LOCKS_H |
28 | 28 | ||
29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); |
30 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl); | ||
30 | 31 | ||
31 | #endif /* OCFS2_LOCKS_H */ | 32 | #endif /* OCFS2_LOCKS_H */ |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d5d808fe0140..485a6aa0ad39 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "xattr.h" | ||
63 | 64 | ||
64 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
65 | 66 | ||
@@ -327,14 +328,9 @@ leave: | |||
327 | if (status == -ENOSPC) | 328 | if (status == -ENOSPC) |
328 | mlog(0, "Disk is full\n"); | 329 | mlog(0, "Disk is full\n"); |
329 | 330 | ||
330 | if (new_fe_bh) | 331 | brelse(new_fe_bh); |
331 | brelse(new_fe_bh); | 332 | brelse(de_bh); |
332 | 333 | brelse(parent_fe_bh); | |
333 | if (de_bh) | ||
334 | brelse(de_bh); | ||
335 | |||
336 | if (parent_fe_bh) | ||
337 | brelse(parent_fe_bh); | ||
338 | 334 | ||
339 | if ((status < 0) && inode) | 335 | if ((status < 0) && inode) |
340 | iput(inode); | 336 | iput(inode); |
@@ -647,12 +643,9 @@ out_unlock_inode: | |||
647 | out: | 643 | out: |
648 | ocfs2_inode_unlock(dir, 1); | 644 | ocfs2_inode_unlock(dir, 1); |
649 | 645 | ||
650 | if (de_bh) | 646 | brelse(de_bh); |
651 | brelse(de_bh); | 647 | brelse(fe_bh); |
652 | if (fe_bh) | 648 | brelse(parent_fe_bh); |
653 | brelse(fe_bh); | ||
654 | if (parent_fe_bh) | ||
655 | brelse(parent_fe_bh); | ||
656 | 649 | ||
657 | mlog_exit(err); | 650 | mlog_exit(err); |
658 | 651 | ||
@@ -851,17 +844,10 @@ leave: | |||
851 | iput(orphan_dir); | 844 | iput(orphan_dir); |
852 | } | 845 | } |
853 | 846 | ||
854 | if (fe_bh) | 847 | brelse(fe_bh); |
855 | brelse(fe_bh); | 848 | brelse(dirent_bh); |
856 | 849 | brelse(parent_node_bh); | |
857 | if (dirent_bh) | 850 | brelse(orphan_entry_bh); |
858 | brelse(dirent_bh); | ||
859 | |||
860 | if (parent_node_bh) | ||
861 | brelse(parent_node_bh); | ||
862 | |||
863 | if (orphan_entry_bh) | ||
864 | brelse(orphan_entry_bh); | ||
865 | 851 | ||
866 | mlog_exit(status); | 852 | mlog_exit(status); |
867 | 853 | ||
@@ -1372,24 +1358,15 @@ bail: | |||
1372 | 1358 | ||
1373 | if (new_inode) | 1359 | if (new_inode) |
1374 | iput(new_inode); | 1360 | iput(new_inode); |
1375 | if (newfe_bh) | 1361 | brelse(newfe_bh); |
1376 | brelse(newfe_bh); | 1362 | brelse(old_inode_bh); |
1377 | if (old_inode_bh) | 1363 | brelse(old_dir_bh); |
1378 | brelse(old_inode_bh); | 1364 | brelse(new_dir_bh); |
1379 | if (old_dir_bh) | 1365 | brelse(new_de_bh); |
1380 | brelse(old_dir_bh); | 1366 | brelse(old_de_bh); |
1381 | if (new_dir_bh) | 1367 | brelse(old_inode_de_bh); |
1382 | brelse(new_dir_bh); | 1368 | brelse(orphan_entry_bh); |
1383 | if (new_de_bh) | 1369 | brelse(insert_entry_bh); |
1384 | brelse(new_de_bh); | ||
1385 | if (old_de_bh) | ||
1386 | brelse(old_de_bh); | ||
1387 | if (old_inode_de_bh) | ||
1388 | brelse(old_inode_de_bh); | ||
1389 | if (orphan_entry_bh) | ||
1390 | brelse(orphan_entry_bh); | ||
1391 | if (insert_entry_bh) | ||
1392 | brelse(insert_entry_bh); | ||
1393 | 1370 | ||
1394 | mlog_exit(status); | 1371 | mlog_exit(status); |
1395 | 1372 | ||
@@ -1492,8 +1469,7 @@ bail: | |||
1492 | 1469 | ||
1493 | if (bhs) { | 1470 | if (bhs) { |
1494 | for(i = 0; i < blocks; i++) | 1471 | for(i = 0; i < blocks; i++) |
1495 | if (bhs[i]) | 1472 | brelse(bhs[i]); |
1496 | brelse(bhs[i]); | ||
1497 | kfree(bhs); | 1473 | kfree(bhs); |
1498 | } | 1474 | } |
1499 | 1475 | ||
@@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir, | |||
1598 | u32 offset = 0; | 1574 | u32 offset = 0; |
1599 | 1575 | ||
1600 | inode->i_op = &ocfs2_symlink_inode_operations; | 1576 | inode->i_op = &ocfs2_symlink_inode_operations; |
1601 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, | 1577 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, |
1602 | new_fe_bh, | 1578 | new_fe_bh, |
1603 | handle, data_ac, NULL, | 1579 | handle, data_ac, NULL, |
1604 | NULL); | 1580 | NULL); |
1605 | if (status < 0) { | 1581 | if (status < 0) { |
1606 | if (status != -ENOSPC && status != -EINTR) { | 1582 | if (status != -ENOSPC && status != -EINTR) { |
1607 | mlog(ML_ERROR, | 1583 | mlog(ML_ERROR, |
@@ -1659,12 +1635,9 @@ bail: | |||
1659 | 1635 | ||
1660 | ocfs2_inode_unlock(dir, 1); | 1636 | ocfs2_inode_unlock(dir, 1); |
1661 | 1637 | ||
1662 | if (new_fe_bh) | 1638 | brelse(new_fe_bh); |
1663 | brelse(new_fe_bh); | 1639 | brelse(parent_fe_bh); |
1664 | if (parent_fe_bh) | 1640 | brelse(de_bh); |
1665 | brelse(parent_fe_bh); | ||
1666 | if (de_bh) | ||
1667 | brelse(de_bh); | ||
1668 | if (inode_ac) | 1641 | if (inode_ac) |
1669 | ocfs2_free_alloc_context(inode_ac); | 1642 | ocfs2_free_alloc_context(inode_ac); |
1670 | if (data_ac) | 1643 | if (data_ac) |
@@ -1759,8 +1732,7 @@ leave: | |||
1759 | iput(orphan_dir_inode); | 1732 | iput(orphan_dir_inode); |
1760 | } | 1733 | } |
1761 | 1734 | ||
1762 | if (orphan_dir_bh) | 1735 | brelse(orphan_dir_bh); |
1763 | brelse(orphan_dir_bh); | ||
1764 | 1736 | ||
1765 | mlog_exit(status); | 1737 | mlog_exit(status); |
1766 | return status; | 1738 | return status; |
@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1780 | 1752 | ||
1781 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 1753 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
1782 | 1754 | ||
1783 | status = ocfs2_read_block(osb, | 1755 | status = ocfs2_read_block(orphan_dir_inode, |
1784 | OCFS2_I(orphan_dir_inode)->ip_blkno, | 1756 | OCFS2_I(orphan_dir_inode)->ip_blkno, |
1785 | &orphan_dir_bh, OCFS2_BH_CACHED, | 1757 | &orphan_dir_bh); |
1786 | orphan_dir_inode); | ||
1787 | if (status < 0) { | 1758 | if (status < 0) { |
1788 | mlog_errno(status); | 1759 | mlog_errno(status); |
1789 | goto leave; | 1760 | goto leave; |
@@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1829 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 1800 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
1830 | 1801 | ||
1831 | leave: | 1802 | leave: |
1832 | if (orphan_dir_bh) | 1803 | brelse(orphan_dir_bh); |
1833 | brelse(orphan_dir_bh); | ||
1834 | 1804 | ||
1835 | mlog_exit(status); | 1805 | mlog_exit(status); |
1836 | return status; | 1806 | return status; |
@@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
1898 | } | 1868 | } |
1899 | 1869 | ||
1900 | leave: | 1870 | leave: |
1901 | if (target_de_bh) | 1871 | brelse(target_de_bh); |
1902 | brelse(target_de_bh); | ||
1903 | 1872 | ||
1904 | mlog_exit(status); | 1873 | mlog_exit(status); |
1905 | return status; | 1874 | return status; |
@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = { | |||
1918 | .setattr = ocfs2_setattr, | 1887 | .setattr = ocfs2_setattr, |
1919 | .getattr = ocfs2_getattr, | 1888 | .getattr = ocfs2_getattr, |
1920 | .permission = ocfs2_permission, | 1889 | .permission = ocfs2_permission, |
1890 | .setxattr = generic_setxattr, | ||
1891 | .getxattr = generic_getxattr, | ||
1892 | .listxattr = ocfs2_listxattr, | ||
1893 | .removexattr = generic_removexattr, | ||
1921 | }; | 1894 | }; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7f625f2b1117..a21a465490c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -34,7 +34,12 @@ | |||
34 | #include <linux/workqueue.h> | 34 | #include <linux/workqueue.h> |
35 | #include <linux/kref.h> | 35 | #include <linux/kref.h> |
36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
37 | #include <linux/jbd.h> | 37 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
38 | # include <linux/jbd2.h> | ||
39 | #else | ||
40 | # include <linux/jbd.h> | ||
41 | # include "ocfs2_jbd_compat.h" | ||
42 | #endif | ||
38 | 43 | ||
39 | /* For union ocfs2_dlm_lksb */ | 44 | /* For union ocfs2_dlm_lksb */ |
40 | #include "stackglue.h" | 45 | #include "stackglue.h" |
@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats | |||
171 | 176 | ||
172 | enum ocfs2_local_alloc_state | 177 | enum ocfs2_local_alloc_state |
173 | { | 178 | { |
174 | OCFS2_LA_UNUSED = 0, | 179 | OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for |
175 | OCFS2_LA_ENABLED, | 180 | * this mountpoint. */ |
176 | OCFS2_LA_DISABLED | 181 | OCFS2_LA_ENABLED, /* Local alloc is in use. */ |
182 | OCFS2_LA_THROTTLED, /* Local alloc is in use, but number | ||
183 | * of bits has been reduced. */ | ||
184 | OCFS2_LA_DISABLED /* Local alloc has temporarily been | ||
185 | * disabled. */ | ||
177 | }; | 186 | }; |
178 | 187 | ||
179 | enum ocfs2_mount_options | 188 | enum ocfs2_mount_options |
@@ -184,6 +193,8 @@ enum ocfs2_mount_options | |||
184 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 193 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
185 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 194 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
186 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | 195 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ |
196 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ | ||
197 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ | ||
187 | }; | 198 | }; |
188 | 199 | ||
189 | #define OCFS2_OSB_SOFT_RO 0x0001 | 200 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -214,6 +225,7 @@ struct ocfs2_super | |||
214 | u32 bitmap_cpg; | 225 | u32 bitmap_cpg; |
215 | u8 *uuid; | 226 | u8 *uuid; |
216 | char *uuid_str; | 227 | char *uuid_str; |
228 | u32 uuid_hash; | ||
217 | u8 *vol_label; | 229 | u8 *vol_label; |
218 | u64 first_cluster_group_blkno; | 230 | u64 first_cluster_group_blkno; |
219 | u32 fs_generation; | 231 | u32 fs_generation; |
@@ -241,6 +253,7 @@ struct ocfs2_super | |||
241 | int s_sectsize_bits; | 253 | int s_sectsize_bits; |
242 | int s_clustersize; | 254 | int s_clustersize; |
243 | int s_clustersize_bits; | 255 | int s_clustersize_bits; |
256 | unsigned int s_xattr_inline_size; | ||
244 | 257 | ||
245 | atomic_t vol_state; | 258 | atomic_t vol_state; |
246 | struct mutex recovery_lock; | 259 | struct mutex recovery_lock; |
@@ -252,11 +265,27 @@ struct ocfs2_super | |||
252 | struct ocfs2_journal *journal; | 265 | struct ocfs2_journal *journal; |
253 | unsigned long osb_commit_interval; | 266 | unsigned long osb_commit_interval; |
254 | 267 | ||
255 | int local_alloc_size; | 268 | struct delayed_work la_enable_wq; |
256 | enum ocfs2_local_alloc_state local_alloc_state; | 269 | |
270 | /* | ||
271 | * Must hold local alloc i_mutex and osb->osb_lock to change | ||
272 | * local_alloc_bits. Reads can be done under either lock. | ||
273 | */ | ||
274 | unsigned int local_alloc_bits; | ||
275 | unsigned int local_alloc_default_bits; | ||
276 | |||
277 | enum ocfs2_local_alloc_state local_alloc_state; /* protected | ||
278 | * by osb_lock */ | ||
279 | |||
257 | struct buffer_head *local_alloc_bh; | 280 | struct buffer_head *local_alloc_bh; |
281 | |||
258 | u64 la_last_gd; | 282 | u64 la_last_gd; |
259 | 283 | ||
284 | #ifdef CONFIG_OCFS2_FS_STATS | ||
285 | struct dentry *local_alloc_debug; | ||
286 | char *local_alloc_debug_buf; | ||
287 | #endif | ||
288 | |||
260 | /* Next two fields are for local node slot recovery during | 289 | /* Next two fields are for local node slot recovery during |
261 | * mount. */ | 290 | * mount. */ |
262 | int dirty; | 291 | int dirty; |
@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) | |||
340 | return 0; | 369 | return 0; |
341 | } | 370 | } |
342 | 371 | ||
372 | static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) | ||
373 | { | ||
374 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) | ||
375 | return 1; | ||
376 | return 0; | ||
377 | } | ||
378 | |||
343 | /* set / clear functions because cluster events can make these happen | 379 | /* set / clear functions because cluster events can make these happen |
344 | * in parallel so we want the transitions to be atomic. this also | 380 | * in parallel so we want the transitions to be atomic. this also |
345 | * means that any future flags osb_flags must be protected by spinlock | 381 | * means that any future flags osb_flags must be protected by spinlock |
@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
554 | return pages_per_cluster; | 590 | return pages_per_cluster; |
555 | } | 591 | } |
556 | 592 | ||
593 | static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | ||
594 | unsigned int megs) | ||
595 | { | ||
596 | BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); | ||
597 | |||
598 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | ||
599 | } | ||
600 | |||
557 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | 601 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) |
558 | { | 602 | { |
559 | spin_lock(&osb->osb_lock); | 603 | spin_lock(&osb->osb_lock); |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 4f619850ccf7..f24ce3d3f956 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -64,6 +64,7 @@ | |||
64 | #define OCFS2_INODE_SIGNATURE "INODE01" | 64 | #define OCFS2_INODE_SIGNATURE "INODE01" |
65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" | 65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" |
66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" | 66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" |
67 | #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" | ||
67 | 68 | ||
68 | /* Compatibility flags */ | 69 | /* Compatibility flags */ |
69 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ | 70 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ |
@@ -90,7 +91,8 @@ | |||
90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 91 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ | 92 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | 93 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ |
93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | 94 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ |
95 | | OCFS2_FEATURE_INCOMPAT_XATTR) | ||
94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 96 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
95 | 97 | ||
96 | /* | 98 | /* |
@@ -127,10 +129,6 @@ | |||
127 | /* Support for data packed into inode blocks */ | 129 | /* Support for data packed into inode blocks */ |
128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 130 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
129 | 131 | ||
130 | /* Support for the extended slot map */ | ||
131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
132 | |||
133 | |||
134 | /* | 132 | /* |
135 | * Support for alternate, userspace cluster stacks. If set, the superblock | 133 | * Support for alternate, userspace cluster stacks. If set, the superblock |
136 | * field s_cluster_info contains a tag for the alternate stack in use as | 134 | * field s_cluster_info contains a tag for the alternate stack in use as |
@@ -142,6 +140,12 @@ | |||
142 | */ | 140 | */ |
143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | 141 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 |
144 | 142 | ||
143 | /* Support for the extended slot map */ | ||
144 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
145 | |||
146 | /* Support for extended attributes */ | ||
147 | #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 | ||
148 | |||
145 | /* | 149 | /* |
146 | * backup superblock flag is used to indicate that this volume | 150 | * backup superblock flag is used to indicate that this volume |
147 | * has backup superblocks. | 151 | * has backup superblocks. |
@@ -299,6 +303,12 @@ struct ocfs2_new_group_input { | |||
299 | */ | 303 | */ |
300 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | 304 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 |
301 | 305 | ||
306 | /* | ||
307 | * Inline extended attribute size (in bytes) | ||
308 | * The value chosen should be aligned to 16 byte boundaries. | ||
309 | */ | ||
310 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | ||
311 | |||
302 | struct ocfs2_system_inode_info { | 312 | struct ocfs2_system_inode_info { |
303 | char *si_name; | 313 | char *si_name; |
304 | int si_iflags; | 314 | int si_iflags; |
@@ -563,7 +573,7 @@ struct ocfs2_super_block { | |||
563 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts | 573 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts |
564 | before tunefs required */ | 574 | before tunefs required */ |
565 | __le16 s_tunefs_flag; | 575 | __le16 s_tunefs_flag; |
566 | __le32 s_reserved1; | 576 | __le32 s_uuid_hash; /* hash value of uuid */ |
567 | __le64 s_first_cluster_group; /* Block offset of 1st cluster | 577 | __le64 s_first_cluster_group; /* Block offset of 1st cluster |
568 | * group header */ | 578 | * group header */ |
569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 579 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
@@ -571,7 +581,11 @@ struct ocfs2_super_block { | |||
571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 581 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
572 | stack. Only valid | 582 | stack. Only valid |
573 | with INCOMPAT flag. */ | 583 | with INCOMPAT flag. */ |
574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | 584 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
585 | for this fs*/ | ||
586 | __le16 s_reserved0; | ||
587 | __le32 s_reserved1; | ||
588 | /*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ | ||
575 | /*140*/ | 589 | /*140*/ |
576 | 590 | ||
577 | /* | 591 | /* |
@@ -621,7 +635,8 @@ struct ocfs2_dinode { | |||
621 | belongs to */ | 635 | belongs to */ |
622 | __le16 i_suballoc_bit; /* Bit offset in suballocator | 636 | __le16 i_suballoc_bit; /* Bit offset in suballocator |
623 | block group */ | 637 | block group */ |
624 | /*10*/ __le32 i_reserved0; | 638 | /*10*/ __le16 i_reserved0; |
639 | __le16 i_xattr_inline_size; | ||
625 | __le32 i_clusters; /* Cluster count */ | 640 | __le32 i_clusters; /* Cluster count */ |
626 | __le32 i_uid; /* Owner UID */ | 641 | __le32 i_uid; /* Owner UID */ |
627 | __le32 i_gid; /* Owning GID */ | 642 | __le32 i_gid; /* Owning GID */ |
@@ -640,11 +655,12 @@ struct ocfs2_dinode { | |||
640 | __le32 i_atime_nsec; | 655 | __le32 i_atime_nsec; |
641 | __le32 i_ctime_nsec; | 656 | __le32 i_ctime_nsec; |
642 | __le32 i_mtime_nsec; | 657 | __le32 i_mtime_nsec; |
643 | __le32 i_attr; | 658 | /*70*/ __le32 i_attr; |
644 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL | 659 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL |
645 | was set in i_flags */ | 660 | was set in i_flags */ |
646 | __le16 i_dyn_features; | 661 | __le16 i_dyn_features; |
647 | /*70*/ __le64 i_reserved2[8]; | 662 | __le64 i_xattr_loc; |
663 | /*80*/ __le64 i_reserved2[7]; | ||
648 | /*B8*/ union { | 664 | /*B8*/ union { |
649 | __le64 i_pad1; /* Generic way to refer to this | 665 | __le64 i_pad1; /* Generic way to refer to this |
650 | 64bit union */ | 666 | 64bit union */ |
@@ -715,6 +731,136 @@ struct ocfs2_group_desc | |||
715 | /*40*/ __u8 bg_bitmap[0]; | 731 | /*40*/ __u8 bg_bitmap[0]; |
716 | }; | 732 | }; |
717 | 733 | ||
734 | /* | ||
735 | * On disk extended attribute structure for OCFS2. | ||
736 | */ | ||
737 | |||
738 | /* | ||
739 | * ocfs2_xattr_entry indicates one extended attribute. | ||
740 | * | ||
741 | * Note that it can be stored in inode, one block or one xattr bucket. | ||
742 | */ | ||
743 | struct ocfs2_xattr_entry { | ||
744 | __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ | ||
745 | __le16 xe_name_offset; /* byte offset from the 1st entry in the | ||
746 | local xattr storage(inode, xattr block or | ||
747 | xattr bucket). */ | ||
748 | __u8 xe_name_len; /* xattr name len, doesn't include prefix. */ | ||
749 | __u8 xe_type; /* the low 7 bits indicates the name prefix's | ||
750 | * type and the highest 1 bits indicate whether | ||
751 | * the EA is stored in the local storage. */ | ||
752 | __le64 xe_value_size; /* real xattr value length. */ | ||
753 | }; | ||
754 | |||
755 | /* | ||
756 | * On disk structure for xattr header. | ||
757 | * | ||
758 | * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records are | ||
759 | * in the local xattr storage. | ||
760 | */ | ||
761 | struct ocfs2_xattr_header { | ||
762 | __le16 xh_count; /* contains the count of how | ||
763 | many records are in the | ||
764 | local xattr storage. */ | ||
765 | __le16 xh_free_start; /* current offset for storing | ||
766 | xattr. */ | ||
767 | __le16 xh_name_value_len; /* total length of names/values | ||
768 | in this bucket. */ | ||
769 | __le16 xh_num_buckets; /* bucket nums in one extent | ||
770 | record, only valid in the | ||
771 | first bucket. */ | ||
772 | __le64 xh_csum; | ||
773 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ | ||
774 | }; | ||
775 | |||
776 | /* | ||
777 | * On disk structure for xattr value root. | ||
778 | * | ||
779 | * It is used when one extended attribute's size is larger, and we will save it | ||
780 | * in an outside cluster. It will be stored in a b-tree, like file content. | ||
781 | */ | ||
782 | struct ocfs2_xattr_value_root { | ||
783 | /*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ | ||
784 | __le32 xr_reserved0; | ||
785 | __le64 xr_last_eb_blk; /* Pointer to last extent block */ | ||
786 | /*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */ | ||
787 | }; | ||
788 | |||
789 | /* | ||
790 | * On disk structure for xattr tree root. | ||
791 | * | ||
792 | * It is used when there are too many extended attributes for one file. These | ||
793 | * attributes will be organized and stored in an indexed-btree. | ||
794 | */ | ||
795 | struct ocfs2_xattr_tree_root { | ||
796 | /*00*/ __le32 xt_clusters; /* clusters covered by xattr. */ | ||
797 | __le32 xt_reserved0; | ||
798 | __le64 xt_last_eb_blk; /* Pointer to last extent block */ | ||
799 | /*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */ | ||
800 | }; | ||
801 | |||
802 | #define OCFS2_XATTR_INDEXED 0x1 | ||
803 | #define OCFS2_HASH_SHIFT 5 | ||
804 | #define OCFS2_XATTR_ROUND 3 | ||
805 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ | ||
806 | ~(OCFS2_XATTR_ROUND)) | ||
807 | |||
808 | #define OCFS2_XATTR_BUCKET_SIZE 4096 | ||
809 | #define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \ | ||
810 | / OCFS2_MIN_BLOCKSIZE) | ||
811 | |||
812 | /* | ||
813 | * On disk structure for xattr block. | ||
814 | */ | ||
815 | struct ocfs2_xattr_block { | ||
816 | /*00*/ __u8 xb_signature[8]; /* Signature for verification */ | ||
817 | __le16 xb_suballoc_slot; /* Slot suballocator this | ||
818 | block belongs to. */ | ||
819 | __le16 xb_suballoc_bit; /* Bit offset in suballocator | ||
820 | block group */ | ||
821 | __le32 xb_fs_generation; /* Must match super block */ | ||
822 | /*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ | ||
823 | __le64 xb_csum; | ||
824 | /*20*/ __le16 xb_flags; /* Indicates whether this block contains | ||
825 | real xattr or a xattr tree. */ | ||
826 | __le16 xb_reserved0; | ||
827 | __le32 xb_reserved1; | ||
828 | __le64 xb_reserved2; | ||
829 | /*30*/ union { | ||
830 | struct ocfs2_xattr_header xb_header; /* xattr header if this | ||
831 | block contains xattr */ | ||
832 | struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this | ||
833 | block contains xattr | ||
834 | tree. */ | ||
835 | } xb_attrs; | ||
836 | }; | ||
837 | |||
838 | #define OCFS2_XATTR_ENTRY_LOCAL 0x80 | ||
839 | #define OCFS2_XATTR_TYPE_MASK 0x7F | ||
840 | static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe, | ||
841 | int local) | ||
842 | { | ||
843 | if (local) | ||
844 | xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL; | ||
845 | else | ||
846 | xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL; | ||
847 | } | ||
848 | |||
849 | static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe) | ||
850 | { | ||
851 | return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL; | ||
852 | } | ||
853 | |||
854 | static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type) | ||
855 | { | ||
856 | xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK; | ||
857 | } | ||
858 | |||
859 | static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) | ||
860 | { | ||
861 | return xe->xe_type & OCFS2_XATTR_TYPE_MASK; | ||
862 | } | ||
863 | |||
718 | #ifdef __KERNEL__ | 864 | #ifdef __KERNEL__ |
719 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) | 865 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) |
720 | { | 866 | { |
@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb) | |||
728 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | 874 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); |
729 | } | 875 | } |
730 | 876 | ||
877 | static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb, | ||
878 | struct ocfs2_dinode *di) | ||
879 | { | ||
880 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
881 | |||
882 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
883 | return sb->s_blocksize - | ||
884 | offsetof(struct ocfs2_dinode, id2.i_data.id_data) - | ||
885 | xattrsize; | ||
886 | else | ||
887 | return sb->s_blocksize - | ||
888 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
889 | } | ||
890 | |||
731 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | 891 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) |
732 | { | 892 | { |
733 | int size; | 893 | int size; |
@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | |||
738 | return size / sizeof(struct ocfs2_extent_rec); | 898 | return size / sizeof(struct ocfs2_extent_rec); |
739 | } | 899 | } |
740 | 900 | ||
901 | static inline int ocfs2_extent_recs_per_inode_with_xattr( | ||
902 | struct super_block *sb, | ||
903 | struct ocfs2_dinode *di) | ||
904 | { | ||
905 | int size; | ||
906 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
907 | |||
908 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
909 | size = sb->s_blocksize - | ||
910 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs) - | ||
911 | xattrsize; | ||
912 | else | ||
913 | size = sb->s_blocksize - | ||
914 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs); | ||
915 | |||
916 | return size / sizeof(struct ocfs2_extent_rec); | ||
917 | } | ||
918 | |||
741 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) | 919 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) |
742 | { | 920 | { |
743 | int size; | 921 | int size; |
@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) | |||
801 | return 0; | 979 | return 0; |
802 | 980 | ||
803 | } | 981 | } |
982 | |||
983 | static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | ||
984 | { | ||
985 | int size; | ||
986 | |||
987 | size = sb->s_blocksize - | ||
988 | offsetof(struct ocfs2_xattr_block, | ||
989 | xb_attrs.xb_root.xt_list.l_recs); | ||
990 | |||
991 | return size / sizeof(struct ocfs2_extent_rec); | ||
992 | } | ||
804 | #else | 993 | #else |
805 | static inline int ocfs2_fast_symlink_chars(int blocksize) | 994 | static inline int ocfs2_fast_symlink_chars(int blocksize) |
806 | { | 995 | { |
@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) | |||
884 | 1073 | ||
885 | return 0; | 1074 | return 0; |
886 | } | 1075 | } |
1076 | |||
1077 | static inline int ocfs2_xattr_recs_per_xb(int blocksize) | ||
1078 | { | ||
1079 | int size; | ||
1080 | |||
1081 | size = blocksize - | ||
1082 | offsetof(struct ocfs2_xattr_block, | ||
1083 | xb_attrs.xb_root.xt_list.l_recs); | ||
1084 | |||
1085 | return size / sizeof(struct ocfs2_extent_rec); | ||
1086 | } | ||
887 | #endif /* __KERNEL__ */ | 1087 | #endif /* __KERNEL__ */ |
888 | 1088 | ||
889 | 1089 | ||
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h new file mode 100644 index 000000000000..b91c78f8f558 --- /dev/null +++ b/fs/ocfs2/ocfs2_jbd_compat.h | |||
@@ -0,0 +1,82 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ocfs2_jbd_compat.h | ||
5 | * | ||
6 | * Compatibility defines for JBD. | ||
7 | * | ||
8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License version 2 as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #ifndef OCFS2_JBD_COMPAT_H | ||
21 | #define OCFS2_JBD_COMPAT_H | ||
22 | |||
23 | #ifndef CONFIG_OCFS2_COMPAT_JBD | ||
24 | # error Should not have been included | ||
25 | #endif | ||
26 | |||
27 | struct jbd2_inode { | ||
28 | unsigned int dummy; | ||
29 | }; | ||
30 | |||
31 | #define JBD2_BARRIER JFS_BARRIER | ||
32 | #define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE | ||
33 | |||
34 | #define jbd2_journal_ack_err journal_ack_err | ||
35 | #define jbd2_journal_clear_err journal_clear_err | ||
36 | #define jbd2_journal_destroy journal_destroy | ||
37 | #define jbd2_journal_dirty_metadata journal_dirty_metadata | ||
38 | #define jbd2_journal_errno journal_errno | ||
39 | #define jbd2_journal_extend journal_extend | ||
40 | #define jbd2_journal_flush journal_flush | ||
41 | #define jbd2_journal_force_commit journal_force_commit | ||
42 | #define jbd2_journal_get_write_access journal_get_write_access | ||
43 | #define jbd2_journal_get_undo_access journal_get_undo_access | ||
44 | #define jbd2_journal_init_inode journal_init_inode | ||
45 | #define jbd2_journal_invalidatepage journal_invalidatepage | ||
46 | #define jbd2_journal_load journal_load | ||
47 | #define jbd2_journal_lock_updates journal_lock_updates | ||
48 | #define jbd2_journal_restart journal_restart | ||
49 | #define jbd2_journal_start journal_start | ||
50 | #define jbd2_journal_start_commit journal_start_commit | ||
51 | #define jbd2_journal_stop journal_stop | ||
52 | #define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers | ||
53 | #define jbd2_journal_unlock_updates journal_unlock_updates | ||
54 | #define jbd2_journal_wipe journal_wipe | ||
55 | #define jbd2_log_wait_commit log_wait_commit | ||
56 | |||
57 | static inline int jbd2_journal_file_inode(handle_t *handle, | ||
58 | struct jbd2_inode *inode) | ||
59 | { | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
64 | loff_t new_size) | ||
65 | { | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, | ||
70 | struct inode *inode) | ||
71 | { | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | static inline void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
76 | struct jbd2_inode *jinode) | ||
77 | { | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | |||
82 | #endif /* OCFS2_JBD_COMPAT_H */ | ||
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 8166968e9015..ffd48db229a7 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c | |||
@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data) | |||
200 | if (cluster > clusters) | 200 | if (cluster > clusters) |
201 | break; | 201 | break; |
202 | 202 | ||
203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | 203 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup); |
204 | if (ret < 0) { | 204 | if (ret < 0) { |
205 | mlog_errno(ret); | 205 | mlog_errno(ret); |
206 | break; | 206 | break; |
@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode, | |||
236 | * update the superblock last. | 236 | * update the superblock last. |
237 | * It doesn't matter if the write failed. | 237 | * It doesn't matter if the write failed. |
238 | */ | 238 | */ |
239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | 239 | ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1, |
240 | &super_bh, 0, NULL); | 240 | &super_bh); |
241 | if (ret < 0) { | 241 | if (ret < 0) { |
242 | mlog_errno(ret); | 242 | mlog_errno(ret); |
243 | goto out; | 243 | goto out; |
@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) | |||
332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | 332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, |
333 | first_new_cluster - 1); | 333 | first_new_cluster - 1); |
334 | 334 | ||
335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | 335 | ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); |
336 | main_bm_inode); | ||
337 | if (ret < 0) { | 336 | if (ret < 0) { |
338 | mlog_errno(ret); | 337 | mlog_errno(ret); |
339 | goto out_unlock; | 338 | goto out_unlock; |
@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
540 | goto out_unlock; | 539 | goto out_unlock; |
541 | } | 540 | } |
542 | 541 | ||
543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | 542 | ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh); |
544 | if (ret < 0) { | 543 | if (ret < 0) { |
545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | 544 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " |
546 | "from the device.", (unsigned long long)input->group); | 545 | "from the device.", (unsigned long long)input->group); |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bb5ff8939bf1..bdda2d8f8508 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | |||
150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If |
151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. |
152 | */ | 152 | */ |
153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | 153 | ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, |
154 | si->si_inode); | 154 | OCFS2_BH_IGNORE_CACHE); |
155 | if (ret == 0) { | 155 | if (ret == 0) { |
156 | spin_lock(&osb->osb_lock); | 156 | spin_lock(&osb->osb_lock); |
157 | ocfs2_update_slot_info(si); | 157 | ocfs2_update_slot_info(si); |
@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
404 | (unsigned long long)blkno); | 404 | (unsigned long long)blkno); |
405 | 405 | ||
406 | bh = NULL; /* Acquire a fresh bh */ | 406 | bh = NULL; /* Acquire a fresh bh */ |
407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | 407 | status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, |
408 | OCFS2_BH_IGNORE_CACHE); | ||
408 | if (status < 0) { | 409 | if (status < 0) { |
409 | mlog_errno(status); | 410 | mlog_errno(status); |
410 | goto bail; | 411 | goto bail; |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 353fc35c6748..faec2d879357 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | 28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ |
29 | #include "stackglue.h" | 29 | #include "stackglue.h" |
30 | 30 | ||
31 | #include <linux/dlm_plock.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * The control protocol starts with a handshake. Until the handshake | 34 | * The control protocol starts with a handshake. Until the handshake |
@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
746 | { | 747 | { |
747 | } | 748 | } |
748 | 749 | ||
750 | static int user_plock(struct ocfs2_cluster_connection *conn, | ||
751 | u64 ino, | ||
752 | struct file *file, | ||
753 | int cmd, | ||
754 | struct file_lock *fl) | ||
755 | { | ||
756 | /* | ||
757 | * This more or less just demuxes the plock request into any | ||
758 | * one of three dlm calls. | ||
759 | * | ||
760 | * Internally, fs/dlm will pass these to a misc device, which | ||
761 | * a userspace daemon will read and write to. | ||
762 | * | ||
763 | * For now, cancel requests (which happen internally only) | ||
764 | * are turned into unlocks. Most of this function is taken from | ||
765 | * gfs2_lock. | ||
766 | */ | ||
767 | |||
768 | if (cmd == F_CANCELLK) { | ||
769 | cmd = F_SETLK; | ||
770 | fl->fl_type = F_UNLCK; | ||
771 | } | ||
772 | |||
773 | if (IS_GETLK(cmd)) | ||
774 | return dlm_posix_get(conn->cc_lockspace, ino, file, fl); | ||
775 | else if (fl->fl_type == F_UNLCK) | ||
776 | return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); | ||
777 | else | ||
778 | return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); | ||
779 | } | ||
780 | |||
749 | /* | 781 | /* |
750 | * Compare a requested locking protocol version against the current one. | 782 | * Compare a requested locking protocol version against the current one. |
751 | * | 783 | * |
@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { | |||
839 | .dlm_unlock = user_dlm_unlock, | 871 | .dlm_unlock = user_dlm_unlock, |
840 | .lock_status = user_dlm_lock_status, | 872 | .lock_status = user_dlm_lock_status, |
841 | .lock_lvb = user_dlm_lvb, | 873 | .lock_lvb = user_dlm_lvb, |
874 | .plock = user_plock, | ||
842 | .dump_lksb = user_dlm_dump_lksb, | 875 | .dump_lksb = user_dlm_dump_lksb, |
843 | }; | 876 | }; |
844 | 877 | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..68b668b0e60a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, | |||
97 | goto out; | 97 | goto out; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* Ok, the stack is pinned */ | ||
101 | p->sp_count++; | ||
102 | active_stack = p; | 100 | active_stack = p; |
103 | |||
104 | rc = 0; | 101 | rc = 0; |
105 | 102 | ||
106 | out: | 103 | out: |
104 | /* If we found it, pin it */ | ||
105 | if (!rc) | ||
106 | active_stack->sp_count++; | ||
107 | |||
107 | spin_unlock(&ocfs2_stack_lock); | 108 | spin_unlock(&ocfs2_stack_lock); |
108 | return rc; | 109 | return rc; |
109 | } | 110 | } |
@@ -287,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
287 | } | 288 | } |
288 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); | 289 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); |
289 | 290 | ||
291 | int ocfs2_stack_supports_plocks(void) | ||
292 | { | ||
293 | return active_stack && active_stack->sp_ops->plock; | ||
294 | } | ||
295 | EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); | ||
296 | |||
297 | /* | ||
298 | * ocfs2_plock() can only be safely called if | ||
299 | * ocfs2_stack_supports_plocks() returned true | ||
300 | */ | ||
301 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
302 | struct file *file, int cmd, struct file_lock *fl) | ||
303 | { | ||
304 | WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); | ||
305 | if (active_stack->sp_ops->plock) | ||
306 | return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); | ||
307 | return -EOPNOTSUPP; | ||
308 | } | ||
309 | EXPORT_SYMBOL_GPL(ocfs2_plock); | ||
310 | |||
290 | int ocfs2_cluster_connect(const char *stack_name, | 311 | int ocfs2_cluster_connect(const char *stack_name, |
291 | const char *group, | 312 | const char *group, |
292 | int grouplen, | 313 | int grouplen, |
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index db56281dd1be..c571af375ef8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
@@ -28,6 +28,10 @@ | |||
28 | #include "dlm/dlmapi.h" | 28 | #include "dlm/dlmapi.h" |
29 | #include <linux/dlm.h> | 29 | #include <linux/dlm.h> |
30 | 30 | ||
31 | /* Needed for plock-related prototypes */ | ||
32 | struct file; | ||
33 | struct file_lock; | ||
34 | |||
31 | /* | 35 | /* |
32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | 36 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it |
33 | * some day, but right now we need it. Let's fake it. This value is larger | 37 | * some day, but right now we need it. Let's fake it. This value is larger |
@@ -187,6 +191,17 @@ struct ocfs2_stack_operations { | |||
187 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | 191 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); |
188 | 192 | ||
189 | /* | 193 | /* |
194 | * Cluster-aware posix locks | ||
195 | * | ||
196 | * This is NULL for stacks which do not support posix locks. | ||
197 | */ | ||
198 | int (*plock)(struct ocfs2_cluster_connection *conn, | ||
199 | u64 ino, | ||
200 | struct file *file, | ||
201 | int cmd, | ||
202 | struct file_lock *fl); | ||
203 | |||
204 | /* | ||
190 | * This is an optional debugging hook. If provided, the | 205 | * This is an optional debugging hook. If provided, the |
191 | * stack can dump debugging information about this lock. | 206 | * stack can dump debugging information about this lock. |
192 | */ | 207 | */ |
@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | |||
240 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | 255 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); |
241 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | 256 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); |
242 | 257 | ||
258 | int ocfs2_stack_supports_plocks(void); | ||
259 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
260 | struct file *file, int cmd, struct file_lock *fl); | ||
261 | |||
243 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | 262 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); |
244 | 263 | ||
245 | 264 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d2d278fb9819..c5ff18b46b57 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
62 | struct ocfs2_chain_list *cl); | 62 | struct ocfs2_chain_list *cl); |
63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
64 | struct inode *alloc_inode, | 64 | struct inode *alloc_inode, |
65 | struct buffer_head *bh); | 65 | struct buffer_head *bh, |
66 | u64 max_block); | ||
66 | 67 | ||
67 | static int ocfs2_cluster_group_search(struct inode *inode, | 68 | static int ocfs2_cluster_group_search(struct inode *inode, |
68 | struct buffer_head *group_bh, | 69 | struct buffer_head *group_bh, |
69 | u32 bits_wanted, u32 min_bits, | 70 | u32 bits_wanted, u32 min_bits, |
71 | u64 max_block, | ||
70 | u16 *bit_off, u16 *bits_found); | 72 | u16 *bit_off, u16 *bits_found); |
71 | static int ocfs2_block_group_search(struct inode *inode, | 73 | static int ocfs2_block_group_search(struct inode *inode, |
72 | struct buffer_head *group_bh, | 74 | struct buffer_head *group_bh, |
73 | u32 bits_wanted, u32 min_bits, | 75 | u32 bits_wanted, u32 min_bits, |
76 | u64 max_block, | ||
74 | u16 *bit_off, u16 *bits_found); | 77 | u16 *bit_off, u16 *bits_found); |
75 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | 78 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, |
76 | struct ocfs2_alloc_context *ac, | 79 | struct ocfs2_alloc_context *ac, |
@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
110 | u64 data_blkno, | 113 | u64 data_blkno, |
111 | u64 *bg_blkno, | 114 | u64 *bg_blkno, |
112 | u16 *bg_bit_off); | 115 | u16 *bg_bit_off); |
116 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | ||
117 | u32 bits_wanted, u64 max_block, | ||
118 | struct ocfs2_alloc_context **ac); | ||
113 | 119 | ||
114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | 120 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
115 | { | 121 | { |
116 | struct inode *inode = ac->ac_inode; | 122 | struct inode *inode = ac->ac_inode; |
117 | 123 | ||
@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
124 | iput(inode); | 130 | iput(inode); |
125 | ac->ac_inode = NULL; | 131 | ac->ac_inode = NULL; |
126 | } | 132 | } |
127 | if (ac->ac_bh) { | 133 | brelse(ac->ac_bh); |
128 | brelse(ac->ac_bh); | 134 | ac->ac_bh = NULL; |
129 | ac->ac_bh = NULL; | ||
130 | } | ||
131 | } | 135 | } |
132 | 136 | ||
133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 137 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) | |||
276 | */ | 280 | */ |
277 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 281 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
278 | struct inode *alloc_inode, | 282 | struct inode *alloc_inode, |
279 | struct buffer_head *bh) | 283 | struct buffer_head *bh, |
284 | u64 max_block) | ||
280 | { | 285 | { |
281 | int status, credits; | 286 | int status, credits; |
282 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | 287 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; |
@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
294 | mlog_entry_void(); | 299 | mlog_entry_void(); |
295 | 300 | ||
296 | cl = &fe->id2.i_chain; | 301 | cl = &fe->id2.i_chain; |
297 | status = ocfs2_reserve_clusters(osb, | 302 | status = ocfs2_reserve_clusters_with_limit(osb, |
298 | le16_to_cpu(cl->cl_cpg), | 303 | le16_to_cpu(cl->cl_cpg), |
299 | &ac); | 304 | max_block, &ac); |
300 | if (status < 0) { | 305 | if (status < 0) { |
301 | if (status != -ENOSPC) | 306 | if (status != -ENOSPC) |
302 | mlog_errno(status); | 307 | mlog_errno(status); |
@@ -394,8 +399,7 @@ bail: | |||
394 | if (ac) | 399 | if (ac) |
395 | ocfs2_free_alloc_context(ac); | 400 | ocfs2_free_alloc_context(ac); |
396 | 401 | ||
397 | if (bg_bh) | 402 | brelse(bg_bh); |
398 | brelse(bg_bh); | ||
399 | 403 | ||
400 | mlog_exit(status); | 404 | mlog_exit(status); |
401 | return status; | 405 | return status; |
@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
469 | goto bail; | 473 | goto bail; |
470 | } | 474 | } |
471 | 475 | ||
472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 476 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
477 | ac->ac_max_block); | ||
473 | if (status < 0) { | 478 | if (status < 0) { |
474 | if (status != -ENOSPC) | 479 | if (status != -ENOSPC) |
475 | mlog_errno(status); | 480 | mlog_errno(status); |
@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
486 | get_bh(bh); | 491 | get_bh(bh); |
487 | ac->ac_bh = bh; | 492 | ac->ac_bh = bh; |
488 | bail: | 493 | bail: |
489 | if (bh) | 494 | brelse(bh); |
490 | brelse(bh); | ||
491 | 495 | ||
492 | mlog_exit(status); | 496 | mlog_exit(status); |
493 | return status; | 497 | return status; |
494 | } | 498 | } |
495 | 499 | ||
496 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 500 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, |
497 | struct ocfs2_dinode *fe, | 501 | int blocks, |
498 | struct ocfs2_alloc_context **ac) | 502 | struct ocfs2_alloc_context **ac) |
499 | { | 503 | { |
500 | int status; | 504 | int status; |
501 | u32 slot; | 505 | u32 slot; |
@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
507 | goto bail; | 511 | goto bail; |
508 | } | 512 | } |
509 | 513 | ||
510 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | 514 | (*ac)->ac_bits_wanted = blocks; |
511 | (*ac)->ac_which = OCFS2_AC_USE_META; | 515 | (*ac)->ac_which = OCFS2_AC_USE_META; |
512 | slot = osb->slot_num; | 516 | slot = osb->slot_num; |
513 | (*ac)->ac_group_search = ocfs2_block_group_search; | 517 | (*ac)->ac_group_search = ocfs2_block_group_search; |
@@ -532,6 +536,15 @@ bail: | |||
532 | return status; | 536 | return status; |
533 | } | 537 | } |
534 | 538 | ||
539 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | ||
540 | struct ocfs2_extent_list *root_el, | ||
541 | struct ocfs2_alloc_context **ac) | ||
542 | { | ||
543 | return ocfs2_reserve_new_metadata_blocks(osb, | ||
544 | ocfs2_extend_meta_needed(root_el), | ||
545 | ac); | ||
546 | } | ||
547 | |||
535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | 548 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, |
536 | struct ocfs2_alloc_context *ac) | 549 | struct ocfs2_alloc_context *ac) |
537 | { | 550 | { |
@@ -582,6 +595,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
582 | (*ac)->ac_group_search = ocfs2_block_group_search; | 595 | (*ac)->ac_group_search = ocfs2_block_group_search; |
583 | 596 | ||
584 | /* | 597 | /* |
598 | * stat(2) can't handle i_ino > 32bits, so we tell the | ||
599 | * lower levels not to allocate us a block group past that | ||
600 | * limit. The 'inode64' mount option avoids this behavior. | ||
601 | */ | ||
602 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64)) | ||
603 | (*ac)->ac_max_block = (u32)~0U; | ||
604 | |||
605 | /* | ||
585 | * slot is set when we successfully steal inode from other nodes. | 606 | * slot is set when we successfully steal inode from other nodes. |
586 | * It is reset in 3 places: | 607 | * It is reset in 3 places: |
587 | * 1. when we flush the truncate log | 608 | * 1. when we flush the truncate log |
@@ -661,9 +682,9 @@ bail: | |||
661 | /* Callers don't need to care which bitmap (local alloc or main) to | 682 | /* Callers don't need to care which bitmap (local alloc or main) to |
662 | * use so we figure it out for them, but unfortunately this clutters | 683 | * use so we figure it out for them, but unfortunately this clutters |
663 | * things a bit. */ | 684 | * things a bit. */ |
664 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 685 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
665 | u32 bits_wanted, | 686 | u32 bits_wanted, u64 max_block, |
666 | struct ocfs2_alloc_context **ac) | 687 | struct ocfs2_alloc_context **ac) |
667 | { | 688 | { |
668 | int status; | 689 | int status; |
669 | 690 | ||
@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |||
677 | } | 698 | } |
678 | 699 | ||
679 | (*ac)->ac_bits_wanted = bits_wanted; | 700 | (*ac)->ac_bits_wanted = bits_wanted; |
701 | (*ac)->ac_max_block = max_block; | ||
680 | 702 | ||
681 | status = -ENOSPC; | 703 | status = -ENOSPC; |
682 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | 704 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { |
683 | status = ocfs2_reserve_local_alloc_bits(osb, | 705 | status = ocfs2_reserve_local_alloc_bits(osb, |
684 | bits_wanted, | 706 | bits_wanted, |
685 | *ac); | 707 | *ac); |
686 | if ((status < 0) && (status != -ENOSPC)) { | 708 | if (status == -EFBIG) { |
709 | /* The local alloc window is outside ac_max_block. | ||
710 | * Use the main bitmap. */ | ||
711 | status = -ENOSPC; | ||
712 | } else if ((status < 0) && (status != -ENOSPC)) { | ||
687 | mlog_errno(status); | 713 | mlog_errno(status); |
688 | goto bail; | 714 | goto bail; |
689 | } else if (status == -ENOSPC) { | ||
690 | /* reserve_local_bits will return enospc with | ||
691 | * the local alloc inode still locked, so we | ||
692 | * can change this safely here. */ | ||
693 | mlog(0, "Disabling local alloc\n"); | ||
694 | /* We set to OCFS2_LA_DISABLED so that umount | ||
695 | * can clean up what's left of the local | ||
696 | * allocation */ | ||
697 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
698 | } | 715 | } |
699 | } | 716 | } |
700 | 717 | ||
@@ -718,6 +735,13 @@ bail: | |||
718 | return status; | 735 | return status; |
719 | } | 736 | } |
720 | 737 | ||
738 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | ||
739 | u32 bits_wanted, | ||
740 | struct ocfs2_alloc_context **ac) | ||
741 | { | ||
742 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); | ||
743 | } | ||
744 | |||
721 | /* | 745 | /* |
722 | * More or less lifted from ext3. I'll leave their description below: | 746 | * More or less lifted from ext3. I'll leave their description below: |
723 | * | 747 | * |
@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
1000 | static int ocfs2_cluster_group_search(struct inode *inode, | 1024 | static int ocfs2_cluster_group_search(struct inode *inode, |
1001 | struct buffer_head *group_bh, | 1025 | struct buffer_head *group_bh, |
1002 | u32 bits_wanted, u32 min_bits, | 1026 | u32 bits_wanted, u32 min_bits, |
1027 | u64 max_block, | ||
1003 | u16 *bit_off, u16 *bits_found) | 1028 | u16 *bit_off, u16 *bits_found) |
1004 | { | 1029 | { |
1005 | int search = -ENOSPC; | 1030 | int search = -ENOSPC; |
1006 | int ret; | 1031 | int ret; |
1032 | u64 blkoff; | ||
1007 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; | 1033 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
1034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1008 | u16 tmp_off, tmp_found; | 1035 | u16 tmp_off, tmp_found; |
1009 | unsigned int max_bits, gd_cluster_off; | 1036 | unsigned int max_bits, gd_cluster_off; |
1010 | 1037 | ||
@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1037 | if (ret) | 1064 | if (ret) |
1038 | return ret; | 1065 | return ret; |
1039 | 1066 | ||
1067 | if (max_block) { | ||
1068 | blkoff = ocfs2_clusters_to_blocks(inode->i_sb, | ||
1069 | gd_cluster_off + | ||
1070 | tmp_off + tmp_found); | ||
1071 | mlog(0, "Checking %llu against %llu\n", | ||
1072 | (unsigned long long)blkoff, | ||
1073 | (unsigned long long)max_block); | ||
1074 | if (blkoff > max_block) | ||
1075 | return -ENOSPC; | ||
1076 | } | ||
1077 | |||
1040 | /* ocfs2_block_group_find_clear_bits() might | 1078 | /* ocfs2_block_group_find_clear_bits() might |
1041 | * return success, but we still want to return | 1079 | * return success, but we still want to return |
1042 | * -ENOSPC unless it found the minimum number | 1080 | * -ENOSPC unless it found the minimum number |
@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1045 | *bit_off = tmp_off; | 1083 | *bit_off = tmp_off; |
1046 | *bits_found = tmp_found; | 1084 | *bits_found = tmp_found; |
1047 | search = 0; /* success */ | 1085 | search = 0; /* success */ |
1086 | } else if (tmp_found) { | ||
1087 | /* | ||
1088 | * Don't show bits which we'll be returning | ||
1089 | * for allocation to the local alloc bitmap. | ||
1090 | */ | ||
1091 | ocfs2_local_alloc_seen_free_bits(osb, tmp_found); | ||
1048 | } | 1092 | } |
1049 | } | 1093 | } |
1050 | 1094 | ||
@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1054 | static int ocfs2_block_group_search(struct inode *inode, | 1098 | static int ocfs2_block_group_search(struct inode *inode, |
1055 | struct buffer_head *group_bh, | 1099 | struct buffer_head *group_bh, |
1056 | u32 bits_wanted, u32 min_bits, | 1100 | u32 bits_wanted, u32 min_bits, |
1101 | u64 max_block, | ||
1057 | u16 *bit_off, u16 *bits_found) | 1102 | u16 *bit_off, u16 *bits_found) |
1058 | { | 1103 | { |
1059 | int ret = -ENOSPC; | 1104 | int ret = -ENOSPC; |
1105 | u64 blkoff; | ||
1060 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | 1106 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1061 | 1107 | ||
1062 | BUG_ON(min_bits != 1); | 1108 | BUG_ON(min_bits != 1); |
1063 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); | 1109 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); |
1064 | 1110 | ||
1065 | if (bg->bg_free_bits_count) | 1111 | if (bg->bg_free_bits_count) { |
1066 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 1112 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
1067 | group_bh, bits_wanted, | 1113 | group_bh, bits_wanted, |
1068 | le16_to_cpu(bg->bg_bits), | 1114 | le16_to_cpu(bg->bg_bits), |
1069 | bit_off, bits_found); | 1115 | bit_off, bits_found); |
1116 | if (!ret && max_block) { | ||
1117 | blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + | ||
1118 | *bits_found; | ||
1119 | mlog(0, "Checking %llu against %llu\n", | ||
1120 | (unsigned long long)blkoff, | ||
1121 | (unsigned long long)max_block); | ||
1122 | if (blkoff > max_block) | ||
1123 | ret = -ENOSPC; | ||
1124 | } | ||
1125 | } | ||
1070 | 1126 | ||
1071 | return ret; | 1127 | return ret; |
1072 | } | 1128 | } |
@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1116 | struct ocfs2_group_desc *gd; | 1172 | struct ocfs2_group_desc *gd; |
1117 | struct inode *alloc_inode = ac->ac_inode; | 1173 | struct inode *alloc_inode = ac->ac_inode; |
1118 | 1174 | ||
1119 | ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, | 1175 | ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); |
1120 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | ||
1121 | if (ret < 0) { | 1176 | if (ret < 0) { |
1122 | mlog_errno(ret); | 1177 | mlog_errno(ret); |
1123 | return ret; | 1178 | return ret; |
@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1131 | } | 1186 | } |
1132 | 1187 | ||
1133 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | 1188 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, |
1134 | bit_off, &found); | 1189 | ac->ac_max_block, bit_off, &found); |
1135 | if (ret < 0) { | 1190 | if (ret < 0) { |
1136 | if (ret != -ENOSPC) | 1191 | if (ret != -ENOSPC) |
1137 | mlog_errno(ret); | 1192 | mlog_errno(ret); |
@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1186 | bits_wanted, chain, | 1241 | bits_wanted, chain, |
1187 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); | 1242 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); |
1188 | 1243 | ||
1189 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1244 | status = ocfs2_read_block(alloc_inode, |
1190 | le64_to_cpu(cl->cl_recs[chain].c_blkno), | 1245 | le64_to_cpu(cl->cl_recs[chain].c_blkno), |
1191 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | 1246 | &group_bh); |
1192 | if (status < 0) { | 1247 | if (status < 0) { |
1193 | mlog_errno(status); | 1248 | mlog_errno(status); |
1194 | goto bail; | 1249 | goto bail; |
@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1204 | /* for now, the chain search is a bit simplistic. We just use | 1259 | /* for now, the chain search is a bit simplistic. We just use |
1205 | * the 1st group with any empty bits. */ | 1260 | * the 1st group with any empty bits. */ |
1206 | while ((status = ac->ac_group_search(alloc_inode, group_bh, | 1261 | while ((status = ac->ac_group_search(alloc_inode, group_bh, |
1207 | bits_wanted, min_bits, bit_off, | 1262 | bits_wanted, min_bits, |
1263 | ac->ac_max_block, bit_off, | ||
1208 | &tmp_bits)) == -ENOSPC) { | 1264 | &tmp_bits)) == -ENOSPC) { |
1209 | if (!bg->bg_next_group) | 1265 | if (!bg->bg_next_group) |
1210 | break; | 1266 | break; |
1211 | 1267 | ||
1212 | if (prev_group_bh) { | 1268 | brelse(prev_group_bh); |
1213 | brelse(prev_group_bh); | 1269 | prev_group_bh = NULL; |
1214 | prev_group_bh = NULL; | 1270 | |
1215 | } | ||
1216 | next_group = le64_to_cpu(bg->bg_next_group); | 1271 | next_group = le64_to_cpu(bg->bg_next_group); |
1217 | prev_group_bh = group_bh; | 1272 | prev_group_bh = group_bh; |
1218 | group_bh = NULL; | 1273 | group_bh = NULL; |
1219 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1274 | status = ocfs2_read_block(alloc_inode, |
1220 | next_group, &group_bh, | 1275 | next_group, &group_bh); |
1221 | OCFS2_BH_CACHED, alloc_inode); | ||
1222 | if (status < 0) { | 1276 | if (status < 0) { |
1223 | mlog_errno(status); | 1277 | mlog_errno(status); |
1224 | goto bail; | 1278 | goto bail; |
@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1307 | *bg_blkno = le64_to_cpu(bg->bg_blkno); | 1361 | *bg_blkno = le64_to_cpu(bg->bg_blkno); |
1308 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1362 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1309 | bail: | 1363 | bail: |
1310 | if (group_bh) | 1364 | brelse(group_bh); |
1311 | brelse(group_bh); | 1365 | brelse(prev_group_bh); |
1312 | if (prev_group_bh) | ||
1313 | brelse(prev_group_bh); | ||
1314 | 1366 | ||
1315 | mlog_exit(status); | 1367 | mlog_exit(status); |
1316 | return status; | 1368 | return status; |
@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1723 | { | 1775 | { |
1724 | int status = 0; | 1776 | int status = 0; |
1725 | u32 tmp_used; | 1777 | u32 tmp_used; |
1726 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); | ||
1727 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; | 1778 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; |
1728 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | 1779 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; |
1729 | struct buffer_head *group_bh = NULL; | 1780 | struct buffer_head *group_bh = NULL; |
@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1742 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, | 1793 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, |
1743 | (unsigned long long)bg_blkno, start_bit); | 1794 | (unsigned long long)bg_blkno, start_bit); |
1744 | 1795 | ||
1745 | status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, | 1796 | status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); |
1746 | alloc_inode); | ||
1747 | if (status < 0) { | 1797 | if (status < 0) { |
1748 | mlog_errno(status); | 1798 | mlog_errno(status); |
1749 | goto bail; | 1799 | goto bail; |
@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1784 | } | 1834 | } |
1785 | 1835 | ||
1786 | bail: | 1836 | bail: |
1787 | if (group_bh) | 1837 | brelse(group_bh); |
1788 | brelse(group_bh); | ||
1789 | 1838 | ||
1790 | mlog_exit(status); | 1839 | mlog_exit(status); |
1791 | return status; | 1840 | return status; |
@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle, | |||
1838 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | 1887 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, |
1839 | bg_start_bit, bg_blkno, | 1888 | bg_start_bit, bg_blkno, |
1840 | num_clusters); | 1889 | num_clusters); |
1841 | if (status < 0) | 1890 | if (status < 0) { |
1842 | mlog_errno(status); | 1891 | mlog_errno(status); |
1892 | goto out; | ||
1893 | } | ||
1843 | 1894 | ||
1895 | ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), | ||
1896 | num_clusters); | ||
1897 | |||
1898 | out: | ||
1844 | mlog_exit(status); | 1899 | mlog_exit(status); |
1845 | return status; | 1900 | return status; |
1846 | } | 1901 | } |
@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) | |||
1891 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); | 1946 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); |
1892 | } | 1947 | } |
1893 | } | 1948 | } |
1949 | |||
1950 | /* | ||
1951 | * For a given allocation, determine which allocators will need to be | ||
1952 | * accessed, and lock them, reserving the appropriate number of bits. | ||
1953 | * | ||
1954 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
1955 | * and ocfs2_allocate_unwritten_extents(). | ||
1956 | * | ||
1957 | * File systems which don't support holes call this from | ||
1958 | * ocfs2_extend_allocation(). | ||
1959 | */ | ||
1960 | int ocfs2_lock_allocators(struct inode *inode, | ||
1961 | struct ocfs2_extent_tree *et, | ||
1962 | u32 clusters_to_add, u32 extents_to_split, | ||
1963 | struct ocfs2_alloc_context **data_ac, | ||
1964 | struct ocfs2_alloc_context **meta_ac) | ||
1965 | { | ||
1966 | int ret = 0, num_free_extents; | ||
1967 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
1968 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1969 | |||
1970 | *meta_ac = NULL; | ||
1971 | if (data_ac) | ||
1972 | *data_ac = NULL; | ||
1973 | |||
1974 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
1975 | |||
1976 | num_free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
1977 | if (num_free_extents < 0) { | ||
1978 | ret = num_free_extents; | ||
1979 | mlog_errno(ret); | ||
1980 | goto out; | ||
1981 | } | ||
1982 | |||
1983 | /* | ||
1984 | * Sparse allocation file systems need to be more conservative | ||
1985 | * with reserving room for expansion - the actual allocation | ||
1986 | * happens while we've got a journal handle open so re-taking | ||
1987 | * a cluster lock (because we ran out of room for another | ||
1988 | * extent) will violate ordering rules. | ||
1989 | * | ||
1990 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
1991 | * anyway. | ||
1992 | * | ||
1993 | * Always lock for any unwritten extents - we might want to | ||
1994 | * add blocks during a split. | ||
1995 | */ | ||
1996 | if (!num_free_extents || | ||
1997 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
1998 | ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac); | ||
1999 | if (ret < 0) { | ||
2000 | if (ret != -ENOSPC) | ||
2001 | mlog_errno(ret); | ||
2002 | goto out; | ||
2003 | } | ||
2004 | } | ||
2005 | |||
2006 | if (clusters_to_add == 0) | ||
2007 | goto out; | ||
2008 | |||
2009 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
2010 | if (ret < 0) { | ||
2011 | if (ret != -ENOSPC) | ||
2012 | mlog_errno(ret); | ||
2013 | goto out; | ||
2014 | } | ||
2015 | |||
2016 | out: | ||
2017 | if (ret) { | ||
2018 | if (*meta_ac) { | ||
2019 | ocfs2_free_alloc_context(*meta_ac); | ||
2020 | *meta_ac = NULL; | ||
2021 | } | ||
2022 | |||
2023 | /* | ||
2024 | * We cannot have an error and a non null *data_ac. | ||
2025 | */ | ||
2026 | } | ||
2027 | |||
2028 | return ret; | ||
2029 | } | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 544c600662bd..4df159d8f450 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -28,10 +28,11 @@ | |||
28 | 28 | ||
29 | typedef int (group_search_t)(struct inode *, | 29 | typedef int (group_search_t)(struct inode *, |
30 | struct buffer_head *, | 30 | struct buffer_head *, |
31 | u32, | 31 | u32, /* bits_wanted */ |
32 | u32, | 32 | u32, /* min_bits */ |
33 | u16 *, | 33 | u64, /* max_block */ |
34 | u16 *); | 34 | u16 *, /* *bit_off */ |
35 | u16 *); /* *bits_found */ | ||
35 | 36 | ||
36 | struct ocfs2_alloc_context { | 37 | struct ocfs2_alloc_context { |
37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 38 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
@@ -51,6 +52,8 @@ struct ocfs2_alloc_context { | |||
51 | group_search_t *ac_group_search; | 52 | group_search_t *ac_group_search; |
52 | 53 | ||
53 | u64 ac_last_group; | 54 | u64 ac_last_group; |
55 | u64 ac_max_block; /* Highest block number to allocate. 0 is | ||
56 | the same as ~0 - unlimited */ | ||
54 | }; | 57 | }; |
55 | 58 | ||
56 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); | 59 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |
@@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) | |||
59 | return ac->ac_bits_wanted - ac->ac_bits_given; | 62 | return ac->ac_bits_wanted - ac->ac_bits_given; |
60 | } | 63 | } |
61 | 64 | ||
65 | /* | ||
66 | * Please note that the caller must make sure that root_el is the root | ||
67 | * of the extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
68 | * the result may be wrong. | ||
69 | */ | ||
62 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 70 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, |
63 | struct ocfs2_dinode *fe, | 71 | struct ocfs2_extent_list *root_el, |
64 | struct ocfs2_alloc_context **ac); | 72 | struct ocfs2_alloc_context **ac); |
73 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | ||
74 | int blocks, | ||
75 | struct ocfs2_alloc_context **ac); | ||
65 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 76 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
66 | struct ocfs2_alloc_context **ac); | 77 | struct ocfs2_alloc_context **ac); |
67 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 78 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, |
@@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
147 | * apis above. */ | 158 | * apis above. */ |
148 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 159 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
149 | struct ocfs2_alloc_context *ac); | 160 | struct ocfs2_alloc_context *ac); |
161 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); | ||
150 | 162 | ||
151 | /* given a cluster offset, calculate which block group it belongs to | 163 | /* given a cluster offset, calculate which block group it belongs to |
152 | * and return that block offset. */ | 164 | * and return that block offset. */ |
@@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | |||
156 | int ocfs2_check_group_descriptor(struct super_block *sb, | 168 | int ocfs2_check_group_descriptor(struct super_block *sb, |
157 | struct ocfs2_dinode *di, | 169 | struct ocfs2_dinode *di, |
158 | struct ocfs2_group_desc *gd); | 170 | struct ocfs2_group_desc *gd); |
171 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | ||
172 | u32 clusters_to_add, u32 extents_to_split, | ||
173 | struct ocfs2_alloc_context **data_ac, | ||
174 | struct ocfs2_alloc_context **meta_ac); | ||
159 | #endif /* _CHAINALLOC_H_ */ | 175 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 88255d3f52b4..304b63ac78cf 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include "sysfile.h" | 64 | #include "sysfile.h" |
65 | #include "uptodate.h" | 65 | #include "uptodate.h" |
66 | #include "ver.h" | 66 | #include "ver.h" |
67 | #include "xattr.h" | ||
67 | 68 | ||
68 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
69 | 70 | ||
@@ -154,10 +155,13 @@ enum { | |||
154 | Opt_localalloc, | 155 | Opt_localalloc, |
155 | Opt_localflocks, | 156 | Opt_localflocks, |
156 | Opt_stack, | 157 | Opt_stack, |
158 | Opt_user_xattr, | ||
159 | Opt_nouser_xattr, | ||
160 | Opt_inode64, | ||
157 | Opt_err, | 161 | Opt_err, |
158 | }; | 162 | }; |
159 | 163 | ||
160 | static match_table_t tokens = { | 164 | static const match_table_t tokens = { |
161 | {Opt_barrier, "barrier=%u"}, | 165 | {Opt_barrier, "barrier=%u"}, |
162 | {Opt_err_panic, "errors=panic"}, | 166 | {Opt_err_panic, "errors=panic"}, |
163 | {Opt_err_ro, "errors=remount-ro"}, | 167 | {Opt_err_ro, "errors=remount-ro"}, |
@@ -173,6 +177,9 @@ static match_table_t tokens = { | |||
173 | {Opt_localalloc, "localalloc=%d"}, | 177 | {Opt_localalloc, "localalloc=%d"}, |
174 | {Opt_localflocks, "localflocks"}, | 178 | {Opt_localflocks, "localflocks"}, |
175 | {Opt_stack, "cluster_stack=%s"}, | 179 | {Opt_stack, "cluster_stack=%s"}, |
180 | {Opt_user_xattr, "user_xattr"}, | ||
181 | {Opt_nouser_xattr, "nouser_xattr"}, | ||
182 | {Opt_inode64, "inode64"}, | ||
176 | {Opt_err, NULL} | 183 | {Opt_err, NULL} |
177 | }; | 184 | }; |
178 | 185 | ||
@@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) | |||
205 | ocfs2_schedule_truncate_log_flush(osb, 0); | 212 | ocfs2_schedule_truncate_log_flush(osb, 0); |
206 | } | 213 | } |
207 | 214 | ||
208 | if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { | 215 | if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, |
216 | &target)) { | ||
209 | if (wait) | 217 | if (wait) |
210 | log_wait_commit(OCFS2_SB(sb)->journal->j_journal, | 218 | jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, |
211 | target); | 219 | target); |
212 | } | 220 | } |
213 | return 0; | 221 | return 0; |
214 | } | 222 | } |
@@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) | |||
325 | if (!oi) | 333 | if (!oi) |
326 | return NULL; | 334 | return NULL; |
327 | 335 | ||
336 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); | ||
328 | return &oi->vfs_inode; | 337 | return &oi->vfs_inode; |
329 | } | 338 | } |
330 | 339 | ||
@@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
406 | goto out; | 415 | goto out; |
407 | } | 416 | } |
408 | 417 | ||
418 | /* Probably don't want this on remount; it might | ||
419 | * mess with other nodes */ | ||
420 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && | ||
421 | (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) { | ||
422 | ret = -EINVAL; | ||
423 | mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); | ||
424 | goto out; | ||
425 | } | ||
426 | |||
409 | /* We're going to/from readonly mode. */ | 427 | /* We're going to/from readonly mode. */ |
410 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { | 428 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
411 | /* Lock here so the check of HARD_RO and the potential | 429 | /* Lock here so the check of HARD_RO and the potential |
@@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
637 | osb->s_atime_quantum = parsed_options.atime_quantum; | 655 | osb->s_atime_quantum = parsed_options.atime_quantum; |
638 | osb->preferred_slot = parsed_options.slot; | 656 | osb->preferred_slot = parsed_options.slot; |
639 | osb->osb_commit_interval = parsed_options.commit_interval; | 657 | osb->osb_commit_interval = parsed_options.commit_interval; |
640 | osb->local_alloc_size = parsed_options.localalloc_opt; | 658 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); |
659 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
641 | 660 | ||
642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | 661 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); |
643 | if (status) | 662 | if (status) |
@@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
743 | return status; | 762 | return status; |
744 | 763 | ||
745 | read_super_error: | 764 | read_super_error: |
746 | if (bh != NULL) | 765 | brelse(bh); |
747 | brelse(bh); | ||
748 | 766 | ||
749 | if (inode) | 767 | if (inode) |
750 | iput(inode); | 768 | iput(inode); |
@@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
847 | case Opt_data_writeback: | 865 | case Opt_data_writeback: |
848 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; | 866 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; |
849 | break; | 867 | break; |
868 | case Opt_user_xattr: | ||
869 | mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; | ||
870 | break; | ||
871 | case Opt_nouser_xattr: | ||
872 | mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; | ||
873 | break; | ||
850 | case Opt_atime_quantum: | 874 | case Opt_atime_quantum: |
851 | if (match_int(&args[0], &option)) { | 875 | if (match_int(&args[0], &option)) { |
852 | status = 0; | 876 | status = 0; |
@@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
873 | if (option < 0) | 897 | if (option < 0) |
874 | return 0; | 898 | return 0; |
875 | if (option == 0) | 899 | if (option == 0) |
876 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | 900 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; |
877 | mopt->commit_interval = HZ * option; | 901 | mopt->commit_interval = HZ * option; |
878 | break; | 902 | break; |
879 | case Opt_localalloc: | 903 | case Opt_localalloc: |
@@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
918 | OCFS2_STACK_LABEL_LEN); | 942 | OCFS2_STACK_LABEL_LEN); |
919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | 943 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; |
920 | break; | 944 | break; |
945 | case Opt_inode64: | ||
946 | mopt->mount_opt |= OCFS2_MOUNT_INODE64; | ||
947 | break; | ||
921 | default: | 948 | default: |
922 | mlog(ML_ERROR, | 949 | mlog(ML_ERROR, |
923 | "Unrecognized mount option \"%s\" " | 950 | "Unrecognized mount option \"%s\" " |
@@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
938 | { | 965 | { |
939 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); | 966 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); |
940 | unsigned long opts = osb->s_mount_opt; | 967 | unsigned long opts = osb->s_mount_opt; |
968 | unsigned int local_alloc_megs; | ||
941 | 969 | ||
942 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 970 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
943 | seq_printf(s, ",_netdev,heartbeat=local"); | 971 | seq_printf(s, ",_netdev,heartbeat=local"); |
@@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
970 | seq_printf(s, ",commit=%u", | 998 | seq_printf(s, ",commit=%u", |
971 | (unsigned) (osb->osb_commit_interval / HZ)); | 999 | (unsigned) (osb->osb_commit_interval / HZ)); |
972 | 1000 | ||
973 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | 1001 | local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); |
974 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | 1002 | if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) |
1003 | seq_printf(s, ",localalloc=%d", local_alloc_megs); | ||
975 | 1004 | ||
976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 1005 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
977 | seq_printf(s, ",localflocks,"); | 1006 | seq_printf(s, ",localflocks,"); |
@@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | 1009 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, |
981 | osb->osb_cluster_stack); | 1010 | osb->osb_cluster_stack); |
982 | 1011 | ||
1012 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | ||
1013 | seq_printf(s, ",nouser_xattr"); | ||
1014 | else | ||
1015 | seq_printf(s, ",user_xattr"); | ||
1016 | |||
1017 | if (opts & OCFS2_MOUNT_INODE64) | ||
1018 | seq_printf(s, ",inode64"); | ||
1019 | |||
983 | return 0; | 1020 | return 0; |
984 | } | 1021 | } |
985 | 1022 | ||
@@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data) | |||
1132 | oi->ip_dir_start_lookup = 0; | 1169 | oi->ip_dir_start_lookup = 0; |
1133 | 1170 | ||
1134 | init_rwsem(&oi->ip_alloc_sem); | 1171 | init_rwsem(&oi->ip_alloc_sem); |
1172 | init_rwsem(&oi->ip_xattr_sem); | ||
1135 | mutex_init(&oi->ip_io_mutex); | 1173 | mutex_init(&oi->ip_io_mutex); |
1136 | 1174 | ||
1137 | oi->ip_blkno = 0ULL; | 1175 | oi->ip_blkno = 0ULL; |
@@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1375 | sb->s_fs_info = osb; | 1413 | sb->s_fs_info = osb; |
1376 | sb->s_op = &ocfs2_sops; | 1414 | sb->s_op = &ocfs2_sops; |
1377 | sb->s_export_op = &ocfs2_export_ops; | 1415 | sb->s_export_op = &ocfs2_export_ops; |
1416 | sb->s_xattr = ocfs2_xattr_handlers; | ||
1378 | sb->s_time_gran = 1; | 1417 | sb->s_time_gran = 1; |
1379 | sb->s_flags |= MS_NOATIME; | 1418 | sb->s_flags |= MS_NOATIME; |
1380 | /* this is needed to support O_LARGEFILE */ | 1419 | /* this is needed to support O_LARGEFILE */ |
@@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1421 | 1460 | ||
1422 | osb->slot_num = OCFS2_INVALID_SLOT; | 1461 | osb->slot_num = OCFS2_INVALID_SLOT; |
1423 | 1462 | ||
1463 | osb->s_xattr_inline_size = le16_to_cpu( | ||
1464 | di->id2.i_super.s_xattr_inline_size); | ||
1465 | |||
1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1466 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
1425 | osb->local_alloc_bh = NULL; | 1467 | osb->local_alloc_bh = NULL; |
1468 | INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); | ||
1426 | 1469 | ||
1427 | init_waitqueue_head(&osb->osb_mount_event); | 1470 | init_waitqueue_head(&osb->osb_mount_event); |
1428 | 1471 | ||
@@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1568 | osb->first_cluster_group_blkno = | 1611 | osb->first_cluster_group_blkno = |
1569 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); | 1612 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); |
1570 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); | 1613 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); |
1614 | osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); | ||
1571 | mlog(0, "vol_label: %s\n", osb->vol_label); | 1615 | mlog(0, "vol_label: %s\n", osb->vol_label); |
1572 | mlog(0, "uuid: %s\n", osb->uuid_str); | 1616 | mlog(0, "uuid: %s\n", osb->uuid_str); |
1573 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", | 1617 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index ba9dbb51d25b..cbd03dfdc7b9 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "inode.h" | 50 | #include "inode.h" |
51 | #include "journal.h" | 51 | #include "journal.h" |
52 | #include "symlink.h" | 52 | #include "symlink.h" |
53 | #include "xattr.h" | ||
53 | 54 | ||
54 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
55 | 56 | ||
@@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, | |||
83 | 84 | ||
84 | mlog_entry_void(); | 85 | mlog_entry_void(); |
85 | 86 | ||
86 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 87 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); |
87 | OCFS2_I(inode)->ip_blkno, | ||
88 | bh, | ||
89 | OCFS2_BH_CACHED, | ||
90 | inode); | ||
91 | if (status < 0) { | 88 | if (status < 0) { |
92 | mlog_errno(status); | 89 | mlog_errno(status); |
93 | link = ERR_PTR(status); | 90 | link = ERR_PTR(status); |
@@ -157,8 +154,7 @@ bail: | |||
157 | kunmap(page); | 154 | kunmap(page); |
158 | page_cache_release(page); | 155 | page_cache_release(page); |
159 | } | 156 | } |
160 | if (bh) | 157 | brelse(bh); |
161 | brelse(bh); | ||
162 | 158 | ||
163 | return ERR_PTR(status); | 159 | return ERR_PTR(status); |
164 | } | 160 | } |
@@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = { | |||
168 | .follow_link = ocfs2_follow_link, | 164 | .follow_link = ocfs2_follow_link, |
169 | .getattr = ocfs2_getattr, | 165 | .getattr = ocfs2_getattr, |
170 | .setattr = ocfs2_setattr, | 166 | .setattr = ocfs2_setattr, |
167 | .setxattr = generic_setxattr, | ||
168 | .getxattr = generic_getxattr, | ||
169 | .listxattr = ocfs2_listxattr, | ||
170 | .removexattr = generic_removexattr, | ||
171 | }; | 171 | }; |
172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { | 172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { |
173 | .readlink = ocfs2_readlink, | 173 | .readlink = ocfs2_readlink, |
174 | .follow_link = ocfs2_follow_link, | 174 | .follow_link = ocfs2_follow_link, |
175 | .getattr = ocfs2_getattr, | 175 | .getattr = ocfs2_getattr, |
176 | .setattr = ocfs2_setattr, | 176 | .setattr = ocfs2_setattr, |
177 | .setxattr = generic_setxattr, | ||
178 | .getxattr = generic_getxattr, | ||
179 | .listxattr = ocfs2_listxattr, | ||
180 | .removexattr = generic_removexattr, | ||
177 | }; | 181 | }; |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 4da8851f2b23..187b99ff0368 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
@@ -53,7 +53,11 @@ | |||
53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
54 | #include <linux/buffer_head.h> | 54 | #include <linux/buffer_head.h> |
55 | #include <linux/rbtree.h> | 55 | #include <linux/rbtree.h> |
56 | #include <linux/jbd.h> | 56 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
57 | # include <linux/jbd2.h> | ||
58 | #else | ||
59 | # include <linux/jbd.h> | ||
60 | #endif | ||
57 | 61 | ||
58 | #define MLOG_MASK_PREFIX ML_UPTODATE | 62 | #define MLOG_MASK_PREFIX ML_UPTODATE |
59 | 63 | ||
@@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, | |||
511 | ci->ci_num_cached--; | 515 | ci->ci_num_cached--; |
512 | } | 516 | } |
513 | 517 | ||
514 | /* Called when we remove a chunk of metadata from an inode. We don't | 518 | static void ocfs2_remove_block_from_cache(struct inode *inode, |
515 | * bother reverting things to an inlined array in the case of a remove | 519 | sector_t block) |
516 | * which moves us back under the limit. */ | ||
517 | void ocfs2_remove_from_cache(struct inode *inode, | ||
518 | struct buffer_head *bh) | ||
519 | { | 520 | { |
520 | int index; | 521 | int index; |
521 | sector_t block = bh->b_blocknr; | ||
522 | struct ocfs2_meta_cache_item *item = NULL; | 522 | struct ocfs2_meta_cache_item *item = NULL; |
523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | 524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; |
@@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode, | |||
544 | kmem_cache_free(ocfs2_uptodate_cachep, item); | 544 | kmem_cache_free(ocfs2_uptodate_cachep, item); |
545 | } | 545 | } |
546 | 546 | ||
547 | /* | ||
548 | * Called when we remove a chunk of metadata from an inode. We don't | ||
549 | * bother reverting things to an inlined array in the case of a remove | ||
550 | * which moves us back under the limit. | ||
551 | */ | ||
552 | void ocfs2_remove_from_cache(struct inode *inode, | ||
553 | struct buffer_head *bh) | ||
554 | { | ||
555 | sector_t block = bh->b_blocknr; | ||
556 | |||
557 | ocfs2_remove_block_from_cache(inode, block); | ||
558 | } | ||
559 | |||
560 | /* Called when we remove xattr clusters from an inode. */ | ||
561 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
562 | sector_t block, | ||
563 | u32 c_len) | ||
564 | { | ||
565 | unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; | ||
566 | |||
567 | for (i = 0; i < b_len; i++, block++) | ||
568 | ocfs2_remove_block_from_cache(inode, block); | ||
569 | } | ||
570 | |||
547 | int __init init_ocfs2_uptodate_cache(void) | 571 | int __init init_ocfs2_uptodate_cache(void) |
548 | { | 572 | { |
549 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", | 573 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", |
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 2e73206059a8..531b4b3a0c47 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h | |||
@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, | |||
40 | struct buffer_head *bh); | 40 | struct buffer_head *bh); |
41 | void ocfs2_remove_from_cache(struct inode *inode, | 41 | void ocfs2_remove_from_cache(struct inode *inode, |
42 | struct buffer_head *bh); | 42 | struct buffer_head *bh); |
43 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
44 | sector_t block, | ||
45 | u32 c_len); | ||
43 | int ocfs2_buffer_read_ahead(struct inode *inode, | 46 | int ocfs2_buffer_read_ahead(struct inode *inode, |
44 | struct buffer_head *bh); | 47 | struct buffer_head *bh); |
45 | 48 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c new file mode 100644 index 000000000000..c25780a70dfd --- /dev/null +++ b/fs/ocfs2/xattr.c | |||
@@ -0,0 +1,4834 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * xattr.c | ||
5 | * | ||
6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
7 | * | ||
8 | * CREDITS: | ||
9 | * Lots of code in this file is taken from ext3. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public | ||
22 | * License along with this program; if not, write to the | ||
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
24 | * Boston, MA 02111-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/capability.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/highmem.h> | ||
32 | #include <linux/pagemap.h> | ||
33 | #include <linux/uio.h> | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/splice.h> | ||
36 | #include <linux/mount.h> | ||
37 | #include <linux/writeback.h> | ||
38 | #include <linux/falloc.h> | ||
39 | #include <linux/sort.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/string.h> | ||
43 | |||
44 | #define MLOG_MASK_PREFIX ML_XATTR | ||
45 | #include <cluster/masklog.h> | ||
46 | |||
47 | #include "ocfs2.h" | ||
48 | #include "alloc.h" | ||
49 | #include "dlmglue.h" | ||
50 | #include "file.h" | ||
51 | #include "symlink.h" | ||
52 | #include "sysfile.h" | ||
53 | #include "inode.h" | ||
54 | #include "journal.h" | ||
55 | #include "ocfs2_fs.h" | ||
56 | #include "suballoc.h" | ||
57 | #include "uptodate.h" | ||
58 | #include "buffer_head_io.h" | ||
59 | #include "super.h" | ||
60 | #include "xattr.h" | ||
61 | |||
62 | |||
/*
 * An xattr value root followed by room for one extent record.
 * sizeof() of this struct is what OCFS2_XATTR_ROOT_SIZE expands to, and
 * def_xv is an instance whose extent list has l_count set to 1.
 */
63 | struct ocfs2_xattr_def_value_root { | ||
64 | struct ocfs2_xattr_value_root xv; | ||
65 | struct ocfs2_extent_rec er; | ||
66 | }; | ||
67 | |||
/*
 * In-memory view of one xattr bucket: the buffer heads of its blocks (at
 * most OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET of them) and the xattr header
 * that lookups hand to ocfs2_xattr_bucket_get_name_value().
 * NOTE(review): xh presumably points into bhs[0]->b_data - confirm.
 */
68 | struct ocfs2_xattr_bucket { | ||
69 | struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; | ||
70 | struct ocfs2_xattr_header *xh; | ||
71 | }; | ||
72 | |||
/* Bytes taken by a value root together with its single extent record. */
73 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) | ||
/* Values up to this many bytes are flagged "local" when an entry is updated. */
74 | #define OCFS2_XATTR_INLINE_SIZE 80 | ||
75 | |||
/* Value root instance whose extent list advertises one record slot. */
76 | static struct ocfs2_xattr_def_value_root def_xv = { | ||
77 | .xv.xr_list.l_count = cpu_to_le16(1), | ||
78 | }; | ||
79 | |||
/*
 * NULL-terminated list of the xattr handlers implemented for ocfs2
 * (user.* and trusted.*).
 * NOTE(review): presumably installed as the superblock handler list -
 * the consumer is not visible in this file section.
 */
80 | struct xattr_handler *ocfs2_xattr_handlers[] = { | ||
81 | &ocfs2_xattr_user_handler, | ||
82 | &ocfs2_xattr_trusted_handler, | ||
83 | NULL | ||
84 | }; | ||
85 | |||
86 | static struct xattr_handler *ocfs2_xattr_handler_map[] = { | ||
87 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | ||
88 | [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, | ||
89 | }; | ||
90 | |||
/*
 * Describes the attribute an update operates on: its name index, its name
 * (a NUL-terminated string; the code takes strlen() of it), and the new
 * value with its length in bytes.
 */
91 | struct ocfs2_xattr_info { | ||
92 | int name_index; | ||
93 | const char *name; | ||
94 | const void *value; | ||
95 | size_t value_len; | ||
96 | }; | ||
97 | |||
/*
 * State of one xattr lookup.
 *
 * header/base/end describe the region being searched: entry name offsets
 * (xe_name_offset) are applied relative to base, and end is set to the
 * end of the containing block.  here is the cursor advanced by
 * ocfs2_xattr_find_entry(); on success it points at the matching entry.
 * bucket holds the buffers of the bucket involved when the xattr block is
 * indexed.  not_found is initialised to -ENODATA by ocfs2_xattr_get();
 * its other users are not visible in this section.
 */
98 | struct ocfs2_xattr_search { | ||
99 | struct buffer_head *inode_bh; | ||
100 | /* | ||
101 | * xattr_bh point to the block buffer head which has extended attribute | ||
102 | * when extended attribute in inode, xattr_bh is equal to inode_bh. | ||
103 | */ | ||
104 | struct buffer_head *xattr_bh; | ||
105 | struct ocfs2_xattr_header *header; | ||
106 | struct ocfs2_xattr_bucket bucket; | ||
107 | void *base; | ||
108 | void *end; | ||
109 | struct ocfs2_xattr_entry *here; | ||
110 | int not_found; | ||
111 | }; | ||
112 | |||
/*
 * Forward declarations for the indexed (bucket based) xattr block helpers.
 * They are used by the get/list code below before their definitions
 * appear later in this file.
 */
113 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
114 | struct ocfs2_xattr_header *xh, | ||
115 | int index, | ||
116 | int *block_off, | ||
117 | int *new_offset); | ||
118 | |||
119 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
120 | struct buffer_head *root_bh, | ||
121 | int name_index, | ||
122 | const char *name, | ||
123 | struct ocfs2_xattr_search *xs); | ||
124 | |||
125 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
126 | struct ocfs2_xattr_tree_root *xt, | ||
127 | char *buffer, | ||
128 | size_t buffer_size); | ||
129 | |||
130 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
131 | struct ocfs2_xattr_search *xs); | ||
132 | |||
133 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
134 | struct ocfs2_xattr_info *xi, | ||
135 | struct ocfs2_xattr_search *xs); | ||
136 | |||
137 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
138 | struct buffer_head *xb_bh); | ||
139 | |||
140 | static inline const char *ocfs2_xattr_prefix(int name_index) | ||
141 | { | ||
142 | struct xattr_handler *handler = NULL; | ||
143 | |||
144 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) | ||
145 | handler = ocfs2_xattr_handler_map[name_index]; | ||
146 | |||
147 | return handler ? handler->prefix : NULL; | ||
148 | } | ||
149 | |||
150 | static u32 ocfs2_xattr_name_hash(struct inode *inode, | ||
151 | const char *name, | ||
152 | int name_len) | ||
153 | { | ||
154 | /* Get hash value of uuid from super block */ | ||
155 | u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; | ||
156 | int i; | ||
157 | |||
158 | /* hash extended attribute name */ | ||
159 | for (i = 0; i < name_len; i++) { | ||
160 | hash = (hash << OCFS2_HASH_SHIFT) ^ | ||
161 | (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ | ||
162 | *name++; | ||
163 | } | ||
164 | |||
165 | return hash; | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * ocfs2_xattr_hash_entry() | ||
170 | * | ||
171 | * Compute the hash of an extended attribute. | ||
172 | */ | ||
173 | static void ocfs2_xattr_hash_entry(struct inode *inode, | ||
174 | struct ocfs2_xattr_header *header, | ||
175 | struct ocfs2_xattr_entry *entry) | ||
176 | { | ||
177 | u32 hash = 0; | ||
178 | char *name = (char *)header + le16_to_cpu(entry->xe_name_offset); | ||
179 | |||
180 | hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len); | ||
181 | entry->xe_name_hash = cpu_to_le32(hash); | ||
182 | |||
183 | return; | ||
184 | } | ||
185 | |||
186 | static int ocfs2_xattr_extend_allocation(struct inode *inode, | ||
187 | u32 clusters_to_add, | ||
188 | struct buffer_head *xattr_bh, | ||
189 | struct ocfs2_xattr_value_root *xv) | ||
190 | { | ||
191 | int status = 0; | ||
192 | int restart_func = 0; | ||
193 | int credits = 0; | ||
194 | handle_t *handle = NULL; | ||
195 | struct ocfs2_alloc_context *data_ac = NULL; | ||
196 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
197 | enum ocfs2_alloc_restarted why; | ||
198 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
199 | u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); | ||
200 | struct ocfs2_extent_tree et; | ||
201 | |||
202 | mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); | ||
203 | |||
204 | ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); | ||
205 | |||
206 | restart_all: | ||
207 | |||
208 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
209 | &data_ac, &meta_ac); | ||
210 | if (status) { | ||
211 | mlog_errno(status); | ||
212 | goto leave; | ||
213 | } | ||
214 | |||
215 | credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, | ||
216 | clusters_to_add); | ||
217 | handle = ocfs2_start_trans(osb, credits); | ||
218 | if (IS_ERR(handle)) { | ||
219 | status = PTR_ERR(handle); | ||
220 | handle = NULL; | ||
221 | mlog_errno(status); | ||
222 | goto leave; | ||
223 | } | ||
224 | |||
225 | restarted_transaction: | ||
226 | status = ocfs2_journal_access(handle, inode, xattr_bh, | ||
227 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
228 | if (status < 0) { | ||
229 | mlog_errno(status); | ||
230 | goto leave; | ||
231 | } | ||
232 | |||
233 | prev_clusters = le32_to_cpu(xv->xr_clusters); | ||
234 | status = ocfs2_add_clusters_in_btree(osb, | ||
235 | inode, | ||
236 | &logical_start, | ||
237 | clusters_to_add, | ||
238 | 0, | ||
239 | &et, | ||
240 | handle, | ||
241 | data_ac, | ||
242 | meta_ac, | ||
243 | &why); | ||
244 | if ((status < 0) && (status != -EAGAIN)) { | ||
245 | if (status != -ENOSPC) | ||
246 | mlog_errno(status); | ||
247 | goto leave; | ||
248 | } | ||
249 | |||
250 | status = ocfs2_journal_dirty(handle, xattr_bh); | ||
251 | if (status < 0) { | ||
252 | mlog_errno(status); | ||
253 | goto leave; | ||
254 | } | ||
255 | |||
256 | clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; | ||
257 | |||
258 | if (why != RESTART_NONE && clusters_to_add) { | ||
259 | if (why == RESTART_META) { | ||
260 | mlog(0, "restarting function.\n"); | ||
261 | restart_func = 1; | ||
262 | } else { | ||
263 | BUG_ON(why != RESTART_TRANS); | ||
264 | |||
265 | mlog(0, "restarting transaction.\n"); | ||
266 | /* TODO: This can be more intelligent. */ | ||
267 | credits = ocfs2_calc_extend_credits(osb->sb, | ||
268 | et.et_root_el, | ||
269 | clusters_to_add); | ||
270 | status = ocfs2_extend_trans(handle, credits); | ||
271 | if (status < 0) { | ||
272 | /* handle still has to be committed at | ||
273 | * this point. */ | ||
274 | status = -ENOMEM; | ||
275 | mlog_errno(status); | ||
276 | goto leave; | ||
277 | } | ||
278 | goto restarted_transaction; | ||
279 | } | ||
280 | } | ||
281 | |||
282 | leave: | ||
283 | if (handle) { | ||
284 | ocfs2_commit_trans(osb, handle); | ||
285 | handle = NULL; | ||
286 | } | ||
287 | if (data_ac) { | ||
288 | ocfs2_free_alloc_context(data_ac); | ||
289 | data_ac = NULL; | ||
290 | } | ||
291 | if (meta_ac) { | ||
292 | ocfs2_free_alloc_context(meta_ac); | ||
293 | meta_ac = NULL; | ||
294 | } | ||
295 | if ((!status) && restart_func) { | ||
296 | restart_func = 0; | ||
297 | goto restart_all; | ||
298 | } | ||
299 | |||
300 | return status; | ||
301 | } | ||
302 | |||
303 | static int __ocfs2_remove_xattr_range(struct inode *inode, | ||
304 | struct buffer_head *root_bh, | ||
305 | struct ocfs2_xattr_value_root *xv, | ||
306 | u32 cpos, u32 phys_cpos, u32 len, | ||
307 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
308 | { | ||
309 | int ret; | ||
310 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
311 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
312 | struct inode *tl_inode = osb->osb_tl_inode; | ||
313 | handle_t *handle; | ||
314 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
315 | struct ocfs2_extent_tree et; | ||
316 | |||
317 | ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); | ||
318 | |||
319 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
320 | if (ret) { | ||
321 | mlog_errno(ret); | ||
322 | return ret; | ||
323 | } | ||
324 | |||
325 | mutex_lock(&tl_inode->i_mutex); | ||
326 | |||
327 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
328 | ret = __ocfs2_flush_truncate_log(osb); | ||
329 | if (ret < 0) { | ||
330 | mlog_errno(ret); | ||
331 | goto out; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
336 | if (IS_ERR(handle)) { | ||
337 | ret = PTR_ERR(handle); | ||
338 | mlog_errno(ret); | ||
339 | goto out; | ||
340 | } | ||
341 | |||
342 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
343 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
344 | if (ret) { | ||
345 | mlog_errno(ret); | ||
346 | goto out_commit; | ||
347 | } | ||
348 | |||
349 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
350 | dealloc); | ||
351 | if (ret) { | ||
352 | mlog_errno(ret); | ||
353 | goto out_commit; | ||
354 | } | ||
355 | |||
356 | le32_add_cpu(&xv->xr_clusters, -len); | ||
357 | |||
358 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
359 | if (ret) { | ||
360 | mlog_errno(ret); | ||
361 | goto out_commit; | ||
362 | } | ||
363 | |||
364 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
365 | if (ret) | ||
366 | mlog_errno(ret); | ||
367 | |||
368 | out_commit: | ||
369 | ocfs2_commit_trans(osb, handle); | ||
370 | out: | ||
371 | mutex_unlock(&tl_inode->i_mutex); | ||
372 | |||
373 | if (meta_ac) | ||
374 | ocfs2_free_alloc_context(meta_ac); | ||
375 | |||
376 | return ret; | ||
377 | } | ||
378 | |||
379 | static int ocfs2_xattr_shrink_size(struct inode *inode, | ||
380 | u32 old_clusters, | ||
381 | u32 new_clusters, | ||
382 | struct buffer_head *root_bh, | ||
383 | struct ocfs2_xattr_value_root *xv) | ||
384 | { | ||
385 | int ret = 0; | ||
386 | u32 trunc_len, cpos, phys_cpos, alloc_size; | ||
387 | u64 block; | ||
388 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
389 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
390 | |||
391 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
392 | |||
393 | if (old_clusters <= new_clusters) | ||
394 | return 0; | ||
395 | |||
396 | cpos = new_clusters; | ||
397 | trunc_len = old_clusters - new_clusters; | ||
398 | while (trunc_len) { | ||
399 | ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, | ||
400 | &alloc_size, &xv->xr_list); | ||
401 | if (ret) { | ||
402 | mlog_errno(ret); | ||
403 | goto out; | ||
404 | } | ||
405 | |||
406 | if (alloc_size > trunc_len) | ||
407 | alloc_size = trunc_len; | ||
408 | |||
409 | ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, | ||
410 | phys_cpos, alloc_size, | ||
411 | &dealloc); | ||
412 | if (ret) { | ||
413 | mlog_errno(ret); | ||
414 | goto out; | ||
415 | } | ||
416 | |||
417 | block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
418 | ocfs2_remove_xattr_clusters_from_cache(inode, block, | ||
419 | alloc_size); | ||
420 | cpos += alloc_size; | ||
421 | trunc_len -= alloc_size; | ||
422 | } | ||
423 | |||
424 | out: | ||
425 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
426 | ocfs2_run_deallocs(osb, &dealloc); | ||
427 | |||
428 | return ret; | ||
429 | } | ||
430 | |||
431 | static int ocfs2_xattr_value_truncate(struct inode *inode, | ||
432 | struct buffer_head *root_bh, | ||
433 | struct ocfs2_xattr_value_root *xv, | ||
434 | int len) | ||
435 | { | ||
436 | int ret; | ||
437 | u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
438 | u32 old_clusters = le32_to_cpu(xv->xr_clusters); | ||
439 | |||
440 | if (new_clusters == old_clusters) | ||
441 | return 0; | ||
442 | |||
443 | if (new_clusters > old_clusters) | ||
444 | ret = ocfs2_xattr_extend_allocation(inode, | ||
445 | new_clusters - old_clusters, | ||
446 | root_bh, xv); | ||
447 | else | ||
448 | ret = ocfs2_xattr_shrink_size(inode, | ||
449 | old_clusters, new_clusters, | ||
450 | root_bh, xv); | ||
451 | |||
452 | return ret; | ||
453 | } | ||
454 | |||
/*
 * Append "<prefix><name>\0" to the name list being built in @buffer.
 *
 * *@result is the number of bytes accounted for so far and is always
 * advanced by the length of the new entry.  With @size == 0 the call only
 * counts and @buffer is left untouched.  Otherwise the entry is copied to
 * @buffer at the old *@result offset, or -ERANGE is returned when it does
 * not fit within @size bytes.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int entry_len = prefix_len + name_len + 1;
	char *dst;

	*result = start + entry_len;

	/* we are just looking for how big our buffer needs to be */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	memcpy(dst + prefix_len, name, name_len);
	dst[prefix_len + name_len] = '\0';

	return 0;
}
478 | |||
479 | static int ocfs2_xattr_list_entries(struct inode *inode, | ||
480 | struct ocfs2_xattr_header *header, | ||
481 | char *buffer, size_t buffer_size) | ||
482 | { | ||
483 | size_t result = 0; | ||
484 | int i, type, ret; | ||
485 | const char *prefix, *name; | ||
486 | |||
487 | for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { | ||
488 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
489 | type = ocfs2_xattr_get_type(entry); | ||
490 | prefix = ocfs2_xattr_prefix(type); | ||
491 | |||
492 | if (prefix) { | ||
493 | name = (const char *)header + | ||
494 | le16_to_cpu(entry->xe_name_offset); | ||
495 | |||
496 | ret = ocfs2_xattr_list_entry(buffer, buffer_size, | ||
497 | &result, prefix, name, | ||
498 | entry->xe_name_len); | ||
499 | if (ret) | ||
500 | return ret; | ||
501 | } | ||
502 | } | ||
503 | |||
504 | return result; | ||
505 | } | ||
506 | |||
507 | static int ocfs2_xattr_ibody_list(struct inode *inode, | ||
508 | struct ocfs2_dinode *di, | ||
509 | char *buffer, | ||
510 | size_t buffer_size) | ||
511 | { | ||
512 | struct ocfs2_xattr_header *header = NULL; | ||
513 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
514 | int ret = 0; | ||
515 | |||
516 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
517 | return ret; | ||
518 | |||
519 | header = (struct ocfs2_xattr_header *) | ||
520 | ((void *)di + inode->i_sb->s_blocksize - | ||
521 | le16_to_cpu(di->i_xattr_inline_size)); | ||
522 | |||
523 | ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); | ||
524 | |||
525 | return ret; | ||
526 | } | ||
527 | |||
528 | static int ocfs2_xattr_block_list(struct inode *inode, | ||
529 | struct ocfs2_dinode *di, | ||
530 | char *buffer, | ||
531 | size_t buffer_size) | ||
532 | { | ||
533 | struct buffer_head *blk_bh = NULL; | ||
534 | struct ocfs2_xattr_block *xb; | ||
535 | int ret = 0; | ||
536 | |||
537 | if (!di->i_xattr_loc) | ||
538 | return ret; | ||
539 | |||
540 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
541 | if (ret < 0) { | ||
542 | mlog_errno(ret); | ||
543 | return ret; | ||
544 | } | ||
545 | /*Verify the signature of xattr block*/ | ||
546 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
547 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
548 | ret = -EFAULT; | ||
549 | goto cleanup; | ||
550 | } | ||
551 | |||
552 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
553 | |||
554 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
555 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | ||
556 | ret = ocfs2_xattr_list_entries(inode, header, | ||
557 | buffer, buffer_size); | ||
558 | } else { | ||
559 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
560 | ret = ocfs2_xattr_tree_list_index_block(inode, xt, | ||
561 | buffer, buffer_size); | ||
562 | } | ||
563 | cleanup: | ||
564 | brelse(blk_bh); | ||
565 | |||
566 | return ret; | ||
567 | } | ||
568 | |||
569 | ssize_t ocfs2_listxattr(struct dentry *dentry, | ||
570 | char *buffer, | ||
571 | size_t size) | ||
572 | { | ||
573 | int ret = 0, i_ret = 0, b_ret = 0; | ||
574 | struct buffer_head *di_bh = NULL; | ||
575 | struct ocfs2_dinode *di = NULL; | ||
576 | struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode); | ||
577 | |||
578 | if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) | ||
579 | return -EOPNOTSUPP; | ||
580 | |||
581 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
582 | return ret; | ||
583 | |||
584 | ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0); | ||
585 | if (ret < 0) { | ||
586 | mlog_errno(ret); | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
591 | |||
592 | down_read(&oi->ip_xattr_sem); | ||
593 | i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size); | ||
594 | if (i_ret < 0) | ||
595 | b_ret = 0; | ||
596 | else { | ||
597 | if (buffer) { | ||
598 | buffer += i_ret; | ||
599 | size -= i_ret; | ||
600 | } | ||
601 | b_ret = ocfs2_xattr_block_list(dentry->d_inode, di, | ||
602 | buffer, size); | ||
603 | if (b_ret < 0) | ||
604 | i_ret = 0; | ||
605 | } | ||
606 | up_read(&oi->ip_xattr_sem); | ||
607 | ocfs2_inode_unlock(dentry->d_inode, 0); | ||
608 | |||
609 | brelse(di_bh); | ||
610 | |||
611 | return i_ret + b_ret; | ||
612 | } | ||
613 | |||
614 | static int ocfs2_xattr_find_entry(int name_index, | ||
615 | const char *name, | ||
616 | struct ocfs2_xattr_search *xs) | ||
617 | { | ||
618 | struct ocfs2_xattr_entry *entry; | ||
619 | size_t name_len; | ||
620 | int i, cmp = 1; | ||
621 | |||
622 | if (name == NULL) | ||
623 | return -EINVAL; | ||
624 | |||
625 | name_len = strlen(name); | ||
626 | entry = xs->here; | ||
627 | for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
628 | cmp = name_index - ocfs2_xattr_get_type(entry); | ||
629 | if (!cmp) | ||
630 | cmp = name_len - entry->xe_name_len; | ||
631 | if (!cmp) | ||
632 | cmp = memcmp(name, (xs->base + | ||
633 | le16_to_cpu(entry->xe_name_offset)), | ||
634 | name_len); | ||
635 | if (cmp == 0) | ||
636 | break; | ||
637 | entry += 1; | ||
638 | } | ||
639 | xs->here = entry; | ||
640 | |||
641 | return cmp ? -ENODATA : 0; | ||
642 | } | ||
643 | |||
644 | static int ocfs2_xattr_get_value_outside(struct inode *inode, | ||
645 | struct ocfs2_xattr_value_root *xv, | ||
646 | void *buffer, | ||
647 | size_t len) | ||
648 | { | ||
649 | u32 cpos, p_cluster, num_clusters, bpc, clusters; | ||
650 | u64 blkno; | ||
651 | int i, ret = 0; | ||
652 | size_t cplen, blocksize; | ||
653 | struct buffer_head *bh = NULL; | ||
654 | struct ocfs2_extent_list *el; | ||
655 | |||
656 | el = &xv->xr_list; | ||
657 | clusters = le32_to_cpu(xv->xr_clusters); | ||
658 | bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
659 | blocksize = inode->i_sb->s_blocksize; | ||
660 | |||
661 | cpos = 0; | ||
662 | while (cpos < clusters) { | ||
663 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
664 | &num_clusters, el); | ||
665 | if (ret) { | ||
666 | mlog_errno(ret); | ||
667 | goto out; | ||
668 | } | ||
669 | |||
670 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
671 | /* Copy ocfs2_xattr_value */ | ||
672 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | ||
673 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
674 | if (ret) { | ||
675 | mlog_errno(ret); | ||
676 | goto out; | ||
677 | } | ||
678 | |||
679 | cplen = len >= blocksize ? blocksize : len; | ||
680 | memcpy(buffer, bh->b_data, cplen); | ||
681 | len -= cplen; | ||
682 | buffer += cplen; | ||
683 | |||
684 | brelse(bh); | ||
685 | bh = NULL; | ||
686 | if (len == 0) | ||
687 | break; | ||
688 | } | ||
689 | cpos += num_clusters; | ||
690 | } | ||
691 | out: | ||
692 | return ret; | ||
693 | } | ||
694 | |||
695 | static int ocfs2_xattr_ibody_get(struct inode *inode, | ||
696 | int name_index, | ||
697 | const char *name, | ||
698 | void *buffer, | ||
699 | size_t buffer_size, | ||
700 | struct ocfs2_xattr_search *xs) | ||
701 | { | ||
702 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
703 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
704 | struct ocfs2_xattr_value_root *xv; | ||
705 | size_t size; | ||
706 | int ret = 0; | ||
707 | |||
708 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
709 | return -ENODATA; | ||
710 | |||
711 | xs->end = (void *)di + inode->i_sb->s_blocksize; | ||
712 | xs->header = (struct ocfs2_xattr_header *) | ||
713 | (xs->end - le16_to_cpu(di->i_xattr_inline_size)); | ||
714 | xs->base = (void *)xs->header; | ||
715 | xs->here = xs->header->xh_entries; | ||
716 | |||
717 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
718 | if (ret) | ||
719 | return ret; | ||
720 | size = le64_to_cpu(xs->here->xe_value_size); | ||
721 | if (buffer) { | ||
722 | if (size > buffer_size) | ||
723 | return -ERANGE; | ||
724 | if (ocfs2_xattr_is_local(xs->here)) { | ||
725 | memcpy(buffer, (void *)xs->base + | ||
726 | le16_to_cpu(xs->here->xe_name_offset) + | ||
727 | OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); | ||
728 | } else { | ||
729 | xv = (struct ocfs2_xattr_value_root *) | ||
730 | (xs->base + le16_to_cpu( | ||
731 | xs->here->xe_name_offset) + | ||
732 | OCFS2_XATTR_SIZE(xs->here->xe_name_len)); | ||
733 | ret = ocfs2_xattr_get_value_outside(inode, xv, | ||
734 | buffer, size); | ||
735 | if (ret < 0) { | ||
736 | mlog_errno(ret); | ||
737 | return ret; | ||
738 | } | ||
739 | } | ||
740 | } | ||
741 | |||
742 | return size; | ||
743 | } | ||
744 | |||
745 | static int ocfs2_xattr_block_get(struct inode *inode, | ||
746 | int name_index, | ||
747 | const char *name, | ||
748 | void *buffer, | ||
749 | size_t buffer_size, | ||
750 | struct ocfs2_xattr_search *xs) | ||
751 | { | ||
752 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
753 | struct buffer_head *blk_bh = NULL; | ||
754 | struct ocfs2_xattr_block *xb; | ||
755 | struct ocfs2_xattr_value_root *xv; | ||
756 | size_t size; | ||
757 | int ret = -ENODATA, name_offset, name_len, block_off, i; | ||
758 | |||
759 | if (!di->i_xattr_loc) | ||
760 | return ret; | ||
761 | |||
762 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
763 | |||
764 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
765 | if (ret < 0) { | ||
766 | mlog_errno(ret); | ||
767 | return ret; | ||
768 | } | ||
769 | /*Verify the signature of xattr block*/ | ||
770 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
771 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
772 | ret = -EFAULT; | ||
773 | goto cleanup; | ||
774 | } | ||
775 | |||
776 | xs->xattr_bh = blk_bh; | ||
777 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
778 | |||
779 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
780 | xs->header = &xb->xb_attrs.xb_header; | ||
781 | xs->base = (void *)xs->header; | ||
782 | xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; | ||
783 | xs->here = xs->header->xh_entries; | ||
784 | |||
785 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
786 | } else | ||
787 | ret = ocfs2_xattr_index_block_find(inode, blk_bh, | ||
788 | name_index, | ||
789 | name, xs); | ||
790 | |||
791 | if (ret) | ||
792 | goto cleanup; | ||
793 | size = le64_to_cpu(xs->here->xe_value_size); | ||
794 | if (buffer) { | ||
795 | ret = -ERANGE; | ||
796 | if (size > buffer_size) | ||
797 | goto cleanup; | ||
798 | |||
799 | name_offset = le16_to_cpu(xs->here->xe_name_offset); | ||
800 | name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); | ||
801 | i = xs->here - xs->header->xh_entries; | ||
802 | |||
803 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { | ||
804 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
805 | xs->bucket.xh, | ||
806 | i, | ||
807 | &block_off, | ||
808 | &name_offset); | ||
809 | xs->base = xs->bucket.bhs[block_off]->b_data; | ||
810 | } | ||
811 | if (ocfs2_xattr_is_local(xs->here)) { | ||
812 | memcpy(buffer, (void *)xs->base + | ||
813 | name_offset + name_len, size); | ||
814 | } else { | ||
815 | xv = (struct ocfs2_xattr_value_root *) | ||
816 | (xs->base + name_offset + name_len); | ||
817 | ret = ocfs2_xattr_get_value_outside(inode, xv, | ||
818 | buffer, size); | ||
819 | if (ret < 0) { | ||
820 | mlog_errno(ret); | ||
821 | goto cleanup; | ||
822 | } | ||
823 | } | ||
824 | } | ||
825 | ret = size; | ||
826 | cleanup: | ||
827 | for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) | ||
828 | brelse(xs->bucket.bhs[i]); | ||
829 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
830 | |||
831 | brelse(blk_bh); | ||
832 | return ret; | ||
833 | } | ||
834 | |||
835 | /* ocfs2_xattr_get() | ||
836 | * | ||
837 | * Copy an extended attribute into the buffer provided. | ||
838 | * Buffer is NULL to compute the size of buffer required. | ||
839 | */ | ||
840 | int ocfs2_xattr_get(struct inode *inode, | ||
841 | int name_index, | ||
842 | const char *name, | ||
843 | void *buffer, | ||
844 | size_t buffer_size) | ||
845 | { | ||
846 | int ret; | ||
847 | struct ocfs2_dinode *di = NULL; | ||
848 | struct buffer_head *di_bh = NULL; | ||
849 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
850 | struct ocfs2_xattr_search xis = { | ||
851 | .not_found = -ENODATA, | ||
852 | }; | ||
853 | struct ocfs2_xattr_search xbs = { | ||
854 | .not_found = -ENODATA, | ||
855 | }; | ||
856 | |||
857 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
858 | return -EOPNOTSUPP; | ||
859 | |||
860 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
861 | ret = -ENODATA; | ||
862 | |||
863 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
864 | if (ret < 0) { | ||
865 | mlog_errno(ret); | ||
866 | return ret; | ||
867 | } | ||
868 | xis.inode_bh = xbs.inode_bh = di_bh; | ||
869 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
870 | |||
871 | down_read(&oi->ip_xattr_sem); | ||
872 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | ||
873 | buffer_size, &xis); | ||
874 | if (ret == -ENODATA) | ||
875 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | ||
876 | buffer_size, &xbs); | ||
877 | up_read(&oi->ip_xattr_sem); | ||
878 | ocfs2_inode_unlock(inode, 0); | ||
879 | |||
880 | brelse(di_bh); | ||
881 | |||
882 | return ret; | ||
883 | } | ||
884 | |||
885 | static int __ocfs2_xattr_set_value_outside(struct inode *inode, | ||
886 | struct ocfs2_xattr_value_root *xv, | ||
887 | const void *value, | ||
888 | int value_len) | ||
889 | { | ||
890 | int ret = 0, i, cp_len, credits; | ||
891 | u16 blocksize = inode->i_sb->s_blocksize; | ||
892 | u32 p_cluster, num_clusters; | ||
893 | u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
894 | u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); | ||
895 | u64 blkno; | ||
896 | struct buffer_head *bh = NULL; | ||
897 | handle_t *handle; | ||
898 | |||
899 | BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); | ||
900 | |||
901 | credits = clusters * bpc; | ||
902 | handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits); | ||
903 | if (IS_ERR(handle)) { | ||
904 | ret = PTR_ERR(handle); | ||
905 | mlog_errno(ret); | ||
906 | goto out; | ||
907 | } | ||
908 | |||
909 | while (cpos < clusters) { | ||
910 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
911 | &num_clusters, &xv->xr_list); | ||
912 | if (ret) { | ||
913 | mlog_errno(ret); | ||
914 | goto out_commit; | ||
915 | } | ||
916 | |||
917 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
918 | |||
919 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | ||
920 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
921 | if (ret) { | ||
922 | mlog_errno(ret); | ||
923 | goto out_commit; | ||
924 | } | ||
925 | |||
926 | ret = ocfs2_journal_access(handle, | ||
927 | inode, | ||
928 | bh, | ||
929 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
930 | if (ret < 0) { | ||
931 | mlog_errno(ret); | ||
932 | goto out_commit; | ||
933 | } | ||
934 | |||
935 | cp_len = value_len > blocksize ? blocksize : value_len; | ||
936 | memcpy(bh->b_data, value, cp_len); | ||
937 | value_len -= cp_len; | ||
938 | value += cp_len; | ||
939 | if (cp_len < blocksize) | ||
940 | memset(bh->b_data + cp_len, 0, | ||
941 | blocksize - cp_len); | ||
942 | |||
943 | ret = ocfs2_journal_dirty(handle, bh); | ||
944 | if (ret < 0) { | ||
945 | mlog_errno(ret); | ||
946 | goto out_commit; | ||
947 | } | ||
948 | brelse(bh); | ||
949 | bh = NULL; | ||
950 | |||
951 | /* | ||
952 | * XXX: do we need to empty all the following | ||
953 | * blocks in this cluster? | ||
954 | */ | ||
955 | if (!value_len) | ||
956 | break; | ||
957 | } | ||
958 | cpos += num_clusters; | ||
959 | } | ||
960 | out_commit: | ||
961 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
962 | out: | ||
963 | brelse(bh); | ||
964 | |||
965 | return ret; | ||
966 | } | ||
967 | |||
968 | static int ocfs2_xattr_cleanup(struct inode *inode, | ||
969 | struct ocfs2_xattr_info *xi, | ||
970 | struct ocfs2_xattr_search *xs, | ||
971 | size_t offs) | ||
972 | { | ||
973 | handle_t *handle = NULL; | ||
974 | int ret = 0; | ||
975 | size_t name_len = strlen(xi->name); | ||
976 | void *val = xs->base + offs; | ||
977 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
978 | |||
979 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
980 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
981 | if (IS_ERR(handle)) { | ||
982 | ret = PTR_ERR(handle); | ||
983 | mlog_errno(ret); | ||
984 | goto out; | ||
985 | } | ||
986 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
987 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
988 | if (ret) { | ||
989 | mlog_errno(ret); | ||
990 | goto out_commit; | ||
991 | } | ||
992 | /* Decrease xattr count */ | ||
993 | le16_add_cpu(&xs->header->xh_count, -1); | ||
994 | /* Remove the xattr entry and tree root which has already be set*/ | ||
995 | memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); | ||
996 | memset(val, 0, size); | ||
997 | |||
998 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
999 | if (ret < 0) | ||
1000 | mlog_errno(ret); | ||
1001 | out_commit: | ||
1002 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
1003 | out: | ||
1004 | return ret; | ||
1005 | } | ||
1006 | |||
1007 | static int ocfs2_xattr_update_entry(struct inode *inode, | ||
1008 | struct ocfs2_xattr_info *xi, | ||
1009 | struct ocfs2_xattr_search *xs, | ||
1010 | size_t offs) | ||
1011 | { | ||
1012 | handle_t *handle = NULL; | ||
1013 | int ret = 0; | ||
1014 | |||
1015 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
1016 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
1017 | if (IS_ERR(handle)) { | ||
1018 | ret = PTR_ERR(handle); | ||
1019 | mlog_errno(ret); | ||
1020 | goto out; | ||
1021 | } | ||
1022 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
1023 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1024 | if (ret) { | ||
1025 | mlog_errno(ret); | ||
1026 | goto out_commit; | ||
1027 | } | ||
1028 | |||
1029 | xs->here->xe_name_offset = cpu_to_le16(offs); | ||
1030 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
1031 | if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
1032 | ocfs2_xattr_set_local(xs->here, 1); | ||
1033 | else | ||
1034 | ocfs2_xattr_set_local(xs->here, 0); | ||
1035 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | ||
1036 | |||
1037 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
1038 | if (ret < 0) | ||
1039 | mlog_errno(ret); | ||
1040 | out_commit: | ||
1041 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
1042 | out: | ||
1043 | return ret; | ||
1044 | } | ||
1045 | |||
1046 | /* | ||
1047 | * ocfs2_xattr_set_value_outside() | ||
1048 | * | ||
1049 | * Set large size value in B tree. | ||
1050 | */ | ||
1051 | static int ocfs2_xattr_set_value_outside(struct inode *inode, | ||
1052 | struct ocfs2_xattr_info *xi, | ||
1053 | struct ocfs2_xattr_search *xs, | ||
1054 | size_t offs) | ||
1055 | { | ||
1056 | size_t name_len = strlen(xi->name); | ||
1057 | void *val = xs->base + offs; | ||
1058 | struct ocfs2_xattr_value_root *xv = NULL; | ||
1059 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
1060 | int ret = 0; | ||
1061 | |||
1062 | memset(val, 0, size); | ||
1063 | memcpy(val, xi->name, name_len); | ||
1064 | xv = (struct ocfs2_xattr_value_root *) | ||
1065 | (val + OCFS2_XATTR_SIZE(name_len)); | ||
1066 | xv->xr_clusters = 0; | ||
1067 | xv->xr_last_eb_blk = 0; | ||
1068 | xv->xr_list.l_tree_depth = 0; | ||
1069 | xv->xr_list.l_count = cpu_to_le16(1); | ||
1070 | xv->xr_list.l_next_free_rec = 0; | ||
1071 | |||
1072 | ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, | ||
1073 | xi->value_len); | ||
1074 | if (ret < 0) { | ||
1075 | mlog_errno(ret); | ||
1076 | return ret; | ||
1077 | } | ||
1078 | ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, | ||
1079 | xi->value_len); | ||
1080 | if (ret < 0) { | ||
1081 | mlog_errno(ret); | ||
1082 | return ret; | ||
1083 | } | ||
1084 | ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); | ||
1085 | if (ret < 0) | ||
1086 | mlog_errno(ret); | ||
1087 | |||
1088 | return ret; | ||
1089 | } | ||
1090 | |||
1091 | /* | ||
1092 | * ocfs2_xattr_set_entry_local() | ||
1093 | * | ||
1094 | * Set, replace or remove extended attribute in local. | ||
1095 | */ | ||
1096 | static void ocfs2_xattr_set_entry_local(struct inode *inode, | ||
1097 | struct ocfs2_xattr_info *xi, | ||
1098 | struct ocfs2_xattr_search *xs, | ||
1099 | struct ocfs2_xattr_entry *last, | ||
1100 | size_t min_offs) | ||
1101 | { | ||
1102 | size_t name_len = strlen(xi->name); | ||
1103 | int i; | ||
1104 | |||
1105 | if (xi->value && xs->not_found) { | ||
1106 | /* Insert the new xattr entry. */ | ||
1107 | le16_add_cpu(&xs->header->xh_count, 1); | ||
1108 | ocfs2_xattr_set_type(last, xi->name_index); | ||
1109 | ocfs2_xattr_set_local(last, 1); | ||
1110 | last->xe_name_len = name_len; | ||
1111 | } else { | ||
1112 | void *first_val; | ||
1113 | void *val; | ||
1114 | size_t offs, size; | ||
1115 | |||
1116 | first_val = xs->base + min_offs; | ||
1117 | offs = le16_to_cpu(xs->here->xe_name_offset); | ||
1118 | val = xs->base + offs; | ||
1119 | |||
1120 | if (le64_to_cpu(xs->here->xe_value_size) > | ||
1121 | OCFS2_XATTR_INLINE_SIZE) | ||
1122 | size = OCFS2_XATTR_SIZE(name_len) + | ||
1123 | OCFS2_XATTR_ROOT_SIZE; | ||
1124 | else | ||
1125 | size = OCFS2_XATTR_SIZE(name_len) + | ||
1126 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
1127 | |||
1128 | if (xi->value && size == OCFS2_XATTR_SIZE(name_len) + | ||
1129 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
1130 | /* The old and the new value have the | ||
1131 | same size. Just replace the value. */ | ||
1132 | ocfs2_xattr_set_local(xs->here, 1); | ||
1133 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
1134 | /* Clear value bytes. */ | ||
1135 | memset(val + OCFS2_XATTR_SIZE(name_len), | ||
1136 | 0, | ||
1137 | OCFS2_XATTR_SIZE(xi->value_len)); | ||
1138 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
1139 | xi->value, | ||
1140 | xi->value_len); | ||
1141 | return; | ||
1142 | } | ||
1143 | /* Remove the old name+value. */ | ||
1144 | memmove(first_val + size, first_val, val - first_val); | ||
1145 | memset(first_val, 0, size); | ||
1146 | xs->here->xe_name_hash = 0; | ||
1147 | xs->here->xe_name_offset = 0; | ||
1148 | ocfs2_xattr_set_local(xs->here, 1); | ||
1149 | xs->here->xe_value_size = 0; | ||
1150 | |||
1151 | min_offs += size; | ||
1152 | |||
1153 | /* Adjust all value offsets. */ | ||
1154 | last = xs->header->xh_entries; | ||
1155 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
1156 | size_t o = le16_to_cpu(last->xe_name_offset); | ||
1157 | |||
1158 | if (o < offs) | ||
1159 | last->xe_name_offset = cpu_to_le16(o + size); | ||
1160 | last += 1; | ||
1161 | } | ||
1162 | |||
1163 | if (!xi->value) { | ||
1164 | /* Remove the old entry. */ | ||
1165 | last -= 1; | ||
1166 | memmove(xs->here, xs->here + 1, | ||
1167 | (void *)last - (void *)xs->here); | ||
1168 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
1169 | le16_add_cpu(&xs->header->xh_count, -1); | ||
1170 | } | ||
1171 | } | ||
1172 | if (xi->value) { | ||
1173 | /* Insert the new name+value. */ | ||
1174 | size_t size = OCFS2_XATTR_SIZE(name_len) + | ||
1175 | OCFS2_XATTR_SIZE(xi->value_len); | ||
1176 | void *val = xs->base + min_offs - size; | ||
1177 | |||
1178 | xs->here->xe_name_offset = cpu_to_le16(min_offs - size); | ||
1179 | memset(val, 0, size); | ||
1180 | memcpy(val, xi->name, name_len); | ||
1181 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
1182 | xi->value, | ||
1183 | xi->value_len); | ||
1184 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
1185 | ocfs2_xattr_set_local(xs->here, 1); | ||
1186 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | ||
1187 | } | ||
1188 | |||
1189 | return; | ||
1190 | } | ||
1191 | |||
1192 | /* | ||
1193 | * ocfs2_xattr_set_entry() | ||
1194 | * | ||
1195 | * Set extended attribute entry into inode or block. | ||
1196 | * | ||
1197 | * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE, | ||
1198 | * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(), | ||
1199 | * then set value in B tree with set_value_outside(). | ||
1200 | */ | ||
/*
 * xi   - name, value and length to store; a NULL xi->value takes the
 *        removal path in ocfs2_xattr_set_entry_local().
 * xs   - search result describing the storage area (base/end/header/here)
 *        and whether the name was found (not_found).
 * flag - OR-ed into the inode's dynamic features on success.  When it
 *        contains OCFS2_INLINE_XATTR_FL only the inode buffer is
 *        journalled; otherwise xs->xattr_bh is journalled as well.
 *
 * Returns 0 on success, -EFAULT when the computed free space is negative,
 * -ENOSPC when the new entry does not fit, or the error of a failing
 * transaction/journal/value helper.
 */
1201 | static int ocfs2_xattr_set_entry(struct inode *inode, | ||
1202 | struct ocfs2_xattr_info *xi, | ||
1203 | struct ocfs2_xattr_search *xs, | ||
1204 | int flag) | ||
1205 | { | ||
1206 | struct ocfs2_xattr_entry *last; | ||
1207 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1208 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1209 | size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); | ||
1210 | size_t size_l = 0; | ||
1211 | handle_t *handle = NULL; | ||
1212 | int free, i, ret; | ||
/*
 * Working copy of xi that is handed to ocfs2_xattr_set_entry_local().
 * Its value is swapped for def_xv below when the real value is too large
 * to be stored locally.
 */
1213 | struct ocfs2_xattr_info xi_l = { | ||
1214 | .name_index = xi->name_index, | ||
1215 | .name = xi->name, | ||
1216 | .value = xi->value, | ||
1217 | .value_len = xi->value_len, | ||
1218 | }; | ||
1219 | |||
1220 | /* Compute min_offs, last and free space. */ | ||
1221 | last = xs->header->xh_entries; | ||
1222 | |||
/*
 * After this loop min_offs is the smallest xe_name_offset of any entry
 * (or xs->end - xs->base when there are none) and last points just past
 * the final entry.
 */
1223 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
1224 | size_t offs = le16_to_cpu(last->xe_name_offset); | ||
1225 | if (offs < min_offs) | ||
1226 | min_offs = offs; | ||
1227 | last += 1; | ||
1228 | } | ||
1229 | |||
/*
 * Bytes between the end of the entry table and the lowest name/value
 * offset, less one __u32.
 */
1230 | free = min_offs - ((void *)last - xs->base) - sizeof(__u32); | ||
1231 | if (free < 0) | ||
1232 | return -EFAULT; | ||
1233 | |||
/*
 * The name already exists: count the entry slot and the name+value (or
 * name+root) area it currently occupies as available space.
 */
1234 | if (!xs->not_found) { | ||
1235 | size_t size = 0; | ||
1236 | if (ocfs2_xattr_is_local(xs->here)) | ||
1237 | size = OCFS2_XATTR_SIZE(name_len) + | ||
1238 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
1239 | else | ||
1240 | size = OCFS2_XATTR_SIZE(name_len) + | ||
1241 | OCFS2_XATTR_ROOT_SIZE; | ||
1242 | free += (size + sizeof(struct ocfs2_xattr_entry)); | ||
1243 | } | ||
1244 | /* Check free space in inode or block */ | ||
1245 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
1246 | if (free < sizeof(struct ocfs2_xattr_entry) + | ||
1247 | OCFS2_XATTR_SIZE(name_len) + | ||
1248 | OCFS2_XATTR_ROOT_SIZE) { | ||
1249 | ret = -ENOSPC; | ||
1250 | goto out; | ||
1251 | } | ||
/*
 * Large value: locally only the name plus a tree root is stored, so the
 * local step is given def_xv with a length of OCFS2_XATTR_ROOT_SIZE
 * (def_xv is defined outside this function - presumably an empty root).
 */
1252 | size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
1253 | xi_l.value = (void *)&def_xv; | ||
1254 | xi_l.value_len = OCFS2_XATTR_ROOT_SIZE; | ||
1255 | } else if (xi->value) { | ||
1256 | if (free < sizeof(struct ocfs2_xattr_entry) + | ||
1257 | OCFS2_XATTR_SIZE(name_len) + | ||
1258 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
1259 | ret = -ENOSPC; | ||
1260 | goto out; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
1264 | if (!xs->not_found) { | ||
1265 | /* For existing extended attribute */ | ||
/*
 * size is derived from xe_value_size as if the value were local; it is
 * only compared below after ocfs2_xattr_is_local() has been checked.
 */
1266 | size_t size = OCFS2_XATTR_SIZE(name_len) + | ||
1267 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
1268 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
1269 | void *val = xs->base + offs; | ||
1270 | |||
/*
 * size_l is non-zero only when the new value is large, so this branch
 * covers "local value whose area already equals name + root".
 */
1271 | if (ocfs2_xattr_is_local(xs->here) && size == size_l) { | ||
1272 | /* Replace existing local xattr with tree root */ | ||
1273 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, | ||
1274 | offs); | ||
1275 | if (ret < 0) | ||
1276 | mlog_errno(ret); | ||
1277 | goto out; | ||
1278 | } else if (!ocfs2_xattr_is_local(xs->here)) { | ||
1279 | /* For existing xattr which has value outside */ | ||
1280 | struct ocfs2_xattr_value_root *xv = NULL; | ||
1281 | xv = (struct ocfs2_xattr_value_root *)(val + | ||
1282 | OCFS2_XATTR_SIZE(name_len)); | ||
1283 | |||
1284 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
1285 | /* | ||
1286 | * If new value need set outside also, | ||
1287 | * first truncate old value to new value, | ||
1288 | * then set new value with set_value_outside(). | ||
1289 | */ | ||
1290 | ret = ocfs2_xattr_value_truncate(inode, | ||
1291 | xs->xattr_bh, | ||
1292 | xv, | ||
1293 | xi->value_len); | ||
1294 | if (ret < 0) { | ||
1295 | mlog_errno(ret); | ||
1296 | goto out; | ||
1297 | } | ||
1298 | |||
1299 | ret = __ocfs2_xattr_set_value_outside(inode, | ||
1300 | xv, | ||
1301 | xi->value, | ||
1302 | xi->value_len); | ||
1303 | if (ret < 0) { | ||
1304 | mlog_errno(ret); | ||
1305 | goto out; | ||
1306 | } | ||
1307 | |||
1308 | ret = ocfs2_xattr_update_entry(inode, | ||
1309 | xi, | ||
1310 | xs, | ||
1311 | offs); | ||
1312 | if (ret < 0) | ||
1313 | mlog_errno(ret); | ||
1314 | goto out; | ||
1315 | } else { | ||
1316 | /* | ||
1317 | * If new value need set in local, | ||
1318 | * just truncate old value to zero. | ||
1319 | */ | ||
/*
 * NOTE(review): a failure here is only logged; execution continues and
 * ret is overwritten by the calls below - confirm this is intended
 * best-effort behaviour.
 */
1320 | ret = ocfs2_xattr_value_truncate(inode, | ||
1321 | xs->xattr_bh, | ||
1322 | xv, | ||
1323 | 0); | ||
1324 | if (ret < 0) | ||
1325 | mlog_errno(ret); | ||
1326 | } | ||
1327 | } | ||
1328 | } | ||
1329 | |||
/*
 * The transaction starts with credits for the inode block only; it is
 * extended below when the xattr lives in an external block.
 */
1330 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
1331 | OCFS2_INODE_UPDATE_CREDITS); | ||
1332 | if (IS_ERR(handle)) { | ||
1333 | ret = PTR_ERR(handle); | ||
1334 | mlog_errno(ret); | ||
1335 | goto out; | ||
1336 | } | ||
1337 | |||
1338 | ret = ocfs2_journal_access(handle, inode, xs->inode_bh, | ||
1339 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1340 | if (ret) { | ||
1341 | mlog_errno(ret); | ||
1342 | goto out_commit; | ||
1343 | } | ||
1344 | |||
1345 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | ||
1346 | /* set extended attribute in external block. */ | ||
1347 | ret = ocfs2_extend_trans(handle, | ||
1348 | OCFS2_INODE_UPDATE_CREDITS + | ||
1349 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
1350 | if (ret) { | ||
1351 | mlog_errno(ret); | ||
1352 | goto out_commit; | ||
1353 | } | ||
1354 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
1355 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1356 | if (ret) { | ||
1357 | mlog_errno(ret); | ||
1358 | goto out_commit; | ||
1359 | } | ||
1360 | } | ||
1361 | |||
1362 | /* | ||
1363 | * Set value in local, include set tree root in local. | ||
1364 | * This is the first step for value size >INLINE_SIZE. | ||
1365 | */ | ||
1366 | ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs); | ||
1367 | |||
1368 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | ||
1369 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
1370 | if (ret < 0) { | ||
1371 | mlog_errno(ret); | ||
1372 | goto out_commit; | ||
1373 | } | ||
1374 | } | ||
1375 | |||
/*
 * Taken only when the inode does not yet carry OCFS2_INLINE_XATTR_FL but
 * this call is about to set it.
 */
1376 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) && | ||
1377 | (flag & OCFS2_INLINE_XATTR_FL)) { | ||
1378 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1379 | unsigned int xattrsize = osb->s_xattr_inline_size; | ||
1380 | |||
1381 | /* | ||
1382 | * Adjust extent record count or inline data size | ||
1383 | * to reserve space for extended attribute. | ||
1384 | */ | ||
1385 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
1386 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
1387 | le16_add_cpu(&idata->id_count, -xattrsize); | ||
1388 | } else if (!(ocfs2_inode_is_fast_symlink(inode))) { | ||
1389 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
1390 | le16_add_cpu(&el->l_count, -(xattrsize / | ||
1391 | sizeof(struct ocfs2_extent_rec))); | ||
1392 | } | ||
1393 | di->i_xattr_inline_size = cpu_to_le16(xattrsize); | ||
1394 | } | ||
1395 | /* Update xattr flag */ | ||
1396 | spin_lock(&oi->ip_lock); | ||
1397 | oi->ip_dyn_features |= flag; | ||
1398 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
1399 | spin_unlock(&oi->ip_lock); | ||
1400 | /* Update inode ctime */ | ||
1401 | inode->i_ctime = CURRENT_TIME; | ||
1402 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
1403 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
1404 | |||
1405 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | ||
1406 | if (ret < 0) | ||
1407 | mlog_errno(ret); | ||
1408 | |||
/*
 * Also reached from the error paths above with ret already set; the
 * second step below only runs when ret is 0.
 */
1409 | out_commit: | ||
1410 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
1411 | |||
1412 | if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
1413 | /* | ||
1414 | * Set value outside in B tree. | ||
1415 | * This is the second step for value size > INLINE_SIZE. | ||
1416 | */ | ||
1417 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
1418 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); | ||
1419 | if (ret < 0) { | ||
1420 | int ret2; | ||
1421 | |||
1422 | mlog_errno(ret); | ||
1423 | /* | ||
1424 | * If set value outside failed, we have to clean | ||
1425 | * the junk tree root we have already set in local. | ||
1426 | */ | ||
1427 | ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); | ||
1428 | if (ret2 < 0) | ||
1429 | mlog_errno(ret2); | ||
1430 | } | ||
1431 | } | ||
1432 | out: | ||
1433 | return ret; | ||
1434 | |||
1435 | } | ||
1436 | |||
1437 | static int ocfs2_remove_value_outside(struct inode*inode, | ||
1438 | struct buffer_head *bh, | ||
1439 | struct ocfs2_xattr_header *header) | ||
1440 | { | ||
1441 | int ret = 0, i; | ||
1442 | |||
1443 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { | ||
1444 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
1445 | |||
1446 | if (!ocfs2_xattr_is_local(entry)) { | ||
1447 | struct ocfs2_xattr_value_root *xv; | ||
1448 | void *val; | ||
1449 | |||
1450 | val = (void *)header + | ||
1451 | le16_to_cpu(entry->xe_name_offset); | ||
1452 | xv = (struct ocfs2_xattr_value_root *) | ||
1453 | (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); | ||
1454 | ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); | ||
1455 | if (ret < 0) { | ||
1456 | mlog_errno(ret); | ||
1457 | return ret; | ||
1458 | } | ||
1459 | } | ||
1460 | } | ||
1461 | |||
1462 | return ret; | ||
1463 | } | ||
1464 | |||
1465 | static int ocfs2_xattr_ibody_remove(struct inode *inode, | ||
1466 | struct buffer_head *di_bh) | ||
1467 | { | ||
1468 | |||
1469 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1470 | struct ocfs2_xattr_header *header; | ||
1471 | int ret; | ||
1472 | |||
1473 | header = (struct ocfs2_xattr_header *) | ||
1474 | ((void *)di + inode->i_sb->s_blocksize - | ||
1475 | le16_to_cpu(di->i_xattr_inline_size)); | ||
1476 | |||
1477 | ret = ocfs2_remove_value_outside(inode, di_bh, header); | ||
1478 | |||
1479 | return ret; | ||
1480 | } | ||
1481 | |||
1482 | static int ocfs2_xattr_block_remove(struct inode *inode, | ||
1483 | struct buffer_head *blk_bh) | ||
1484 | { | ||
1485 | struct ocfs2_xattr_block *xb; | ||
1486 | int ret = 0; | ||
1487 | |||
1488 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
1489 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1490 | struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); | ||
1491 | ret = ocfs2_remove_value_outside(inode, blk_bh, header); | ||
1492 | } else | ||
1493 | ret = ocfs2_delete_xattr_index_block(inode, blk_bh); | ||
1494 | |||
1495 | return ret; | ||
1496 | } | ||
1497 | |||
1498 | static int ocfs2_xattr_free_block(struct inode *inode, | ||
1499 | u64 block) | ||
1500 | { | ||
1501 | struct inode *xb_alloc_inode; | ||
1502 | struct buffer_head *xb_alloc_bh = NULL; | ||
1503 | struct buffer_head *blk_bh = NULL; | ||
1504 | struct ocfs2_xattr_block *xb; | ||
1505 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1506 | handle_t *handle; | ||
1507 | int ret = 0; | ||
1508 | u64 blk, bg_blkno; | ||
1509 | u16 bit; | ||
1510 | |||
1511 | ret = ocfs2_read_block(inode, block, &blk_bh); | ||
1512 | if (ret < 0) { | ||
1513 | mlog_errno(ret); | ||
1514 | goto out; | ||
1515 | } | ||
1516 | |||
1517 | /*Verify the signature of xattr block*/ | ||
1518 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
1519 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
1520 | ret = -EFAULT; | ||
1521 | goto out; | ||
1522 | } | ||
1523 | |||
1524 | ret = ocfs2_xattr_block_remove(inode, blk_bh); | ||
1525 | if (ret < 0) { | ||
1526 | mlog_errno(ret); | ||
1527 | goto out; | ||
1528 | } | ||
1529 | |||
1530 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
1531 | blk = le64_to_cpu(xb->xb_blkno); | ||
1532 | bit = le16_to_cpu(xb->xb_suballoc_bit); | ||
1533 | bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
1534 | |||
1535 | xb_alloc_inode = ocfs2_get_system_file_inode(osb, | ||
1536 | EXTENT_ALLOC_SYSTEM_INODE, | ||
1537 | le16_to_cpu(xb->xb_suballoc_slot)); | ||
1538 | if (!xb_alloc_inode) { | ||
1539 | ret = -ENOMEM; | ||
1540 | mlog_errno(ret); | ||
1541 | goto out; | ||
1542 | } | ||
1543 | mutex_lock(&xb_alloc_inode->i_mutex); | ||
1544 | |||
1545 | ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1); | ||
1546 | if (ret < 0) { | ||
1547 | mlog_errno(ret); | ||
1548 | goto out_mutex; | ||
1549 | } | ||
1550 | |||
1551 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
1552 | if (IS_ERR(handle)) { | ||
1553 | ret = PTR_ERR(handle); | ||
1554 | mlog_errno(ret); | ||
1555 | goto out_unlock; | ||
1556 | } | ||
1557 | |||
1558 | ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, | ||
1559 | bit, bg_blkno, 1); | ||
1560 | if (ret < 0) | ||
1561 | mlog_errno(ret); | ||
1562 | |||
1563 | ocfs2_commit_trans(osb, handle); | ||
1564 | out_unlock: | ||
1565 | ocfs2_inode_unlock(xb_alloc_inode, 1); | ||
1566 | brelse(xb_alloc_bh); | ||
1567 | out_mutex: | ||
1568 | mutex_unlock(&xb_alloc_inode->i_mutex); | ||
1569 | iput(xb_alloc_inode); | ||
1570 | out: | ||
1571 | brelse(blk_bh); | ||
1572 | return ret; | ||
1573 | } | ||
1574 | |||
1575 | /* | ||
1576 | * ocfs2_xattr_remove() | ||
1577 | * | ||
1578 | * Free extended attribute resources associated with this inode. | ||
1579 | */ | ||
1580 | int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | ||
1581 | { | ||
1582 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1583 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1584 | handle_t *handle; | ||
1585 | int ret; | ||
1586 | |||
1587 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
1588 | return 0; | ||
1589 | |||
1590 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
1591 | return 0; | ||
1592 | |||
1593 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
1594 | ret = ocfs2_xattr_ibody_remove(inode, di_bh); | ||
1595 | if (ret < 0) { | ||
1596 | mlog_errno(ret); | ||
1597 | goto out; | ||
1598 | } | ||
1599 | } | ||
1600 | |||
1601 | if (di->i_xattr_loc) { | ||
1602 | ret = ocfs2_xattr_free_block(inode, | ||
1603 | le64_to_cpu(di->i_xattr_loc)); | ||
1604 | if (ret < 0) { | ||
1605 | mlog_errno(ret); | ||
1606 | goto out; | ||
1607 | } | ||
1608 | } | ||
1609 | |||
1610 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
1611 | OCFS2_INODE_UPDATE_CREDITS); | ||
1612 | if (IS_ERR(handle)) { | ||
1613 | ret = PTR_ERR(handle); | ||
1614 | mlog_errno(ret); | ||
1615 | goto out; | ||
1616 | } | ||
1617 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
1618 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1619 | if (ret) { | ||
1620 | mlog_errno(ret); | ||
1621 | goto out_commit; | ||
1622 | } | ||
1623 | |||
1624 | di->i_xattr_loc = 0; | ||
1625 | |||
1626 | spin_lock(&oi->ip_lock); | ||
1627 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); | ||
1628 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
1629 | spin_unlock(&oi->ip_lock); | ||
1630 | |||
1631 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1632 | if (ret < 0) | ||
1633 | mlog_errno(ret); | ||
1634 | out_commit: | ||
1635 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
1636 | out: | ||
1637 | return ret; | ||
1638 | } | ||
1639 | |||
1640 | static int ocfs2_xattr_has_space_inline(struct inode *inode, | ||
1641 | struct ocfs2_dinode *di) | ||
1642 | { | ||
1643 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1644 | unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; | ||
1645 | int free; | ||
1646 | |||
1647 | if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) | ||
1648 | return 0; | ||
1649 | |||
1650 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
1651 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
1652 | free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); | ||
1653 | } else if (ocfs2_inode_is_fast_symlink(inode)) { | ||
1654 | free = ocfs2_fast_symlink_chars(inode->i_sb) - | ||
1655 | le64_to_cpu(di->i_size); | ||
1656 | } else { | ||
1657 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
1658 | free = (le16_to_cpu(el->l_count) - | ||
1659 | le16_to_cpu(el->l_next_free_rec)) * | ||
1660 | sizeof(struct ocfs2_extent_rec); | ||
1661 | } | ||
1662 | if (free >= xattrsize) | ||
1663 | return 1; | ||
1664 | |||
1665 | return 0; | ||
1666 | } | ||
1667 | |||
1668 | /* | ||
1669 | * ocfs2_xattr_ibody_find() | ||
1670 | * | ||
1671 | * Find extended attribute in inode block and | ||
1672 | * fill search info into struct ocfs2_xattr_search. | ||
1673 | */ | ||
1674 | static int ocfs2_xattr_ibody_find(struct inode *inode, | ||
1675 | int name_index, | ||
1676 | const char *name, | ||
1677 | struct ocfs2_xattr_search *xs) | ||
1678 | { | ||
1679 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1680 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1681 | int ret; | ||
1682 | int has_space = 0; | ||
1683 | |||
1684 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | ||
1685 | return 0; | ||
1686 | |||
1687 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
1688 | down_read(&oi->ip_alloc_sem); | ||
1689 | has_space = ocfs2_xattr_has_space_inline(inode, di); | ||
1690 | up_read(&oi->ip_alloc_sem); | ||
1691 | if (!has_space) | ||
1692 | return 0; | ||
1693 | } | ||
1694 | |||
1695 | xs->xattr_bh = xs->inode_bh; | ||
1696 | xs->end = (void *)di + inode->i_sb->s_blocksize; | ||
1697 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) | ||
1698 | xs->header = (struct ocfs2_xattr_header *) | ||
1699 | (xs->end - le16_to_cpu(di->i_xattr_inline_size)); | ||
1700 | else | ||
1701 | xs->header = (struct ocfs2_xattr_header *) | ||
1702 | (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); | ||
1703 | xs->base = (void *)xs->header; | ||
1704 | xs->here = xs->header->xh_entries; | ||
1705 | |||
1706 | /* Find the named attribute. */ | ||
1707 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
1708 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
1709 | if (ret && ret != -ENODATA) | ||
1710 | return ret; | ||
1711 | xs->not_found = ret; | ||
1712 | } | ||
1713 | |||
1714 | return 0; | ||
1715 | } | ||
1716 | |||
1717 | /* | ||
1718 | * ocfs2_xattr_ibody_set() | ||
1719 | * | ||
1720 | * Set, replace or remove an extended attribute into inode block. | ||
1721 | * | ||
1722 | */ | ||
1723 | static int ocfs2_xattr_ibody_set(struct inode *inode, | ||
1724 | struct ocfs2_xattr_info *xi, | ||
1725 | struct ocfs2_xattr_search *xs) | ||
1726 | { | ||
1727 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1728 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1729 | int ret; | ||
1730 | |||
1731 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | ||
1732 | return -ENOSPC; | ||
1733 | |||
1734 | down_write(&oi->ip_alloc_sem); | ||
1735 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
1736 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
1737 | ret = -ENOSPC; | ||
1738 | goto out; | ||
1739 | } | ||
1740 | } | ||
1741 | |||
1742 | ret = ocfs2_xattr_set_entry(inode, xi, xs, | ||
1743 | (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); | ||
1744 | out: | ||
1745 | up_write(&oi->ip_alloc_sem); | ||
1746 | |||
1747 | return ret; | ||
1748 | } | ||
1749 | |||
1750 | /* | ||
1751 | * ocfs2_xattr_block_find() | ||
1752 | * | ||
1753 | * Find extended attribute in external block and | ||
1754 | * fill search info into struct ocfs2_xattr_search. | ||
1755 | */ | ||
1756 | static int ocfs2_xattr_block_find(struct inode *inode, | ||
1757 | int name_index, | ||
1758 | const char *name, | ||
1759 | struct ocfs2_xattr_search *xs) | ||
1760 | { | ||
1761 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1762 | struct buffer_head *blk_bh = NULL; | ||
1763 | struct ocfs2_xattr_block *xb; | ||
1764 | int ret = 0; | ||
1765 | |||
1766 | if (!di->i_xattr_loc) | ||
1767 | return ret; | ||
1768 | |||
1769 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
1770 | if (ret < 0) { | ||
1771 | mlog_errno(ret); | ||
1772 | return ret; | ||
1773 | } | ||
1774 | /*Verify the signature of xattr block*/ | ||
1775 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
1776 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
1777 | ret = -EFAULT; | ||
1778 | goto cleanup; | ||
1779 | } | ||
1780 | |||
1781 | xs->xattr_bh = blk_bh; | ||
1782 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
1783 | |||
1784 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1785 | xs->header = &xb->xb_attrs.xb_header; | ||
1786 | xs->base = (void *)xs->header; | ||
1787 | xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; | ||
1788 | xs->here = xs->header->xh_entries; | ||
1789 | |||
1790 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
1791 | } else | ||
1792 | ret = ocfs2_xattr_index_block_find(inode, blk_bh, | ||
1793 | name_index, | ||
1794 | name, xs); | ||
1795 | |||
1796 | if (ret && ret != -ENODATA) { | ||
1797 | xs->xattr_bh = NULL; | ||
1798 | goto cleanup; | ||
1799 | } | ||
1800 | xs->not_found = ret; | ||
1801 | return 0; | ||
1802 | cleanup: | ||
1803 | brelse(blk_bh); | ||
1804 | |||
1805 | return ret; | ||
1806 | } | ||
1807 | |||
1808 | /* | ||
1809 | * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree | ||
1810 | * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header | ||
1811 | * re-initialized. | ||
1812 | */ | ||
1813 | static int ocfs2_restore_xattr_block(struct inode *inode, | ||
1814 | struct ocfs2_xattr_search *xs) | ||
1815 | { | ||
1816 | int ret; | ||
1817 | handle_t *handle; | ||
1818 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1819 | struct ocfs2_xattr_block *xb = | ||
1820 | (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | ||
1821 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
1822 | u16 xb_flags = le16_to_cpu(xb->xb_flags); | ||
1823 | |||
1824 | BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) || | ||
1825 | le16_to_cpu(el->l_next_free_rec) != 0); | ||
1826 | |||
1827 | handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
1828 | if (IS_ERR(handle)) { | ||
1829 | ret = PTR_ERR(handle); | ||
1830 | handle = NULL; | ||
1831 | goto out; | ||
1832 | } | ||
1833 | |||
1834 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
1835 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1836 | if (ret < 0) { | ||
1837 | mlog_errno(ret); | ||
1838 | goto out_commit; | ||
1839 | } | ||
1840 | |||
1841 | memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - | ||
1842 | offsetof(struct ocfs2_xattr_block, xb_attrs)); | ||
1843 | |||
1844 | xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED); | ||
1845 | |||
1846 | ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
1847 | |||
1848 | out_commit: | ||
1849 | ocfs2_commit_trans(osb, handle); | ||
1850 | out: | ||
1851 | return ret; | ||
1852 | } | ||
1853 | |||
1854 | /* | ||
1855 | * ocfs2_xattr_block_set() | ||
1856 | * | ||
1857 | * Set, replace or remove an extended attribute into external block. | ||
1858 | * | ||
1859 | */ | ||
1860 | static int ocfs2_xattr_block_set(struct inode *inode, | ||
1861 | struct ocfs2_xattr_info *xi, | ||
1862 | struct ocfs2_xattr_search *xs) | ||
1863 | { | ||
1864 | struct buffer_head *new_bh = NULL; | ||
1865 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1866 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1867 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1868 | handle_t *handle = NULL; | ||
1869 | struct ocfs2_xattr_block *xblk = NULL; | ||
1870 | u16 suballoc_bit_start; | ||
1871 | u32 num_got; | ||
1872 | u64 first_blkno; | ||
1873 | int ret; | ||
1874 | |||
1875 | if (!xs->xattr_bh) { | ||
1876 | /* | ||
1877 | * Alloc one external block for extended attribute | ||
1878 | * outside of inode. | ||
1879 | */ | ||
1880 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
1881 | if (ret < 0) { | ||
1882 | mlog_errno(ret); | ||
1883 | goto out; | ||
1884 | } | ||
1885 | handle = ocfs2_start_trans(osb, | ||
1886 | OCFS2_XATTR_BLOCK_CREATE_CREDITS); | ||
1887 | if (IS_ERR(handle)) { | ||
1888 | ret = PTR_ERR(handle); | ||
1889 | mlog_errno(ret); | ||
1890 | goto out; | ||
1891 | } | ||
1892 | ret = ocfs2_journal_access(handle, inode, xs->inode_bh, | ||
1893 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1894 | if (ret < 0) { | ||
1895 | mlog_errno(ret); | ||
1896 | goto out_commit; | ||
1897 | } | ||
1898 | |||
1899 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | ||
1900 | &suballoc_bit_start, &num_got, | ||
1901 | &first_blkno); | ||
1902 | if (ret < 0) { | ||
1903 | mlog_errno(ret); | ||
1904 | goto out_commit; | ||
1905 | } | ||
1906 | |||
1907 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
1908 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
1909 | |||
1910 | ret = ocfs2_journal_access(handle, inode, new_bh, | ||
1911 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1912 | if (ret < 0) { | ||
1913 | mlog_errno(ret); | ||
1914 | goto out_commit; | ||
1915 | } | ||
1916 | |||
1917 | /* Initialize ocfs2_xattr_block */ | ||
1918 | xs->xattr_bh = new_bh; | ||
1919 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | ||
1920 | memset(xblk, 0, inode->i_sb->s_blocksize); | ||
1921 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | ||
1922 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
1923 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
1924 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | ||
1925 | xblk->xb_blkno = cpu_to_le64(first_blkno); | ||
1926 | |||
1927 | xs->header = &xblk->xb_attrs.xb_header; | ||
1928 | xs->base = (void *)xs->header; | ||
1929 | xs->end = (void *)xblk + inode->i_sb->s_blocksize; | ||
1930 | xs->here = xs->header->xh_entries; | ||
1931 | |||
1932 | |||
1933 | ret = ocfs2_journal_dirty(handle, new_bh); | ||
1934 | if (ret < 0) { | ||
1935 | mlog_errno(ret); | ||
1936 | goto out_commit; | ||
1937 | } | ||
1938 | di->i_xattr_loc = cpu_to_le64(first_blkno); | ||
1939 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | ||
1940 | if (ret < 0) | ||
1941 | mlog_errno(ret); | ||
1942 | out_commit: | ||
1943 | ocfs2_commit_trans(osb, handle); | ||
1944 | out: | ||
1945 | if (meta_ac) | ||
1946 | ocfs2_free_alloc_context(meta_ac); | ||
1947 | if (ret < 0) | ||
1948 | return ret; | ||
1949 | } else | ||
1950 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | ||
1951 | |||
1952 | if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1953 | /* Set extended attribute into external block */ | ||
1954 | ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); | ||
1955 | if (!ret || ret != -ENOSPC) | ||
1956 | goto end; | ||
1957 | |||
1958 | ret = ocfs2_xattr_create_index_block(inode, xs); | ||
1959 | if (ret) | ||
1960 | goto end; | ||
1961 | } | ||
1962 | |||
1963 | ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); | ||
1964 | if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0) | ||
1965 | ret = ocfs2_restore_xattr_block(inode, xs); | ||
1966 | |||
1967 | end: | ||
1968 | |||
1969 | return ret; | ||
1970 | } | ||
1971 | |||
1972 | /* | ||
1973 | * ocfs2_xattr_set() | ||
1974 | * | ||
1975 | * Set, replace or remove an extended attribute for this inode. | ||
1976 | * value is NULL to remove an existing extended attribute, else either | ||
1977 | * create or replace an extended attribute. | ||
1978 | */ | ||
1979 | int ocfs2_xattr_set(struct inode *inode, | ||
1980 | int name_index, | ||
1981 | const char *name, | ||
1982 | const void *value, | ||
1983 | size_t value_len, | ||
1984 | int flags) | ||
1985 | { | ||
1986 | struct buffer_head *di_bh = NULL; | ||
1987 | struct ocfs2_dinode *di; | ||
1988 | int ret; | ||
1989 | u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
1990 | |||
1991 | struct ocfs2_xattr_info xi = { | ||
1992 | .name_index = name_index, | ||
1993 | .name = name, | ||
1994 | .value = value, | ||
1995 | .value_len = value_len, | ||
1996 | }; | ||
1997 | |||
1998 | struct ocfs2_xattr_search xis = { | ||
1999 | .not_found = -ENODATA, | ||
2000 | }; | ||
2001 | |||
2002 | struct ocfs2_xattr_search xbs = { | ||
2003 | .not_found = -ENODATA, | ||
2004 | }; | ||
2005 | |||
2006 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
2007 | return -EOPNOTSUPP; | ||
2008 | |||
2009 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2010 | if (ret < 0) { | ||
2011 | mlog_errno(ret); | ||
2012 | return ret; | ||
2013 | } | ||
2014 | xis.inode_bh = xbs.inode_bh = di_bh; | ||
2015 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2016 | |||
2017 | down_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
2018 | /* | ||
2019 | * Scan inode and external block to find the same name | ||
2020 | * extended attribute and collect search infomation. | ||
2021 | */ | ||
2022 | ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); | ||
2023 | if (ret) | ||
2024 | goto cleanup; | ||
2025 | if (xis.not_found) { | ||
2026 | ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); | ||
2027 | if (ret) | ||
2028 | goto cleanup; | ||
2029 | } | ||
2030 | |||
2031 | if (xis.not_found && xbs.not_found) { | ||
2032 | ret = -ENODATA; | ||
2033 | if (flags & XATTR_REPLACE) | ||
2034 | goto cleanup; | ||
2035 | ret = 0; | ||
2036 | if (!value) | ||
2037 | goto cleanup; | ||
2038 | } else { | ||
2039 | ret = -EEXIST; | ||
2040 | if (flags & XATTR_CREATE) | ||
2041 | goto cleanup; | ||
2042 | } | ||
2043 | |||
2044 | if (!value) { | ||
2045 | /* Remove existing extended attribute */ | ||
2046 | if (!xis.not_found) | ||
2047 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
2048 | else if (!xbs.not_found) | ||
2049 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
2050 | } else { | ||
2051 | /* We always try to set extended attribute into inode first*/ | ||
2052 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
2053 | if (!ret && !xbs.not_found) { | ||
2054 | /* | ||
2055 | * If succeed and that extended attribute existing in | ||
2056 | * external block, then we will remove it. | ||
2057 | */ | ||
2058 | xi.value = NULL; | ||
2059 | xi.value_len = 0; | ||
2060 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
2061 | } else if (ret == -ENOSPC) { | ||
2062 | if (di->i_xattr_loc && !xbs.xattr_bh) { | ||
2063 | ret = ocfs2_xattr_block_find(inode, name_index, | ||
2064 | name, &xbs); | ||
2065 | if (ret) | ||
2066 | goto cleanup; | ||
2067 | } | ||
2068 | /* | ||
2069 | * If no space in inode, we will set extended attribute | ||
2070 | * into external block. | ||
2071 | */ | ||
2072 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
2073 | if (ret) | ||
2074 | goto cleanup; | ||
2075 | if (!xis.not_found) { | ||
2076 | /* | ||
2077 | * If succeed and that extended attribute | ||
2078 | * existing in inode, we will remove it. | ||
2079 | */ | ||
2080 | xi.value = NULL; | ||
2081 | xi.value_len = 0; | ||
2082 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
2083 | } | ||
2084 | } | ||
2085 | } | ||
2086 | cleanup: | ||
2087 | up_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
2088 | ocfs2_inode_unlock(inode, 1); | ||
2089 | brelse(di_bh); | ||
2090 | brelse(xbs.xattr_bh); | ||
2091 | for (i = 0; i < blk_per_bucket; i++) | ||
2092 | brelse(xbs.bucket.bhs[i]); | ||
2093 | |||
2094 | return ret; | ||
2095 | } | ||
2096 | |||
2097 | /* | ||
2098 | * Find the xattr extent rec which may contains name_hash. | ||
2099 | * e_cpos will be the first name hash of the xattr rec. | ||
2100 | * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. | ||
2101 | */ | ||
2102 | static int ocfs2_xattr_get_rec(struct inode *inode, | ||
2103 | u32 name_hash, | ||
2104 | u64 *p_blkno, | ||
2105 | u32 *e_cpos, | ||
2106 | u32 *num_clusters, | ||
2107 | struct ocfs2_extent_list *el) | ||
2108 | { | ||
2109 | int ret = 0, i; | ||
2110 | struct buffer_head *eb_bh = NULL; | ||
2111 | struct ocfs2_extent_block *eb; | ||
2112 | struct ocfs2_extent_rec *rec = NULL; | ||
2113 | u64 e_blkno = 0; | ||
2114 | |||
2115 | if (el->l_tree_depth) { | ||
2116 | ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | ||
2117 | if (ret) { | ||
2118 | mlog_errno(ret); | ||
2119 | goto out; | ||
2120 | } | ||
2121 | |||
2122 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2123 | el = &eb->h_list; | ||
2124 | |||
2125 | if (el->l_tree_depth) { | ||
2126 | ocfs2_error(inode->i_sb, | ||
2127 | "Inode %lu has non zero tree depth in " | ||
2128 | "xattr tree block %llu\n", inode->i_ino, | ||
2129 | (unsigned long long)eb_bh->b_blocknr); | ||
2130 | ret = -EROFS; | ||
2131 | goto out; | ||
2132 | } | ||
2133 | } | ||
2134 | |||
2135 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
2136 | rec = &el->l_recs[i]; | ||
2137 | |||
2138 | if (le32_to_cpu(rec->e_cpos) <= name_hash) { | ||
2139 | e_blkno = le64_to_cpu(rec->e_blkno); | ||
2140 | break; | ||
2141 | } | ||
2142 | } | ||
2143 | |||
2144 | if (!e_blkno) { | ||
2145 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
2146 | "record (%u, %u, 0) in xattr", inode->i_ino, | ||
2147 | le32_to_cpu(rec->e_cpos), | ||
2148 | ocfs2_rec_clusters(el, rec)); | ||
2149 | ret = -EROFS; | ||
2150 | goto out; | ||
2151 | } | ||
2152 | |||
2153 | *p_blkno = le64_to_cpu(rec->e_blkno); | ||
2154 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
2155 | if (e_cpos) | ||
2156 | *e_cpos = le32_to_cpu(rec->e_cpos); | ||
2157 | out: | ||
2158 | brelse(eb_bh); | ||
2159 | return ret; | ||
2160 | } | ||
2161 | |||
/*
 * Per-bucket callback type used by ocfs2_iterate_xattr_buckets(): it is
 * called with the inode, the bucket that was just read and the caller's
 * opaque "para" pointer.  A non-zero return value stops the iteration and
 * is handed back to the caller of ocfs2_iterate_xattr_buckets().
 */
2162 | typedef int (xattr_bucket_func)(struct inode *inode, | ||
2163 | struct ocfs2_xattr_bucket *bucket, | ||
2164 | void *para); | ||
2165 | |||
2166 | static int ocfs2_find_xe_in_bucket(struct inode *inode, | ||
2167 | struct buffer_head *header_bh, | ||
2168 | int name_index, | ||
2169 | const char *name, | ||
2170 | u32 name_hash, | ||
2171 | u16 *xe_index, | ||
2172 | int *found) | ||
2173 | { | ||
2174 | int i, ret = 0, cmp = 1, block_off, new_offset; | ||
2175 | struct ocfs2_xattr_header *xh = | ||
2176 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
2177 | size_t name_len = strlen(name); | ||
2178 | struct ocfs2_xattr_entry *xe = NULL; | ||
2179 | struct buffer_head *name_bh = NULL; | ||
2180 | char *xe_name; | ||
2181 | |||
2182 | /* | ||
2183 | * We don't use binary search in the bucket because there | ||
2184 | * may be multiple entries with the same name hash. | ||
2185 | */ | ||
2186 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
2187 | xe = &xh->xh_entries[i]; | ||
2188 | |||
2189 | if (name_hash > le32_to_cpu(xe->xe_name_hash)) | ||
2190 | continue; | ||
2191 | else if (name_hash < le32_to_cpu(xe->xe_name_hash)) | ||
2192 | break; | ||
2193 | |||
2194 | cmp = name_index - ocfs2_xattr_get_type(xe); | ||
2195 | if (!cmp) | ||
2196 | cmp = name_len - xe->xe_name_len; | ||
2197 | if (cmp) | ||
2198 | continue; | ||
2199 | |||
2200 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
2201 | xh, | ||
2202 | i, | ||
2203 | &block_off, | ||
2204 | &new_offset); | ||
2205 | if (ret) { | ||
2206 | mlog_errno(ret); | ||
2207 | break; | ||
2208 | } | ||
2209 | |||
2210 | ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, | ||
2211 | &name_bh); | ||
2212 | if (ret) { | ||
2213 | mlog_errno(ret); | ||
2214 | break; | ||
2215 | } | ||
2216 | xe_name = name_bh->b_data + new_offset; | ||
2217 | |||
2218 | cmp = memcmp(name, xe_name, name_len); | ||
2219 | brelse(name_bh); | ||
2220 | name_bh = NULL; | ||
2221 | |||
2222 | if (cmp == 0) { | ||
2223 | *xe_index = i; | ||
2224 | *found = 1; | ||
2225 | ret = 0; | ||
2226 | break; | ||
2227 | } | ||
2228 | } | ||
2229 | |||
2230 | return ret; | ||
2231 | } | ||
2232 | |||
2233 | /* | ||
2234 | * Find the specified xattr entry in a series of buckets. | ||
2235 | * This series start from p_blkno and last for num_clusters. | ||
2236 | * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains | ||
2237 | * the num of the valid buckets. | ||
2238 | * | ||
2239 | * Return the buffer_head this xattr should reside in. And if the xattr's | ||
2240 | * hash is in the gap of 2 buckets, return the lower bucket. | ||
 *
 * Unless a block read fails, the first block of the chosen bucket is
 * stored in xs->bucket.bhs[0] and xs->header/base/end are pointed at it,
 * whether or not the entry was found.  When the entry is found the
 * remaining blocks of the bucket are read into xs->bucket.bhs[1..] and
 * xs->here is set to the entry; the function then returns 0.  It returns
 * -ENODATA when the entry does not exist, or the error of a failed read
 * or of ocfs2_find_xe_in_bucket().
 *
 * NOTE(review): the first_hash and num_clusters parameters are not used
 * in this function, and the local last_hash is assigned but never read.
2241 | */ | ||
2242 | static int ocfs2_xattr_bucket_find(struct inode *inode, | ||
2243 | int name_index, | ||
2244 | const char *name, | ||
2245 | u32 name_hash, | ||
2246 | u64 p_blkno, | ||
2247 | u32 first_hash, | ||
2248 | u32 num_clusters, | ||
2249 | struct ocfs2_xattr_search *xs) | ||
2250 | { | ||
2251 | int ret, found = 0; | ||
2252 | struct buffer_head *bh = NULL; | ||
2253 | struct buffer_head *lower_bh = NULL; | ||
2254 | struct ocfs2_xattr_header *xh = NULL; | ||
2255 | struct ocfs2_xattr_entry *xe = NULL; | ||
2256 | u16 index = 0; | ||
2257 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2258 | int low_bucket = 0, bucket, high_bucket; | ||
2259 | u32 last_hash; | ||
2260 | u64 blkno; | ||
2261 | |||
/* Read the first bucket: its header holds the number of valid buckets. */
2262 | ret = ocfs2_read_block(inode, p_blkno, &bh); | ||
2263 | if (ret) { | ||
2264 | mlog_errno(ret); | ||
2265 | goto out; | ||
2266 | } | ||
2267 | |||
2268 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
2269 | high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; | ||
2270 | |||
/* Binary search over the buckets, keyed on their entries' name hashes. */
2271 | while (low_bucket <= high_bucket) { | ||
2272 | brelse(bh); | ||
2273 | bh = NULL; | ||
2274 | bucket = (low_bucket + high_bucket) / 2; | ||
2275 | |||
2276 | blkno = p_blkno + bucket * blk_per_bucket; | ||
2277 | |||
2278 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
2279 | if (ret) { | ||
2280 | mlog_errno(ret); | ||
2281 | goto out; | ||
2282 | } | ||
2283 | |||
2284 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
2285 | xe = &xh->xh_entries[0]; | ||
/*
 * The hash sorts before this bucket's first entry: search the lower
 * half.  bh stays held here; it is released at the top of the next
 * iteration, or used by the !lower_bh fallback after the loop.
 */
2286 | if (name_hash < le32_to_cpu(xe->xe_name_hash)) { | ||
2287 | high_bucket = bucket - 1; | ||
2288 | continue; | ||
2289 | } | ||
2290 | |||
2291 | /* | ||
2292 | * Check whether the hash of the last entry in our | ||
2293 | * bucket is larger than the search one. for an empty | ||
2294 | * bucket, the last one is also the first one. | ||
2295 | */ | ||
2296 | if (xh->xh_count) | ||
2297 | xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; | ||
2298 | |||
2299 | last_hash = le32_to_cpu(xe->xe_name_hash); | ||
2300 | |||
2301 | /* record lower_bh which may be the insert place. */ | ||
2302 | brelse(lower_bh); | ||
2303 | lower_bh = bh; | ||
2304 | bh = NULL; | ||
2305 | |||
/* The hash is past this bucket's last entry: search the upper half. */
2306 | if (name_hash > le32_to_cpu(xe->xe_name_hash)) { | ||
2307 | low_bucket = bucket + 1; | ||
2308 | continue; | ||
2309 | } | ||
2310 | |||
2311 | /* the searched xattr should reside in this bucket if exists. */ | ||
2312 | ret = ocfs2_find_xe_in_bucket(inode, lower_bh, | ||
2313 | name_index, name, name_hash, | ||
2314 | &index, &found); | ||
2315 | if (ret) { | ||
2316 | mlog_errno(ret); | ||
2317 | goto out; | ||
2318 | } | ||
2319 | break; | ||
2320 | } | ||
2321 | |||
2322 | /* | ||
2323 | * Record the bucket we have found. | ||
2324 | * When the xattr's hash value is in the gap of 2 buckets, we will | ||
2325 | * always set it to the previous bucket. | ||
2326 | */ | ||
2327 | if (!lower_bh) { | ||
2328 | /* | ||
2329 | * We can't find any bucket whose first name_hash is less | ||
2330 | * than the find name_hash. | ||
2331 | */ | ||
2332 | BUG_ON(bh->b_blocknr != p_blkno); | ||
2333 | lower_bh = bh; | ||
2334 | bh = NULL; | ||
2335 | } | ||
/* Hand the reference over to xs; lower_bh is cleared so "out" keeps it. */
2336 | xs->bucket.bhs[0] = lower_bh; | ||
2337 | xs->bucket.xh = (struct ocfs2_xattr_header *) | ||
2338 | xs->bucket.bhs[0]->b_data; | ||
2339 | lower_bh = NULL; | ||
2340 | |||
2341 | xs->header = xs->bucket.xh; | ||
2342 | xs->base = xs->bucket.bhs[0]->b_data; | ||
2343 | xs->end = xs->base + inode->i_sb->s_blocksize; | ||
2344 | |||
2345 | if (found) { | ||
2346 | /* | ||
2347 | * If we have found the xattr entry, read all the blocks in | ||
2348 | * this bucket. | ||
2349 | */ | ||
2350 | ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, | ||
2351 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
2352 | OCFS2_BH_CACHED); | ||
2353 | if (ret) { | ||
2354 | mlog_errno(ret); | ||
2355 | goto out; | ||
2356 | } | ||
2357 | |||
2358 | xs->here = &xs->header->xh_entries[index]; | ||
2359 | mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, | ||
2360 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); | ||
2361 | } else | ||
2362 | ret = -ENODATA; | ||
2363 | |||
2364 | out: | ||
2365 | brelse(bh); | ||
2366 | brelse(lower_bh); | ||
2367 | return ret; | ||
2368 | } | ||
2369 | |||
2370 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
2371 | struct buffer_head *root_bh, | ||
2372 | int name_index, | ||
2373 | const char *name, | ||
2374 | struct ocfs2_xattr_search *xs) | ||
2375 | { | ||
2376 | int ret; | ||
2377 | struct ocfs2_xattr_block *xb = | ||
2378 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
2379 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
2380 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
2381 | u64 p_blkno = 0; | ||
2382 | u32 first_hash, num_clusters = 0; | ||
2383 | u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); | ||
2384 | |||
2385 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
2386 | return -ENODATA; | ||
2387 | |||
2388 | mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", | ||
2389 | name, name_hash, name_index); | ||
2390 | |||
2391 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, | ||
2392 | &num_clusters, el); | ||
2393 | if (ret) { | ||
2394 | mlog_errno(ret); | ||
2395 | goto out; | ||
2396 | } | ||
2397 | |||
2398 | BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); | ||
2399 | |||
2400 | mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " | ||
2401 | "in the rec is %u\n", num_clusters, p_blkno, first_hash); | ||
2402 | |||
2403 | ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, | ||
2404 | p_blkno, first_hash, num_clusters, xs); | ||
2405 | |||
2406 | out: | ||
2407 | return ret; | ||
2408 | } | ||
2409 | |||
2410 | static int ocfs2_iterate_xattr_buckets(struct inode *inode, | ||
2411 | u64 blkno, | ||
2412 | u32 clusters, | ||
2413 | xattr_bucket_func *func, | ||
2414 | void *para) | ||
2415 | { | ||
2416 | int i, j, ret = 0; | ||
2417 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2418 | u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); | ||
2419 | u32 num_buckets = clusters * bpc; | ||
2420 | struct ocfs2_xattr_bucket bucket; | ||
2421 | |||
2422 | memset(&bucket, 0, sizeof(bucket)); | ||
2423 | |||
2424 | mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", | ||
2425 | clusters, blkno); | ||
2426 | |||
2427 | for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { | ||
2428 | ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, | ||
2429 | bucket.bhs, OCFS2_BH_CACHED); | ||
2430 | if (ret) { | ||
2431 | mlog_errno(ret); | ||
2432 | goto out; | ||
2433 | } | ||
2434 | |||
2435 | bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; | ||
2436 | /* | ||
2437 | * The real bucket num in this series of blocks is stored | ||
2438 | * in the 1st bucket. | ||
2439 | */ | ||
2440 | if (i == 0) | ||
2441 | num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); | ||
2442 | |||
2443 | mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, | ||
2444 | le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); | ||
2445 | if (func) { | ||
2446 | ret = func(inode, &bucket, para); | ||
2447 | if (ret) { | ||
2448 | mlog_errno(ret); | ||
2449 | break; | ||
2450 | } | ||
2451 | } | ||
2452 | |||
2453 | for (j = 0; j < blk_per_bucket; j++) | ||
2454 | brelse(bucket.bhs[j]); | ||
2455 | memset(&bucket, 0, sizeof(bucket)); | ||
2456 | } | ||
2457 | |||
2458 | out: | ||
2459 | for (j = 0; j < blk_per_bucket; j++) | ||
2460 | brelse(bucket.bhs[j]); | ||
2461 | |||
2462 | return ret; | ||
2463 | } | ||
2464 | |||
/*
 * State carried through ocfs2_iterate_xattr_buckets() while listing the
 * names stored in an indexed xattr tree; ocfs2_list_xattr_bucket() hands
 * all three fields to ocfs2_xattr_list_entry() for every entry.
 */
2465 | struct ocfs2_xattr_tree_list { | ||
/* caller-supplied output buffer */
2466 | char *buffer; | ||
/* size passed along with buffer */
2467 | size_t buffer_size; | ||
/* running result, returned by ocfs2_xattr_tree_list_index_block() */
2468 | size_t result; | ||
2469 | }; | ||
2470 | |||
2471 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
2472 | struct ocfs2_xattr_header *xh, | ||
2473 | int index, | ||
2474 | int *block_off, | ||
2475 | int *new_offset) | ||
2476 | { | ||
2477 | u16 name_offset; | ||
2478 | |||
2479 | if (index < 0 || index >= le16_to_cpu(xh->xh_count)) | ||
2480 | return -EINVAL; | ||
2481 | |||
2482 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | ||
2483 | |||
2484 | *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | ||
2485 | *new_offset = name_offset % inode->i_sb->s_blocksize; | ||
2486 | |||
2487 | return 0; | ||
2488 | } | ||
2489 | |||
2490 | static int ocfs2_list_xattr_bucket(struct inode *inode, | ||
2491 | struct ocfs2_xattr_bucket *bucket, | ||
2492 | void *para) | ||
2493 | { | ||
2494 | int ret = 0, type; | ||
2495 | struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; | ||
2496 | int i, block_off, new_offset; | ||
2497 | const char *prefix, *name; | ||
2498 | |||
2499 | for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { | ||
2500 | struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; | ||
2501 | type = ocfs2_xattr_get_type(entry); | ||
2502 | prefix = ocfs2_xattr_prefix(type); | ||
2503 | |||
2504 | if (prefix) { | ||
2505 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
2506 | bucket->xh, | ||
2507 | i, | ||
2508 | &block_off, | ||
2509 | &new_offset); | ||
2510 | if (ret) | ||
2511 | break; | ||
2512 | |||
2513 | name = (const char *)bucket->bhs[block_off]->b_data + | ||
2514 | new_offset; | ||
2515 | ret = ocfs2_xattr_list_entry(xl->buffer, | ||
2516 | xl->buffer_size, | ||
2517 | &xl->result, | ||
2518 | prefix, name, | ||
2519 | entry->xe_name_len); | ||
2520 | if (ret) | ||
2521 | break; | ||
2522 | } | ||
2523 | } | ||
2524 | |||
2525 | return ret; | ||
2526 | } | ||
2527 | |||
2528 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
2529 | struct ocfs2_xattr_tree_root *xt, | ||
2530 | char *buffer, | ||
2531 | size_t buffer_size) | ||
2532 | { | ||
2533 | struct ocfs2_extent_list *el = &xt->xt_list; | ||
2534 | int ret = 0; | ||
2535 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | ||
2536 | u64 p_blkno = 0; | ||
2537 | struct ocfs2_xattr_tree_list xl = { | ||
2538 | .buffer = buffer, | ||
2539 | .buffer_size = buffer_size, | ||
2540 | .result = 0, | ||
2541 | }; | ||
2542 | |||
2543 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
2544 | return 0; | ||
2545 | |||
2546 | while (name_hash > 0) { | ||
2547 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
2548 | &e_cpos, &num_clusters, el); | ||
2549 | if (ret) { | ||
2550 | mlog_errno(ret); | ||
2551 | goto out; | ||
2552 | } | ||
2553 | |||
2554 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
2555 | ocfs2_list_xattr_bucket, | ||
2556 | &xl); | ||
2557 | if (ret) { | ||
2558 | mlog_errno(ret); | ||
2559 | goto out; | ||
2560 | } | ||
2561 | |||
2562 | if (e_cpos == 0) | ||
2563 | break; | ||
2564 | |||
2565 | name_hash = e_cpos - 1; | ||
2566 | } | ||
2567 | |||
2568 | ret = xl.result; | ||
2569 | out: | ||
2570 | return ret; | ||
2571 | } | ||
2572 | |||
2573 | static int cmp_xe(const void *a, const void *b) | ||
2574 | { | ||
2575 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
2576 | u32 l_hash = le32_to_cpu(l->xe_name_hash); | ||
2577 | u32 r_hash = le32_to_cpu(r->xe_name_hash); | ||
2578 | |||
2579 | if (l_hash > r_hash) | ||
2580 | return 1; | ||
2581 | if (l_hash < r_hash) | ||
2582 | return -1; | ||
2583 | return 0; | ||
2584 | } | ||
2585 | |||
2586 | static void swap_xe(void *a, void *b, int size) | ||
2587 | { | ||
2588 | struct ocfs2_xattr_entry *l = a, *r = b, tmp; | ||
2589 | |||
2590 | tmp = *l; | ||
2591 | memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); | ||
2592 | memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); | ||
2593 | } | ||
2594 | |||
2595 | /* | ||
2596 | * When the ocfs2_xattr_block is filled up, new bucket will be created | ||
2597 | * and all the xattr entries will be moved to the new bucket. | ||
2598 | * Note: we need to sort the entries since they are not saved in order | ||
2599 | * in the ocfs2_xattr_block. | ||
2600 | */ | ||
2601 | static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, | ||
2602 | struct buffer_head *xb_bh, | ||
2603 | struct buffer_head *xh_bh, | ||
2604 | struct buffer_head *data_bh) | ||
2605 | { | ||
2606 | int i, blocksize = inode->i_sb->s_blocksize; | ||
2607 | u16 offset, size, off_change; | ||
2608 | struct ocfs2_xattr_entry *xe; | ||
2609 | struct ocfs2_xattr_block *xb = | ||
2610 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
2611 | struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; | ||
2612 | struct ocfs2_xattr_header *xh = | ||
2613 | (struct ocfs2_xattr_header *)xh_bh->b_data; | ||
2614 | u16 count = le16_to_cpu(xb_xh->xh_count); | ||
2615 | char *target = xh_bh->b_data, *src = xb_bh->b_data; | ||
2616 | |||
2617 | mlog(0, "cp xattr from block %llu to bucket %llu\n", | ||
2618 | (unsigned long long)xb_bh->b_blocknr, | ||
2619 | (unsigned long long)xh_bh->b_blocknr); | ||
2620 | |||
2621 | memset(xh_bh->b_data, 0, blocksize); | ||
2622 | if (data_bh) | ||
2623 | memset(data_bh->b_data, 0, blocksize); | ||
2624 | /* | ||
2625 | * Since the xe_name_offset is based on ocfs2_xattr_header, | ||
2626 | * there is a offset change corresponding to the change of | ||
2627 | * ocfs2_xattr_header's position. | ||
2628 | */ | ||
2629 | off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); | ||
2630 | xe = &xb_xh->xh_entries[count - 1]; | ||
2631 | offset = le16_to_cpu(xe->xe_name_offset) + off_change; | ||
2632 | size = blocksize - offset; | ||
2633 | |||
2634 | /* copy all the names and values. */ | ||
2635 | if (data_bh) | ||
2636 | target = data_bh->b_data; | ||
2637 | memcpy(target + offset, src + offset, size); | ||
2638 | |||
2639 | /* Init new header now. */ | ||
2640 | xh->xh_count = xb_xh->xh_count; | ||
2641 | xh->xh_num_buckets = cpu_to_le16(1); | ||
2642 | xh->xh_name_value_len = cpu_to_le16(size); | ||
2643 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); | ||
2644 | |||
2645 | /* copy all the entries. */ | ||
2646 | target = xh_bh->b_data; | ||
2647 | offset = offsetof(struct ocfs2_xattr_header, xh_entries); | ||
2648 | size = count * sizeof(struct ocfs2_xattr_entry); | ||
2649 | memcpy(target + offset, (char *)xb_xh + offset, size); | ||
2650 | |||
2651 | /* Change the xe offset for all the xe because of the move. */ | ||
2652 | off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + | ||
2653 | offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); | ||
2654 | for (i = 0; i < count; i++) | ||
2655 | le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); | ||
2656 | |||
2657 | mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n", | ||
2658 | offset, size, off_change); | ||
2659 | |||
2660 | sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), | ||
2661 | cmp_xe, swap_xe); | ||
2662 | } | ||
2663 | |||
2664 | /* | ||
2665 | * After we move xattr from block to index btree, we have to | ||
2666 | * update ocfs2_xattr_search to the new xe and base. | ||
2667 | * | ||
2668 | * When the entry is in xattr block, xattr_bh indicates the storage place. | ||
2669 | * While if the entry is in index b-tree, "bucket" indicates the | ||
2670 | * real place of the xattr. | ||
2671 | */ | ||
2672 | static int ocfs2_xattr_update_xattr_search(struct inode *inode, | ||
2673 | struct ocfs2_xattr_search *xs, | ||
2674 | struct buffer_head *old_bh, | ||
2675 | struct buffer_head *new_bh) | ||
2676 | { | ||
2677 | int ret = 0; | ||
2678 | char *buf = old_bh->b_data; | ||
2679 | struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; | ||
2680 | struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; | ||
2681 | int i, blocksize = inode->i_sb->s_blocksize; | ||
2682 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2683 | |||
2684 | xs->bucket.bhs[0] = new_bh; | ||
2685 | get_bh(new_bh); | ||
2686 | xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data; | ||
2687 | xs->header = xs->bucket.xh; | ||
2688 | |||
2689 | xs->base = new_bh->b_data; | ||
2690 | xs->end = xs->base + inode->i_sb->s_blocksize; | ||
2691 | |||
2692 | if (!xs->not_found) { | ||
2693 | if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { | ||
2694 | ret = ocfs2_read_blocks(inode, | ||
2695 | xs->bucket.bhs[0]->b_blocknr + 1, | ||
2696 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
2697 | OCFS2_BH_CACHED); | ||
2698 | if (ret) { | ||
2699 | mlog_errno(ret); | ||
2700 | return ret; | ||
2701 | } | ||
2702 | |||
2703 | i = xs->here - old_xh->xh_entries; | ||
2704 | xs->here = &xs->header->xh_entries[i]; | ||
2705 | } | ||
2706 | } | ||
2707 | |||
2708 | return ret; | ||
2709 | } | ||
2710 | |||
2711 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
2712 | struct ocfs2_xattr_search *xs) | ||
2713 | { | ||
2714 | int ret, credits = OCFS2_SUBALLOC_ALLOC; | ||
2715 | u32 bit_off, len; | ||
2716 | u64 blkno; | ||
2717 | handle_t *handle; | ||
2718 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2719 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2720 | struct ocfs2_alloc_context *data_ac; | ||
2721 | struct buffer_head *xh_bh = NULL, *data_bh = NULL; | ||
2722 | struct buffer_head *xb_bh = xs->xattr_bh; | ||
2723 | struct ocfs2_xattr_block *xb = | ||
2724 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
2725 | struct ocfs2_xattr_tree_root *xr; | ||
2726 | u16 xb_flags = le16_to_cpu(xb->xb_flags); | ||
2727 | u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2728 | |||
2729 | mlog(0, "create xattr index block for %llu\n", | ||
2730 | (unsigned long long)xb_bh->b_blocknr); | ||
2731 | |||
2732 | BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); | ||
2733 | |||
2734 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | ||
2735 | if (ret) { | ||
2736 | mlog_errno(ret); | ||
2737 | goto out; | ||
2738 | } | ||
2739 | |||
2740 | /* | ||
2741 | * XXX: | ||
2742 | * We can use this lock for now, and maybe move to a dedicated mutex | ||
2743 | * if performance becomes a problem later. | ||
2744 | */ | ||
2745 | down_write(&oi->ip_alloc_sem); | ||
2746 | |||
2747 | /* | ||
2748 | * 3 more credits, one for xattr block update, one for the 1st block | ||
2749 | * of the new xattr bucket and one for the value/data. | ||
2750 | */ | ||
2751 | credits += 3; | ||
2752 | handle = ocfs2_start_trans(osb, credits); | ||
2753 | if (IS_ERR(handle)) { | ||
2754 | ret = PTR_ERR(handle); | ||
2755 | mlog_errno(ret); | ||
2756 | goto out_sem; | ||
2757 | } | ||
2758 | |||
2759 | ret = ocfs2_journal_access(handle, inode, xb_bh, | ||
2760 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2761 | if (ret) { | ||
2762 | mlog_errno(ret); | ||
2763 | goto out_commit; | ||
2764 | } | ||
2765 | |||
2766 | ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); | ||
2767 | if (ret) { | ||
2768 | mlog_errno(ret); | ||
2769 | goto out_commit; | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * The bucket may spread in many blocks, and | ||
2774 | * we will only touch the 1st block and the last block | ||
2775 | * in the whole bucket(one for entry and one for data). | ||
2776 | */ | ||
2777 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); | ||
2778 | |||
2779 | mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); | ||
2780 | |||
2781 | xh_bh = sb_getblk(inode->i_sb, blkno); | ||
2782 | if (!xh_bh) { | ||
2783 | ret = -EIO; | ||
2784 | mlog_errno(ret); | ||
2785 | goto out_commit; | ||
2786 | } | ||
2787 | |||
2788 | ocfs2_set_new_buffer_uptodate(inode, xh_bh); | ||
2789 | |||
2790 | ret = ocfs2_journal_access(handle, inode, xh_bh, | ||
2791 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2792 | if (ret) { | ||
2793 | mlog_errno(ret); | ||
2794 | goto out_commit; | ||
2795 | } | ||
2796 | |||
2797 | if (bpb > 1) { | ||
2798 | data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1); | ||
2799 | if (!data_bh) { | ||
2800 | ret = -EIO; | ||
2801 | mlog_errno(ret); | ||
2802 | goto out_commit; | ||
2803 | } | ||
2804 | |||
2805 | ocfs2_set_new_buffer_uptodate(inode, data_bh); | ||
2806 | |||
2807 | ret = ocfs2_journal_access(handle, inode, data_bh, | ||
2808 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2809 | if (ret) { | ||
2810 | mlog_errno(ret); | ||
2811 | goto out_commit; | ||
2812 | } | ||
2813 | } | ||
2814 | |||
2815 | ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); | ||
2816 | |||
2817 | ocfs2_journal_dirty(handle, xh_bh); | ||
2818 | if (data_bh) | ||
2819 | ocfs2_journal_dirty(handle, data_bh); | ||
2820 | |||
2821 | ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); | ||
2822 | |||
2823 | /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ | ||
2824 | memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - | ||
2825 | offsetof(struct ocfs2_xattr_block, xb_attrs)); | ||
2826 | |||
2827 | xr = &xb->xb_attrs.xb_root; | ||
2828 | xr->xt_clusters = cpu_to_le32(1); | ||
2829 | xr->xt_last_eb_blk = 0; | ||
2830 | xr->xt_list.l_tree_depth = 0; | ||
2831 | xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); | ||
2832 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); | ||
2833 | |||
2834 | xr->xt_list.l_recs[0].e_cpos = 0; | ||
2835 | xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); | ||
2836 | xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); | ||
2837 | |||
2838 | xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); | ||
2839 | |||
2840 | ret = ocfs2_journal_dirty(handle, xb_bh); | ||
2841 | if (ret) { | ||
2842 | mlog_errno(ret); | ||
2843 | goto out_commit; | ||
2844 | } | ||
2845 | |||
2846 | out_commit: | ||
2847 | ocfs2_commit_trans(osb, handle); | ||
2848 | |||
2849 | out_sem: | ||
2850 | up_write(&oi->ip_alloc_sem); | ||
2851 | |||
2852 | out: | ||
2853 | if (data_ac) | ||
2854 | ocfs2_free_alloc_context(data_ac); | ||
2855 | |||
2856 | brelse(xh_bh); | ||
2857 | brelse(data_bh); | ||
2858 | |||
2859 | return ret; | ||
2860 | } | ||
2861 | |||
2862 | static int cmp_xe_offset(const void *a, const void *b) | ||
2863 | { | ||
2864 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
2865 | u32 l_name_offset = le16_to_cpu(l->xe_name_offset); | ||
2866 | u32 r_name_offset = le16_to_cpu(r->xe_name_offset); | ||
2867 | |||
2868 | if (l_name_offset < r_name_offset) | ||
2869 | return 1; | ||
2870 | if (l_name_offset > r_name_offset) | ||
2871 | return -1; | ||
2872 | return 0; | ||
2873 | } | ||
2874 | |||
2875 | /* | ||
2876 | * defrag a xattr bucket if we find that the bucket has some | ||
2877 | * holes beteen name/value pairs. | ||
2878 | * We will move all the name/value pairs to the end of the bucket | ||
2879 | * so that we can spare some space for insertion. | ||
2880 | */ | ||
2881 | static int ocfs2_defrag_xattr_bucket(struct inode *inode, | ||
2882 | struct ocfs2_xattr_bucket *bucket) | ||
2883 | { | ||
2884 | int ret, i; | ||
2885 | size_t end, offset, len, value_len; | ||
2886 | struct ocfs2_xattr_header *xh; | ||
2887 | char *entries, *buf, *bucket_buf = NULL; | ||
2888 | u64 blkno = bucket->bhs[0]->b_blocknr; | ||
2889 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2890 | u16 xh_free_start; | ||
2891 | size_t blocksize = inode->i_sb->s_blocksize; | ||
2892 | handle_t *handle; | ||
2893 | struct buffer_head **bhs; | ||
2894 | struct ocfs2_xattr_entry *xe; | ||
2895 | |||
2896 | bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
2897 | GFP_NOFS); | ||
2898 | if (!bhs) | ||
2899 | return -ENOMEM; | ||
2900 | |||
2901 | ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, | ||
2902 | OCFS2_BH_CACHED); | ||
2903 | if (ret) | ||
2904 | goto out; | ||
2905 | |||
2906 | /* | ||
2907 | * In order to make the operation more efficient and generic, | ||
2908 | * we copy all the blocks into a contiguous memory and do the | ||
2909 | * defragment there, so if anything is error, we will not touch | ||
2910 | * the real block. | ||
2911 | */ | ||
2912 | bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); | ||
2913 | if (!bucket_buf) { | ||
2914 | ret = -EIO; | ||
2915 | goto out; | ||
2916 | } | ||
2917 | |||
2918 | buf = bucket_buf; | ||
2919 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) | ||
2920 | memcpy(buf, bhs[i]->b_data, blocksize); | ||
2921 | |||
2922 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); | ||
2923 | if (IS_ERR(handle)) { | ||
2924 | ret = PTR_ERR(handle); | ||
2925 | handle = NULL; | ||
2926 | mlog_errno(ret); | ||
2927 | goto out; | ||
2928 | } | ||
2929 | |||
2930 | for (i = 0; i < blk_per_bucket; i++) { | ||
2931 | ret = ocfs2_journal_access(handle, inode, bhs[i], | ||
2932 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2933 | if (ret < 0) { | ||
2934 | mlog_errno(ret); | ||
2935 | goto commit; | ||
2936 | } | ||
2937 | } | ||
2938 | |||
2939 | xh = (struct ocfs2_xattr_header *)bucket_buf; | ||
2940 | entries = (char *)xh->xh_entries; | ||
2941 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
2942 | |||
2943 | mlog(0, "adjust xattr bucket in %llu, count = %u, " | ||
2944 | "xh_free_start = %u, xh_name_value_len = %u.\n", | ||
2945 | blkno, le16_to_cpu(xh->xh_count), xh_free_start, | ||
2946 | le16_to_cpu(xh->xh_name_value_len)); | ||
2947 | |||
2948 | /* | ||
2949 | * sort all the entries by their offset. | ||
2950 | * the largest will be the first, so that we can | ||
2951 | * move them to the end one by one. | ||
2952 | */ | ||
2953 | sort(entries, le16_to_cpu(xh->xh_count), | ||
2954 | sizeof(struct ocfs2_xattr_entry), | ||
2955 | cmp_xe_offset, swap_xe); | ||
2956 | |||
2957 | /* Move all name/values to the end of the bucket. */ | ||
2958 | xe = xh->xh_entries; | ||
2959 | end = OCFS2_XATTR_BUCKET_SIZE; | ||
2960 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { | ||
2961 | offset = le16_to_cpu(xe->xe_name_offset); | ||
2962 | if (ocfs2_xattr_is_local(xe)) | ||
2963 | value_len = OCFS2_XATTR_SIZE( | ||
2964 | le64_to_cpu(xe->xe_value_size)); | ||
2965 | else | ||
2966 | value_len = OCFS2_XATTR_ROOT_SIZE; | ||
2967 | len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len; | ||
2968 | |||
2969 | /* | ||
2970 | * We must make sure that the name/value pair | ||
2971 | * exist in the same block. So adjust end to | ||
2972 | * the previous block end if needed. | ||
2973 | */ | ||
2974 | if (((end - len) / blocksize != | ||
2975 | (end - 1) / blocksize)) | ||
2976 | end = end - end % blocksize; | ||
2977 | |||
2978 | if (end > offset + len) { | ||
2979 | memmove(bucket_buf + end - len, | ||
2980 | bucket_buf + offset, len); | ||
2981 | xe->xe_name_offset = cpu_to_le16(end - len); | ||
2982 | } | ||
2983 | |||
2984 | mlog_bug_on_msg(end < offset + len, "Defrag check failed for " | ||
2985 | "bucket %llu\n", (unsigned long long)blkno); | ||
2986 | |||
2987 | end -= len; | ||
2988 | } | ||
2989 | |||
2990 | mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " | ||
2991 | "bucket %llu\n", (unsigned long long)blkno); | ||
2992 | |||
2993 | if (xh_free_start == end) | ||
2994 | goto commit; | ||
2995 | |||
2996 | memset(bucket_buf + xh_free_start, 0, end - xh_free_start); | ||
2997 | xh->xh_free_start = cpu_to_le16(end); | ||
2998 | |||
2999 | /* sort the entries by their name_hash. */ | ||
3000 | sort(entries, le16_to_cpu(xh->xh_count), | ||
3001 | sizeof(struct ocfs2_xattr_entry), | ||
3002 | cmp_xe, swap_xe); | ||
3003 | |||
3004 | buf = bucket_buf; | ||
3005 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { | ||
3006 | memcpy(bhs[i]->b_data, buf, blocksize); | ||
3007 | ocfs2_journal_dirty(handle, bhs[i]); | ||
3008 | } | ||
3009 | |||
3010 | commit: | ||
3011 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
3012 | out: | ||
3013 | |||
3014 | if (bhs) { | ||
3015 | for (i = 0; i < blk_per_bucket; i++) | ||
3016 | brelse(bhs[i]); | ||
3017 | } | ||
3018 | kfree(bhs); | ||
3019 | |||
3020 | kfree(bucket_buf); | ||
3021 | return ret; | ||
3022 | } | ||
3023 | |||
3024 | /* | ||
3025 | * Move half nums of the xattr bucket in the previous cluster to this new | ||
3026 | * cluster. We only touch the last cluster of the previous extend record. | ||
3027 | * | ||
3028 | * first_bh is the first buffer_head of a series of bucket in the same | ||
3029 | * extent rec and header_bh is the header of one bucket in this cluster. | ||
3030 | * They will be updated if we move the data header_bh contains to the new | ||
3031 | * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster. | ||
3032 | */ | ||
3033 | static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, | ||
3034 | handle_t *handle, | ||
3035 | struct buffer_head **first_bh, | ||
3036 | struct buffer_head **header_bh, | ||
3037 | u64 new_blkno, | ||
3038 | u64 prev_blkno, | ||
3039 | u32 num_clusters, | ||
3040 | u32 *first_hash) | ||
3041 | { | ||
3042 | int i, ret, credits; | ||
3043 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3044 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
3045 | int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); | ||
3046 | int blocksize = inode->i_sb->s_blocksize; | ||
3047 | struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL; | ||
3048 | struct ocfs2_xattr_header *new_xh; | ||
3049 | struct ocfs2_xattr_header *xh = | ||
3050 | (struct ocfs2_xattr_header *)((*first_bh)->b_data); | ||
3051 | |||
3052 | BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets); | ||
3053 | BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize); | ||
3054 | |||
3055 | prev_bh = *first_bh; | ||
3056 | get_bh(prev_bh); | ||
3057 | xh = (struct ocfs2_xattr_header *)prev_bh->b_data; | ||
3058 | |||
3059 | prev_blkno += (num_clusters - 1) * bpc + bpc / 2; | ||
3060 | |||
3061 | mlog(0, "move half of xattrs in cluster %llu to %llu\n", | ||
3062 | prev_blkno, new_blkno); | ||
3063 | |||
3064 | /* | ||
3065 | * We need to update the 1st half of the new cluster and | ||
3066 | * 1 more for the update of the 1st bucket of the previous | ||
3067 | * extent record. | ||
3068 | */ | ||
3069 | credits = bpc / 2 + 1; | ||
3070 | ret = ocfs2_extend_trans(handle, credits); | ||
3071 | if (ret) { | ||
3072 | mlog_errno(ret); | ||
3073 | goto out; | ||
3074 | } | ||
3075 | |||
3076 | ret = ocfs2_journal_access(handle, inode, prev_bh, | ||
3077 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3078 | if (ret) { | ||
3079 | mlog_errno(ret); | ||
3080 | goto out; | ||
3081 | } | ||
3082 | |||
3083 | for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) { | ||
3084 | old_bh = new_bh = NULL; | ||
3085 | new_bh = sb_getblk(inode->i_sb, new_blkno); | ||
3086 | if (!new_bh) { | ||
3087 | ret = -EIO; | ||
3088 | mlog_errno(ret); | ||
3089 | goto out; | ||
3090 | } | ||
3091 | |||
3092 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
3093 | |||
3094 | ret = ocfs2_journal_access(handle, inode, new_bh, | ||
3095 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
3096 | if (ret < 0) { | ||
3097 | mlog_errno(ret); | ||
3098 | brelse(new_bh); | ||
3099 | goto out; | ||
3100 | } | ||
3101 | |||
3102 | ret = ocfs2_read_block(inode, prev_blkno, &old_bh); | ||
3103 | if (ret < 0) { | ||
3104 | mlog_errno(ret); | ||
3105 | brelse(new_bh); | ||
3106 | goto out; | ||
3107 | } | ||
3108 | |||
3109 | memcpy(new_bh->b_data, old_bh->b_data, blocksize); | ||
3110 | |||
3111 | if (i == 0) { | ||
3112 | new_xh = (struct ocfs2_xattr_header *)new_bh->b_data; | ||
3113 | new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2); | ||
3114 | |||
3115 | if (first_hash) | ||
3116 | *first_hash = le32_to_cpu( | ||
3117 | new_xh->xh_entries[0].xe_name_hash); | ||
3118 | new_first_bh = new_bh; | ||
3119 | get_bh(new_first_bh); | ||
3120 | } | ||
3121 | |||
3122 | ocfs2_journal_dirty(handle, new_bh); | ||
3123 | |||
3124 | if (*header_bh == old_bh) { | ||
3125 | brelse(*header_bh); | ||
3126 | *header_bh = new_bh; | ||
3127 | get_bh(*header_bh); | ||
3128 | |||
3129 | brelse(*first_bh); | ||
3130 | *first_bh = new_first_bh; | ||
3131 | get_bh(*first_bh); | ||
3132 | } | ||
3133 | brelse(new_bh); | ||
3134 | brelse(old_bh); | ||
3135 | } | ||
3136 | |||
3137 | le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2)); | ||
3138 | |||
3139 | ocfs2_journal_dirty(handle, prev_bh); | ||
3140 | out: | ||
3141 | brelse(prev_bh); | ||
3142 | brelse(new_first_bh); | ||
3143 | return ret; | ||
3144 | } | ||
3145 | |||
3146 | static int ocfs2_read_xattr_bucket(struct inode *inode, | ||
3147 | u64 blkno, | ||
3148 | struct buffer_head **bhs, | ||
3149 | int new) | ||
3150 | { | ||
3151 | int ret = 0; | ||
3152 | u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3153 | |||
3154 | if (!new) | ||
3155 | return ocfs2_read_blocks(inode, blkno, | ||
3156 | blk_per_bucket, bhs, | ||
3157 | OCFS2_BH_CACHED); | ||
3158 | |||
3159 | for (i = 0; i < blk_per_bucket; i++) { | ||
3160 | bhs[i] = sb_getblk(inode->i_sb, blkno + i); | ||
3161 | if (bhs[i] == NULL) { | ||
3162 | ret = -EIO; | ||
3163 | mlog_errno(ret); | ||
3164 | break; | ||
3165 | } | ||
3166 | ocfs2_set_new_buffer_uptodate(inode, bhs[i]); | ||
3167 | } | ||
3168 | |||
3169 | return ret; | ||
3170 | } | ||
3171 | |||
3172 | /* | ||
3173 | * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). | ||
3174 | * first_hash will record the 1st hash of the new bucket. | ||
3175 | */ | ||
3176 | static int ocfs2_half_xattr_bucket(struct inode *inode, | ||
3177 | handle_t *handle, | ||
3178 | u64 blk, | ||
3179 | u64 new_blk, | ||
3180 | u32 *first_hash, | ||
3181 | int new_bucket_head) | ||
3182 | { | ||
3183 | int ret, i; | ||
3184 | u16 count, start, len, name_value_len, xe_len, name_offset; | ||
3185 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3186 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
3187 | struct ocfs2_xattr_header *xh; | ||
3188 | struct ocfs2_xattr_entry *xe; | ||
3189 | int blocksize = inode->i_sb->s_blocksize; | ||
3190 | |||
3191 | mlog(0, "move half of xattrs from bucket %llu to %llu\n", | ||
3192 | blk, new_blk); | ||
3193 | |||
3194 | s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
3195 | if (!s_bhs) | ||
3196 | return -ENOMEM; | ||
3197 | |||
3198 | ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0); | ||
3199 | if (ret) { | ||
3200 | mlog_errno(ret); | ||
3201 | goto out; | ||
3202 | } | ||
3203 | |||
3204 | ret = ocfs2_journal_access(handle, inode, s_bhs[0], | ||
3205 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3206 | if (ret) { | ||
3207 | mlog_errno(ret); | ||
3208 | goto out; | ||
3209 | } | ||
3210 | |||
3211 | t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
3212 | if (!t_bhs) { | ||
3213 | ret = -ENOMEM; | ||
3214 | goto out; | ||
3215 | } | ||
3216 | |||
3217 | ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); | ||
3218 | if (ret) { | ||
3219 | mlog_errno(ret); | ||
3220 | goto out; | ||
3221 | } | ||
3222 | |||
3223 | for (i = 0; i < blk_per_bucket; i++) { | ||
3224 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
3225 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
3226 | if (ret) { | ||
3227 | mlog_errno(ret); | ||
3228 | goto out; | ||
3229 | } | ||
3230 | } | ||
3231 | |||
3232 | /* copy the whole bucket to the new first. */ | ||
3233 | for (i = 0; i < blk_per_bucket; i++) | ||
3234 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
3235 | |||
3236 | /* update the new bucket. */ | ||
3237 | xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; | ||
3238 | count = le16_to_cpu(xh->xh_count); | ||
3239 | start = count / 2; | ||
3240 | |||
3241 | /* | ||
3242 | * Calculate the total name/value len and xh_free_start for | ||
3243 | * the old bucket first. | ||
3244 | */ | ||
3245 | name_offset = OCFS2_XATTR_BUCKET_SIZE; | ||
3246 | name_value_len = 0; | ||
3247 | for (i = 0; i < start; i++) { | ||
3248 | xe = &xh->xh_entries[i]; | ||
3249 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
3250 | if (ocfs2_xattr_is_local(xe)) | ||
3251 | xe_len += | ||
3252 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
3253 | else | ||
3254 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
3255 | name_value_len += xe_len; | ||
3256 | if (le16_to_cpu(xe->xe_name_offset) < name_offset) | ||
3257 | name_offset = le16_to_cpu(xe->xe_name_offset); | ||
3258 | } | ||
3259 | |||
3260 | /* | ||
3261 | * Now begin the modification to the new bucket. | ||
3262 | * | ||
3263 | * In the new bucket, We just move the xattr entry to the beginning | ||
3264 | * and don't touch the name/value. So there will be some holes in the | ||
3265 | * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is | ||
3266 | * called. | ||
3267 | */ | ||
3268 | xe = &xh->xh_entries[start]; | ||
3269 | len = sizeof(struct ocfs2_xattr_entry) * (count - start); | ||
3270 | mlog(0, "mv xattr entry len %d from %d to %d\n", len, | ||
3271 | (int)((char *)xe - (char *)xh), | ||
3272 | (int)((char *)xh->xh_entries - (char *)xh)); | ||
3273 | memmove((char *)xh->xh_entries, (char *)xe, len); | ||
3274 | xe = &xh->xh_entries[count - start]; | ||
3275 | len = sizeof(struct ocfs2_xattr_entry) * start; | ||
3276 | memset((char *)xe, 0, len); | ||
3277 | |||
3278 | le16_add_cpu(&xh->xh_count, -start); | ||
3279 | le16_add_cpu(&xh->xh_name_value_len, -name_value_len); | ||
3280 | |||
3281 | /* Calculate xh_free_start for the new bucket. */ | ||
3282 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
3283 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
3284 | xe = &xh->xh_entries[i]; | ||
3285 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
3286 | if (ocfs2_xattr_is_local(xe)) | ||
3287 | xe_len += | ||
3288 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
3289 | else | ||
3290 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
3291 | if (le16_to_cpu(xe->xe_name_offset) < | ||
3292 | le16_to_cpu(xh->xh_free_start)) | ||
3293 | xh->xh_free_start = xe->xe_name_offset; | ||
3294 | } | ||
3295 | |||
3296 | /* set xh->xh_num_buckets for the new xh. */ | ||
3297 | if (new_bucket_head) | ||
3298 | xh->xh_num_buckets = cpu_to_le16(1); | ||
3299 | else | ||
3300 | xh->xh_num_buckets = 0; | ||
3301 | |||
3302 | for (i = 0; i < blk_per_bucket; i++) { | ||
3303 | ocfs2_journal_dirty(handle, t_bhs[i]); | ||
3304 | if (ret) | ||
3305 | mlog_errno(ret); | ||
3306 | } | ||
3307 | |||
3308 | /* store the first_hash of the new bucket. */ | ||
3309 | if (first_hash) | ||
3310 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
3311 | |||
3312 | /* | ||
3313 | * Now only update the 1st block of the old bucket. | ||
3314 | * Please note that the entry has been sorted already above. | ||
3315 | */ | ||
3316 | xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; | ||
3317 | memset(&xh->xh_entries[start], 0, | ||
3318 | sizeof(struct ocfs2_xattr_entry) * (count - start)); | ||
3319 | xh->xh_count = cpu_to_le16(start); | ||
3320 | xh->xh_free_start = cpu_to_le16(name_offset); | ||
3321 | xh->xh_name_value_len = cpu_to_le16(name_value_len); | ||
3322 | |||
3323 | ocfs2_journal_dirty(handle, s_bhs[0]); | ||
3324 | if (ret) | ||
3325 | mlog_errno(ret); | ||
3326 | |||
3327 | out: | ||
3328 | if (s_bhs) { | ||
3329 | for (i = 0; i < blk_per_bucket; i++) | ||
3330 | brelse(s_bhs[i]); | ||
3331 | } | ||
3332 | kfree(s_bhs); | ||
3333 | |||
3334 | if (t_bhs) { | ||
3335 | for (i = 0; i < blk_per_bucket; i++) | ||
3336 | brelse(t_bhs[i]); | ||
3337 | } | ||
3338 | kfree(t_bhs); | ||
3339 | |||
3340 | return ret; | ||
3341 | } | ||
3342 | |||
3343 | /* | ||
3344 | * Copy xattr from one bucket to another bucket. | ||
3345 | * | ||
3346 | * The caller must make sure that the journal transaction | ||
3347 | * has enough space for journaling. | ||
3348 | */ | ||
3349 | static int ocfs2_cp_xattr_bucket(struct inode *inode, | ||
3350 | handle_t *handle, | ||
3351 | u64 s_blkno, | ||
3352 | u64 t_blkno, | ||
3353 | int t_is_new) | ||
3354 | { | ||
3355 | int ret, i; | ||
3356 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3357 | int blocksize = inode->i_sb->s_blocksize; | ||
3358 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
3359 | |||
3360 | BUG_ON(s_blkno == t_blkno); | ||
3361 | |||
3362 | mlog(0, "cp bucket %llu to %llu, target is %d\n", | ||
3363 | s_blkno, t_blkno, t_is_new); | ||
3364 | |||
3365 | s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
3366 | GFP_NOFS); | ||
3367 | if (!s_bhs) | ||
3368 | return -ENOMEM; | ||
3369 | |||
3370 | ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); | ||
3371 | if (ret) | ||
3372 | goto out; | ||
3373 | |||
3374 | t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
3375 | GFP_NOFS); | ||
3376 | if (!t_bhs) { | ||
3377 | ret = -ENOMEM; | ||
3378 | goto out; | ||
3379 | } | ||
3380 | |||
3381 | ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); | ||
3382 | if (ret) | ||
3383 | goto out; | ||
3384 | |||
3385 | for (i = 0; i < blk_per_bucket; i++) { | ||
3386 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
3387 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3388 | if (ret) | ||
3389 | goto out; | ||
3390 | } | ||
3391 | |||
3392 | for (i = 0; i < blk_per_bucket; i++) { | ||
3393 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
3394 | ocfs2_journal_dirty(handle, t_bhs[i]); | ||
3395 | } | ||
3396 | |||
3397 | out: | ||
3398 | if (s_bhs) { | ||
3399 | for (i = 0; i < blk_per_bucket; i++) | ||
3400 | brelse(s_bhs[i]); | ||
3401 | } | ||
3402 | kfree(s_bhs); | ||
3403 | |||
3404 | if (t_bhs) { | ||
3405 | for (i = 0; i < blk_per_bucket; i++) | ||
3406 | brelse(t_bhs[i]); | ||
3407 | } | ||
3408 | kfree(t_bhs); | ||
3409 | |||
3410 | return ret; | ||
3411 | } | ||
3412 | |||
3413 | /* | ||
3414 | * Copy one xattr cluster from src_blk to to_blk. | ||
3415 | * The to_blk will become the first bucket header of the cluster, so its | ||
3416 | * xh_num_buckets will be initialized as the bucket num in the cluster. | ||
3417 | */ | ||
3418 | static int ocfs2_cp_xattr_cluster(struct inode *inode, | ||
3419 | handle_t *handle, | ||
3420 | struct buffer_head *first_bh, | ||
3421 | u64 src_blk, | ||
3422 | u64 to_blk, | ||
3423 | u32 *first_hash) | ||
3424 | { | ||
3425 | int i, ret, credits; | ||
3426 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3427 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
3428 | int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); | ||
3429 | struct buffer_head *bh = NULL; | ||
3430 | struct ocfs2_xattr_header *xh; | ||
3431 | u64 to_blk_start = to_blk; | ||
3432 | |||
3433 | mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); | ||
3434 | |||
3435 | /* | ||
3436 | * We need to update the new cluster and 1 more for the update of | ||
3437 | * the 1st bucket of the previous extent rec. | ||
3438 | */ | ||
3439 | credits = bpc + 1; | ||
3440 | ret = ocfs2_extend_trans(handle, credits); | ||
3441 | if (ret) { | ||
3442 | mlog_errno(ret); | ||
3443 | goto out; | ||
3444 | } | ||
3445 | |||
3446 | ret = ocfs2_journal_access(handle, inode, first_bh, | ||
3447 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3448 | if (ret) { | ||
3449 | mlog_errno(ret); | ||
3450 | goto out; | ||
3451 | } | ||
3452 | |||
3453 | for (i = 0; i < num_buckets; i++) { | ||
3454 | ret = ocfs2_cp_xattr_bucket(inode, handle, | ||
3455 | src_blk, to_blk, 1); | ||
3456 | if (ret) { | ||
3457 | mlog_errno(ret); | ||
3458 | goto out; | ||
3459 | } | ||
3460 | |||
3461 | src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3462 | to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3463 | } | ||
3464 | |||
3465 | /* update the old bucket header. */ | ||
3466 | xh = (struct ocfs2_xattr_header *)first_bh->b_data; | ||
3467 | le16_add_cpu(&xh->xh_num_buckets, -num_buckets); | ||
3468 | |||
3469 | ocfs2_journal_dirty(handle, first_bh); | ||
3470 | |||
3471 | /* update the new bucket header. */ | ||
3472 | ret = ocfs2_read_block(inode, to_blk_start, &bh); | ||
3473 | if (ret < 0) { | ||
3474 | mlog_errno(ret); | ||
3475 | goto out; | ||
3476 | } | ||
3477 | |||
3478 | ret = ocfs2_journal_access(handle, inode, bh, | ||
3479 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3480 | if (ret) { | ||
3481 | mlog_errno(ret); | ||
3482 | goto out; | ||
3483 | } | ||
3484 | |||
3485 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
3486 | xh->xh_num_buckets = cpu_to_le16(num_buckets); | ||
3487 | |||
3488 | ocfs2_journal_dirty(handle, bh); | ||
3489 | |||
3490 | if (first_hash) | ||
3491 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
3492 | out: | ||
3493 | brelse(bh); | ||
3494 | return ret; | ||
3495 | } | ||
3496 | |||
3497 | /* | ||
3498 | * Move half of the xattrs in this cluster to the new cluster. | ||
3499 | * This function should only be called when bucket size == cluster size. | ||
3500 | * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. | ||
3501 | */ | ||
3502 | static int ocfs2_half_xattr_cluster(struct inode *inode, | ||
3503 | handle_t *handle, | ||
3504 | u64 prev_blk, | ||
3505 | u64 new_blk, | ||
3506 | u32 *first_hash) | ||
3507 | { | ||
3508 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3509 | int ret, credits = 2 * blk_per_bucket; | ||
3510 | |||
3511 | BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); | ||
3512 | |||
3513 | ret = ocfs2_extend_trans(handle, credits); | ||
3514 | if (ret) { | ||
3515 | mlog_errno(ret); | ||
3516 | return ret; | ||
3517 | } | ||
3518 | |||
3519 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
3520 | return ocfs2_half_xattr_bucket(inode, handle, prev_blk, | ||
3521 | new_blk, first_hash, 1); | ||
3522 | } | ||
3523 | |||
3524 | /* | ||
3525 | * Move some xattrs from the old cluster to the new one since they are not | ||
3526 | * contiguous in ocfs2 xattr tree. | ||
3527 | * | ||
3528 | * new_blk starts a new separate cluster, and we will move some xattrs from | ||
3529 | * prev_blk to it. v_start will be set as the first name hash value in this | ||
3530 | * new cluster so that it can be used as e_cpos during tree insertion and | ||
3531 | * don't collide with our original b-tree operations. first_bh and header_bh | ||
3532 | * will also be updated since they will be used in ocfs2_extend_xattr_bucket | ||
3533 | * to extend the insert bucket. | ||
3534 | * | ||
3535 | * The problem is how much xattr should we move to the new one and when should | ||
3536 | * we update first_bh and header_bh? | ||
3537 | * 1. If cluster size > bucket size, that means the previous cluster has more | ||
3538 | * than 1 bucket, so just move half nums of bucket into the new cluster and | ||
3539 | * update the first_bh and header_bh if the insert bucket has been moved | ||
3540 | * to the new cluster. | ||
3541 | * 2. If cluster_size == bucket_size: | ||
3542 | * a) If the previous extent rec has more than one cluster and the insert | ||
3543 | * place isn't in the last cluster, copy the entire last cluster to the | ||
3544 | * new one. This time, we don't need to upate the first_bh and header_bh | ||
3545 | * since they will not be moved into the new cluster. | ||
3546 | * b) Otherwise, move the bottom half of the xattrs in the last cluster into | ||
3547 | * the new one. And we set the extend flag to zero if the insert place is | ||
3548 | * moved into the new allocated cluster since no extend is needed. | ||
3549 | */ | ||
3550 | static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, | ||
3551 | handle_t *handle, | ||
3552 | struct buffer_head **first_bh, | ||
3553 | struct buffer_head **header_bh, | ||
3554 | u64 new_blk, | ||
3555 | u64 prev_blk, | ||
3556 | u32 prev_clusters, | ||
3557 | u32 *v_start, | ||
3558 | int *extend) | ||
3559 | { | ||
3560 | int ret = 0; | ||
3561 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
3562 | |||
3563 | mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", | ||
3564 | prev_blk, prev_clusters, new_blk); | ||
3565 | |||
3566 | if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) | ||
3567 | ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, | ||
3568 | handle, | ||
3569 | first_bh, | ||
3570 | header_bh, | ||
3571 | new_blk, | ||
3572 | prev_blk, | ||
3573 | prev_clusters, | ||
3574 | v_start); | ||
3575 | else { | ||
3576 | u64 last_blk = prev_blk + bpc * (prev_clusters - 1); | ||
3577 | |||
3578 | if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) | ||
3579 | ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, | ||
3580 | last_blk, new_blk, | ||
3581 | v_start); | ||
3582 | else { | ||
3583 | ret = ocfs2_half_xattr_cluster(inode, handle, | ||
3584 | last_blk, new_blk, | ||
3585 | v_start); | ||
3586 | |||
3587 | if ((*header_bh)->b_blocknr == last_blk && extend) | ||
3588 | *extend = 0; | ||
3589 | } | ||
3590 | } | ||
3591 | |||
3592 | return ret; | ||
3593 | } | ||
3594 | |||
3595 | /* | ||
3596 | * Add a new cluster for xattr storage. | ||
3597 | * | ||
3598 | * If the new cluster is contiguous with the previous one, it will be | ||
3599 | * appended to the same extent record, and num_clusters will be updated. | ||
3600 | * If not, we will insert a new extent for it and move some xattrs in | ||
3601 | * the last cluster into the new allocated one. | ||
3602 | * We also need to limit the maximum size of a btree leaf, otherwise we'll | ||
3603 | * lose the benefits of hashing because we'll have to search large leaves. | ||
3604 | * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize, | ||
3605 | * if it's bigger). | ||
3606 | * | ||
3607 | * first_bh is the first block of the previous extent rec and header_bh | ||
3608 | * indicates the bucket we will insert the new xattrs. They will be updated | ||
3609 | * when the header_bh is moved into the new cluster. | ||
3610 | */ | ||
3611 | static int ocfs2_add_new_xattr_cluster(struct inode *inode, | ||
3612 | struct buffer_head *root_bh, | ||
3613 | struct buffer_head **first_bh, | ||
3614 | struct buffer_head **header_bh, | ||
3615 | u32 *num_clusters, | ||
3616 | u32 prev_cpos, | ||
3617 | u64 prev_blkno, | ||
3618 | int *extend) | ||
3619 | { | ||
3620 | int ret, credits; | ||
3621 | u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
3622 | u32 prev_clusters = *num_clusters; | ||
3623 | u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; | ||
3624 | u64 block; | ||
3625 | handle_t *handle = NULL; | ||
3626 | struct ocfs2_alloc_context *data_ac = NULL; | ||
3627 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
3628 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3629 | struct ocfs2_extent_tree et; | ||
3630 | |||
3631 | mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " | ||
3632 | "previous xattr blkno = %llu\n", | ||
3633 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
3634 | prev_cpos, prev_blkno); | ||
3635 | |||
3636 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | ||
3637 | |||
3638 | ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
3639 | &data_ac, &meta_ac); | ||
3640 | if (ret) { | ||
3641 | mlog_errno(ret); | ||
3642 | goto leave; | ||
3643 | } | ||
3644 | |||
3645 | credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, | ||
3646 | clusters_to_add); | ||
3647 | handle = ocfs2_start_trans(osb, credits); | ||
3648 | if (IS_ERR(handle)) { | ||
3649 | ret = PTR_ERR(handle); | ||
3650 | handle = NULL; | ||
3651 | mlog_errno(ret); | ||
3652 | goto leave; | ||
3653 | } | ||
3654 | |||
3655 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
3656 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3657 | if (ret < 0) { | ||
3658 | mlog_errno(ret); | ||
3659 | goto leave; | ||
3660 | } | ||
3661 | |||
3662 | ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
3663 | clusters_to_add, &bit_off, &num_bits); | ||
3664 | if (ret < 0) { | ||
3665 | if (ret != -ENOSPC) | ||
3666 | mlog_errno(ret); | ||
3667 | goto leave; | ||
3668 | } | ||
3669 | |||
3670 | BUG_ON(num_bits > clusters_to_add); | ||
3671 | |||
3672 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
3673 | mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", | ||
3674 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
3675 | |||
3676 | if (prev_blkno + prev_clusters * bpc == block && | ||
3677 | (prev_clusters + num_bits) << osb->s_clustersize_bits <= | ||
3678 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { | ||
3679 | /* | ||
3680 | * If this cluster is contiguous with the old one and | ||
3681 | * adding this new cluster, we don't surpass the limit of | ||
3682 | * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be | ||
3683 | * initialized and used like other buckets in the previous | ||
3684 | * cluster. | ||
3685 | * So add it as a contiguous one. The caller will handle | ||
3686 | * its init process. | ||
3687 | */ | ||
3688 | v_start = prev_cpos + prev_clusters; | ||
3689 | *num_clusters = prev_clusters + num_bits; | ||
3690 | mlog(0, "Add contiguous %u clusters to previous extent rec.\n", | ||
3691 | num_bits); | ||
3692 | } else { | ||
3693 | ret = ocfs2_adjust_xattr_cross_cluster(inode, | ||
3694 | handle, | ||
3695 | first_bh, | ||
3696 | header_bh, | ||
3697 | block, | ||
3698 | prev_blkno, | ||
3699 | prev_clusters, | ||
3700 | &v_start, | ||
3701 | extend); | ||
3702 | if (ret) { | ||
3703 | mlog_errno(ret); | ||
3704 | goto leave; | ||
3705 | } | ||
3706 | } | ||
3707 | |||
3708 | if (handle->h_buffer_credits < credits) { | ||
3709 | /* | ||
3710 | * The journal has been restarted before, and don't | ||
3711 | * have enough space for the insertion, so extend it | ||
3712 | * here. | ||
3713 | */ | ||
3714 | ret = ocfs2_extend_trans(handle, credits); | ||
3715 | if (ret) { | ||
3716 | mlog_errno(ret); | ||
3717 | goto leave; | ||
3718 | } | ||
3719 | } | ||
3720 | mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", | ||
3721 | num_bits, block, v_start); | ||
3722 | ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, | ||
3723 | num_bits, 0, meta_ac); | ||
3724 | if (ret < 0) { | ||
3725 | mlog_errno(ret); | ||
3726 | goto leave; | ||
3727 | } | ||
3728 | |||
3729 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
3730 | if (ret < 0) { | ||
3731 | mlog_errno(ret); | ||
3732 | goto leave; | ||
3733 | } | ||
3734 | |||
3735 | leave: | ||
3736 | if (handle) | ||
3737 | ocfs2_commit_trans(osb, handle); | ||
3738 | if (data_ac) | ||
3739 | ocfs2_free_alloc_context(data_ac); | ||
3740 | if (meta_ac) | ||
3741 | ocfs2_free_alloc_context(meta_ac); | ||
3742 | |||
3743 | return ret; | ||
3744 | } | ||
3745 | |||
3746 | /* | ||
3747 | * Extend a new xattr bucket and move xattrs to the end one by one until | ||
3748 | * We meet with start_bh. Only move half of the xattrs to the bucket after it. | ||
3749 | */ | ||
3750 | static int ocfs2_extend_xattr_bucket(struct inode *inode, | ||
3751 | struct buffer_head *first_bh, | ||
3752 | struct buffer_head *start_bh, | ||
3753 | u32 num_clusters) | ||
3754 | { | ||
3755 | int ret, credits; | ||
3756 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3757 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3758 | u64 start_blk = start_bh->b_blocknr, end_blk; | ||
3759 | u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); | ||
3760 | handle_t *handle; | ||
3761 | struct ocfs2_xattr_header *first_xh = | ||
3762 | (struct ocfs2_xattr_header *)first_bh->b_data; | ||
3763 | u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); | ||
3764 | |||
3765 | mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " | ||
3766 | "from %llu, len = %u\n", start_blk, | ||
3767 | (unsigned long long)first_bh->b_blocknr, num_clusters); | ||
3768 | |||
3769 | BUG_ON(bucket >= num_buckets); | ||
3770 | |||
3771 | end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; | ||
3772 | |||
3773 | /* | ||
3774 | * We will touch all the buckets after the start_bh(include it). | ||
3775 | * Add one more bucket and modify the first_bh. | ||
3776 | */ | ||
3777 | credits = end_blk - start_blk + 2 * blk_per_bucket + 1; | ||
3778 | handle = ocfs2_start_trans(osb, credits); | ||
3779 | if (IS_ERR(handle)) { | ||
3780 | ret = PTR_ERR(handle); | ||
3781 | handle = NULL; | ||
3782 | mlog_errno(ret); | ||
3783 | goto out; | ||
3784 | } | ||
3785 | |||
3786 | ret = ocfs2_journal_access(handle, inode, first_bh, | ||
3787 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3788 | if (ret) { | ||
3789 | mlog_errno(ret); | ||
3790 | goto commit; | ||
3791 | } | ||
3792 | |||
3793 | while (end_blk != start_blk) { | ||
3794 | ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, | ||
3795 | end_blk + blk_per_bucket, 0); | ||
3796 | if (ret) | ||
3797 | goto commit; | ||
3798 | end_blk -= blk_per_bucket; | ||
3799 | } | ||
3800 | |||
3801 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
3802 | ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, | ||
3803 | start_blk + blk_per_bucket, NULL, 0); | ||
3804 | |||
3805 | le16_add_cpu(&first_xh->xh_num_buckets, 1); | ||
3806 | ocfs2_journal_dirty(handle, first_bh); | ||
3807 | |||
3808 | commit: | ||
3809 | ocfs2_commit_trans(osb, handle); | ||
3810 | out: | ||
3811 | return ret; | ||
3812 | } | ||
3813 | |||
3814 | /* | ||
3815 | * Add new xattr bucket in an extent record and adjust the buckets accordingly. | ||
3816 | * xb_bh is the ocfs2_xattr_block. | ||
3817 | * We will move all the buckets starting from header_bh to the next place. As | ||
3818 | * for this one, half num of its xattrs will be moved to the next one. | ||
3819 | * | ||
3820 | * We will allocate a new cluster if current cluster is full and adjust | ||
3821 | * header_bh and first_bh if the insert place is moved to the new cluster. | ||
3822 | */ | ||
3823 | static int ocfs2_add_new_xattr_bucket(struct inode *inode, | ||
3824 | struct buffer_head *xb_bh, | ||
3825 | struct buffer_head *header_bh) | ||
3826 | { | ||
3827 | struct ocfs2_xattr_header *first_xh = NULL; | ||
3828 | struct buffer_head *first_bh = NULL; | ||
3829 | struct ocfs2_xattr_block *xb = | ||
3830 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
3831 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
3832 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
3833 | struct ocfs2_xattr_header *xh = | ||
3834 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
3835 | u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
3836 | struct super_block *sb = inode->i_sb; | ||
3837 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
3838 | int ret, num_buckets, extend = 1; | ||
3839 | u64 p_blkno; | ||
3840 | u32 e_cpos, num_clusters; | ||
3841 | |||
3842 | mlog(0, "Add new xattr bucket starting form %llu\n", | ||
3843 | (unsigned long long)header_bh->b_blocknr); | ||
3844 | |||
3845 | /* | ||
3846 | * Add refrence for header_bh here because it may be | ||
3847 | * changed in ocfs2_add_new_xattr_cluster and we need | ||
3848 | * to free it in the end. | ||
3849 | */ | ||
3850 | get_bh(header_bh); | ||
3851 | |||
3852 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, | ||
3853 | &num_clusters, el); | ||
3854 | if (ret) { | ||
3855 | mlog_errno(ret); | ||
3856 | goto out; | ||
3857 | } | ||
3858 | |||
3859 | ret = ocfs2_read_block(inode, p_blkno, &first_bh); | ||
3860 | if (ret) { | ||
3861 | mlog_errno(ret); | ||
3862 | goto out; | ||
3863 | } | ||
3864 | |||
3865 | num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; | ||
3866 | first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; | ||
3867 | |||
3868 | if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { | ||
3869 | ret = ocfs2_add_new_xattr_cluster(inode, | ||
3870 | xb_bh, | ||
3871 | &first_bh, | ||
3872 | &header_bh, | ||
3873 | &num_clusters, | ||
3874 | e_cpos, | ||
3875 | p_blkno, | ||
3876 | &extend); | ||
3877 | if (ret) { | ||
3878 | mlog_errno(ret); | ||
3879 | goto out; | ||
3880 | } | ||
3881 | } | ||
3882 | |||
3883 | if (extend) | ||
3884 | ret = ocfs2_extend_xattr_bucket(inode, | ||
3885 | first_bh, | ||
3886 | header_bh, | ||
3887 | num_clusters); | ||
3888 | if (ret) | ||
3889 | mlog_errno(ret); | ||
3890 | out: | ||
3891 | brelse(first_bh); | ||
3892 | brelse(header_bh); | ||
3893 | return ret; | ||
3894 | } | ||
3895 | |||
3896 | static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, | ||
3897 | struct ocfs2_xattr_bucket *bucket, | ||
3898 | int offs) | ||
3899 | { | ||
3900 | int block_off = offs >> inode->i_sb->s_blocksize_bits; | ||
3901 | |||
3902 | offs = offs % inode->i_sb->s_blocksize; | ||
3903 | return bucket->bhs[block_off]->b_data + offs; | ||
3904 | } | ||
3905 | |||
3906 | /* | ||
3907 | * Handle the normal xattr set, including replace, delete and new. | ||
3908 | * | ||
3909 | * Note: "local" indicates the real data's locality. So we can't | ||
3910 | * just its bucket locality by its length. | ||
3911 | */ | ||
3912 | static void ocfs2_xattr_set_entry_normal(struct inode *inode, | ||
3913 | struct ocfs2_xattr_info *xi, | ||
3914 | struct ocfs2_xattr_search *xs, | ||
3915 | u32 name_hash, | ||
3916 | int local) | ||
3917 | { | ||
3918 | struct ocfs2_xattr_entry *last, *xe; | ||
3919 | int name_len = strlen(xi->name); | ||
3920 | struct ocfs2_xattr_header *xh = xs->header; | ||
3921 | u16 count = le16_to_cpu(xh->xh_count), start; | ||
3922 | size_t blocksize = inode->i_sb->s_blocksize; | ||
3923 | char *val; | ||
3924 | size_t offs, size, new_size; | ||
3925 | |||
3926 | last = &xh->xh_entries[count]; | ||
3927 | if (!xs->not_found) { | ||
3928 | xe = xs->here; | ||
3929 | offs = le16_to_cpu(xe->xe_name_offset); | ||
3930 | if (ocfs2_xattr_is_local(xe)) | ||
3931 | size = OCFS2_XATTR_SIZE(name_len) + | ||
3932 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
3933 | else | ||
3934 | size = OCFS2_XATTR_SIZE(name_len) + | ||
3935 | OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
3936 | |||
3937 | /* | ||
3938 | * If the new value will be stored outside, xi->value has been | ||
3939 | * initalized as an empty ocfs2_xattr_value_root, and the same | ||
3940 | * goes with xi->value_len, so we can set new_size safely here. | ||
3941 | * See ocfs2_xattr_set_in_bucket. | ||
3942 | */ | ||
3943 | new_size = OCFS2_XATTR_SIZE(name_len) + | ||
3944 | OCFS2_XATTR_SIZE(xi->value_len); | ||
3945 | |||
3946 | le16_add_cpu(&xh->xh_name_value_len, -size); | ||
3947 | if (xi->value) { | ||
3948 | if (new_size > size) | ||
3949 | goto set_new_name_value; | ||
3950 | |||
3951 | /* Now replace the old value with new one. */ | ||
3952 | if (local) | ||
3953 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
3954 | else | ||
3955 | xe->xe_value_size = 0; | ||
3956 | |||
3957 | val = ocfs2_xattr_bucket_get_val(inode, | ||
3958 | &xs->bucket, offs); | ||
3959 | memset(val + OCFS2_XATTR_SIZE(name_len), 0, | ||
3960 | size - OCFS2_XATTR_SIZE(name_len)); | ||
3961 | if (OCFS2_XATTR_SIZE(xi->value_len) > 0) | ||
3962 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
3963 | xi->value, xi->value_len); | ||
3964 | |||
3965 | le16_add_cpu(&xh->xh_name_value_len, new_size); | ||
3966 | ocfs2_xattr_set_local(xe, local); | ||
3967 | return; | ||
3968 | } else { | ||
3969 | /* | ||
3970 | * Remove the old entry if there is more than one. | ||
3971 | * We don't remove the last entry so that we can | ||
3972 | * use it to indicate the hash value of the empty | ||
3973 | * bucket. | ||
3974 | */ | ||
3975 | last -= 1; | ||
3976 | le16_add_cpu(&xh->xh_count, -1); | ||
3977 | if (xh->xh_count) { | ||
3978 | memmove(xe, xe + 1, | ||
3979 | (void *)last - (void *)xe); | ||
3980 | memset(last, 0, | ||
3981 | sizeof(struct ocfs2_xattr_entry)); | ||
3982 | } else | ||
3983 | xh->xh_free_start = | ||
3984 | cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
3985 | |||
3986 | return; | ||
3987 | } | ||
3988 | } else { | ||
3989 | /* find a new entry for insert. */ | ||
3990 | int low = 0, high = count - 1, tmp; | ||
3991 | struct ocfs2_xattr_entry *tmp_xe; | ||
3992 | |||
3993 | while (low <= high && count) { | ||
3994 | tmp = (low + high) / 2; | ||
3995 | tmp_xe = &xh->xh_entries[tmp]; | ||
3996 | |||
3997 | if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) | ||
3998 | low = tmp + 1; | ||
3999 | else if (name_hash < | ||
4000 | le32_to_cpu(tmp_xe->xe_name_hash)) | ||
4001 | high = tmp - 1; | ||
4002 | else { | ||
4003 | low = tmp; | ||
4004 | break; | ||
4005 | } | ||
4006 | } | ||
4007 | |||
4008 | xe = &xh->xh_entries[low]; | ||
4009 | if (low != count) | ||
4010 | memmove(xe + 1, xe, (void *)last - (void *)xe); | ||
4011 | |||
4012 | le16_add_cpu(&xh->xh_count, 1); | ||
4013 | memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); | ||
4014 | xe->xe_name_hash = cpu_to_le32(name_hash); | ||
4015 | xe->xe_name_len = name_len; | ||
4016 | ocfs2_xattr_set_type(xe, xi->name_index); | ||
4017 | } | ||
4018 | |||
4019 | set_new_name_value: | ||
4020 | /* Insert the new name+value. */ | ||
4021 | size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len); | ||
4022 | |||
4023 | /* | ||
4024 | * We must make sure that the name/value pair | ||
4025 | * exists in the same block. | ||
4026 | */ | ||
4027 | offs = le16_to_cpu(xh->xh_free_start); | ||
4028 | start = offs - size; | ||
4029 | |||
4030 | if (start >> inode->i_sb->s_blocksize_bits != | ||
4031 | (offs - 1) >> inode->i_sb->s_blocksize_bits) { | ||
4032 | offs = offs - offs % blocksize; | ||
4033 | xh->xh_free_start = cpu_to_le16(offs); | ||
4034 | } | ||
4035 | |||
4036 | val = ocfs2_xattr_bucket_get_val(inode, | ||
4037 | &xs->bucket, offs - size); | ||
4038 | xe->xe_name_offset = cpu_to_le16(offs - size); | ||
4039 | |||
4040 | memset(val, 0, size); | ||
4041 | memcpy(val, xi->name, name_len); | ||
4042 | memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len); | ||
4043 | |||
4044 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
4045 | ocfs2_xattr_set_local(xe, local); | ||
4046 | xs->here = xe; | ||
4047 | le16_add_cpu(&xh->xh_free_start, -size); | ||
4048 | le16_add_cpu(&xh->xh_name_value_len, size); | ||
4049 | |||
4050 | return; | ||
4051 | } | ||
4052 | |||
4053 | static int ocfs2_xattr_bucket_handle_journal(struct inode *inode, | ||
4054 | handle_t *handle, | ||
4055 | struct ocfs2_xattr_search *xs, | ||
4056 | struct buffer_head **bhs, | ||
4057 | u16 bh_num) | ||
4058 | { | ||
4059 | int ret = 0, off, block_off; | ||
4060 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4061 | |||
4062 | /* | ||
4063 | * First calculate all the blocks we should journal_access | ||
4064 | * and journal_dirty. The first block should always be touched. | ||
4065 | */ | ||
4066 | ret = ocfs2_journal_dirty(handle, bhs[0]); | ||
4067 | if (ret) | ||
4068 | mlog_errno(ret); | ||
4069 | |||
4070 | /* calc the data. */ | ||
4071 | off = le16_to_cpu(xe->xe_name_offset); | ||
4072 | block_off = off >> inode->i_sb->s_blocksize_bits; | ||
4073 | ret = ocfs2_journal_dirty(handle, bhs[block_off]); | ||
4074 | if (ret) | ||
4075 | mlog_errno(ret); | ||
4076 | |||
4077 | return ret; | ||
4078 | } | ||
4079 | |||
4080 | /* | ||
4081 | * Set the xattr entry in the specified bucket. | ||
4082 | * The bucket is indicated by xs->bucket and it should have the enough | ||
4083 | * space for the xattr insertion. | ||
4084 | */ | ||
4085 | static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, | ||
4086 | struct ocfs2_xattr_info *xi, | ||
4087 | struct ocfs2_xattr_search *xs, | ||
4088 | u32 name_hash, | ||
4089 | int local) | ||
4090 | { | ||
4091 | int i, ret; | ||
4092 | handle_t *handle = NULL; | ||
4093 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
4094 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4095 | |||
4096 | mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", | ||
4097 | (unsigned long)xi->value_len, xi->name_index, | ||
4098 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr); | ||
4099 | |||
4100 | if (!xs->bucket.bhs[1]) { | ||
4101 | ret = ocfs2_read_blocks(inode, | ||
4102 | xs->bucket.bhs[0]->b_blocknr + 1, | ||
4103 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
4104 | OCFS2_BH_CACHED); | ||
4105 | if (ret) { | ||
4106 | mlog_errno(ret); | ||
4107 | goto out; | ||
4108 | } | ||
4109 | } | ||
4110 | |||
4111 | handle = ocfs2_start_trans(osb, blk_per_bucket); | ||
4112 | if (IS_ERR(handle)) { | ||
4113 | ret = PTR_ERR(handle); | ||
4114 | handle = NULL; | ||
4115 | mlog_errno(ret); | ||
4116 | goto out; | ||
4117 | } | ||
4118 | |||
4119 | for (i = 0; i < blk_per_bucket; i++) { | ||
4120 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i], | ||
4121 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4122 | if (ret < 0) { | ||
4123 | mlog_errno(ret); | ||
4124 | goto out; | ||
4125 | } | ||
4126 | } | ||
4127 | |||
4128 | ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); | ||
4129 | |||
4130 | /*Only dirty the blocks we have touched in set xattr. */ | ||
4131 | ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, | ||
4132 | xs->bucket.bhs, blk_per_bucket); | ||
4133 | if (ret) | ||
4134 | mlog_errno(ret); | ||
4135 | out: | ||
4136 | ocfs2_commit_trans(osb, handle); | ||
4137 | |||
4138 | return ret; | ||
4139 | } | ||
4140 | |||
4141 | static int ocfs2_xattr_value_update_size(struct inode *inode, | ||
4142 | struct buffer_head *xe_bh, | ||
4143 | struct ocfs2_xattr_entry *xe, | ||
4144 | u64 new_size) | ||
4145 | { | ||
4146 | int ret; | ||
4147 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4148 | handle_t *handle = NULL; | ||
4149 | |||
4150 | handle = ocfs2_start_trans(osb, 1); | ||
4151 | if (handle == NULL) { | ||
4152 | ret = -ENOMEM; | ||
4153 | mlog_errno(ret); | ||
4154 | goto out; | ||
4155 | } | ||
4156 | |||
4157 | ret = ocfs2_journal_access(handle, inode, xe_bh, | ||
4158 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4159 | if (ret < 0) { | ||
4160 | mlog_errno(ret); | ||
4161 | goto out_commit; | ||
4162 | } | ||
4163 | |||
4164 | xe->xe_value_size = cpu_to_le64(new_size); | ||
4165 | |||
4166 | ret = ocfs2_journal_dirty(handle, xe_bh); | ||
4167 | if (ret < 0) | ||
4168 | mlog_errno(ret); | ||
4169 | |||
4170 | out_commit: | ||
4171 | ocfs2_commit_trans(osb, handle); | ||
4172 | out: | ||
4173 | return ret; | ||
4174 | } | ||
4175 | |||
4176 | /* | ||
4177 | * Truncate the specified xe_off entry in xattr bucket. | ||
4178 | * bucket is indicated by header_bh and len is the new length. | ||
4179 | * Both the ocfs2_xattr_value_root and the entry will be updated here. | ||
4180 | * | ||
4181 | * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. | ||
4182 | */ | ||
4183 | static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, | ||
4184 | struct buffer_head *header_bh, | ||
4185 | int xe_off, | ||
4186 | int len) | ||
4187 | { | ||
4188 | int ret, offset; | ||
4189 | u64 value_blk; | ||
4190 | struct buffer_head *value_bh = NULL; | ||
4191 | struct ocfs2_xattr_value_root *xv; | ||
4192 | struct ocfs2_xattr_entry *xe; | ||
4193 | struct ocfs2_xattr_header *xh = | ||
4194 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
4195 | size_t blocksize = inode->i_sb->s_blocksize; | ||
4196 | |||
4197 | xe = &xh->xh_entries[xe_off]; | ||
4198 | |||
4199 | BUG_ON(!xe || ocfs2_xattr_is_local(xe)); | ||
4200 | |||
4201 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
4202 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
4203 | |||
4204 | value_blk = offset / blocksize; | ||
4205 | |||
4206 | /* We don't allow ocfs2_xattr_value to be stored in different block. */ | ||
4207 | BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); | ||
4208 | value_blk += header_bh->b_blocknr; | ||
4209 | |||
4210 | ret = ocfs2_read_block(inode, value_blk, &value_bh); | ||
4211 | if (ret) { | ||
4212 | mlog_errno(ret); | ||
4213 | goto out; | ||
4214 | } | ||
4215 | |||
4216 | xv = (struct ocfs2_xattr_value_root *) | ||
4217 | (value_bh->b_data + offset % blocksize); | ||
4218 | |||
4219 | mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", | ||
4220 | xe_off, (unsigned long long)header_bh->b_blocknr, len); | ||
4221 | ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len); | ||
4222 | if (ret) { | ||
4223 | mlog_errno(ret); | ||
4224 | goto out; | ||
4225 | } | ||
4226 | |||
4227 | ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); | ||
4228 | if (ret) { | ||
4229 | mlog_errno(ret); | ||
4230 | goto out; | ||
4231 | } | ||
4232 | |||
4233 | out: | ||
4234 | brelse(value_bh); | ||
4235 | return ret; | ||
4236 | } | ||
4237 | |||
4238 | static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, | ||
4239 | struct ocfs2_xattr_search *xs, | ||
4240 | int len) | ||
4241 | { | ||
4242 | int ret, offset; | ||
4243 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4244 | struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; | ||
4245 | |||
4246 | BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); | ||
4247 | |||
4248 | offset = xe - xh->xh_entries; | ||
4249 | ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], | ||
4250 | offset, len); | ||
4251 | if (ret) | ||
4252 | mlog_errno(ret); | ||
4253 | |||
4254 | return ret; | ||
4255 | } | ||
4256 | |||
4257 | static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | ||
4258 | struct ocfs2_xattr_search *xs, | ||
4259 | char *val, | ||
4260 | int value_len) | ||
4261 | { | ||
4262 | int offset; | ||
4263 | struct ocfs2_xattr_value_root *xv; | ||
4264 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4265 | |||
4266 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); | ||
4267 | |||
4268 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
4269 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
4270 | |||
4271 | xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); | ||
4272 | |||
4273 | return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); | ||
4274 | } | ||
4275 | |||
4276 | static int ocfs2_rm_xattr_cluster(struct inode *inode, | ||
4277 | struct buffer_head *root_bh, | ||
4278 | u64 blkno, | ||
4279 | u32 cpos, | ||
4280 | u32 len) | ||
4281 | { | ||
4282 | int ret; | ||
4283 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4284 | struct inode *tl_inode = osb->osb_tl_inode; | ||
4285 | handle_t *handle; | ||
4286 | struct ocfs2_xattr_block *xb = | ||
4287 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
4288 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
4289 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
4290 | struct ocfs2_extent_tree et; | ||
4291 | |||
4292 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | ||
4293 | |||
4294 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
4295 | |||
4296 | mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", | ||
4297 | cpos, len, (unsigned long long)blkno); | ||
4298 | |||
4299 | ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); | ||
4300 | |||
4301 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
4302 | if (ret) { | ||
4303 | mlog_errno(ret); | ||
4304 | return ret; | ||
4305 | } | ||
4306 | |||
4307 | mutex_lock(&tl_inode->i_mutex); | ||
4308 | |||
4309 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
4310 | ret = __ocfs2_flush_truncate_log(osb); | ||
4311 | if (ret < 0) { | ||
4312 | mlog_errno(ret); | ||
4313 | goto out; | ||
4314 | } | ||
4315 | } | ||
4316 | |||
4317 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
4318 | if (handle == NULL) { | ||
4319 | ret = -ENOMEM; | ||
4320 | mlog_errno(ret); | ||
4321 | goto out; | ||
4322 | } | ||
4323 | |||
4324 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
4325 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4326 | if (ret) { | ||
4327 | mlog_errno(ret); | ||
4328 | goto out_commit; | ||
4329 | } | ||
4330 | |||
4331 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
4332 | &dealloc); | ||
4333 | if (ret) { | ||
4334 | mlog_errno(ret); | ||
4335 | goto out_commit; | ||
4336 | } | ||
4337 | |||
4338 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); | ||
4339 | |||
4340 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
4341 | if (ret) { | ||
4342 | mlog_errno(ret); | ||
4343 | goto out_commit; | ||
4344 | } | ||
4345 | |||
4346 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); | ||
4347 | if (ret) | ||
4348 | mlog_errno(ret); | ||
4349 | |||
4350 | out_commit: | ||
4351 | ocfs2_commit_trans(osb, handle); | ||
4352 | out: | ||
4353 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
4354 | |||
4355 | mutex_unlock(&tl_inode->i_mutex); | ||
4356 | |||
4357 | if (meta_ac) | ||
4358 | ocfs2_free_alloc_context(meta_ac); | ||
4359 | |||
4360 | ocfs2_run_deallocs(osb, &dealloc); | ||
4361 | |||
4362 | return ret; | ||
4363 | } | ||
4364 | |||
4365 | static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, | ||
4366 | struct ocfs2_xattr_search *xs) | ||
4367 | { | ||
4368 | handle_t *handle = NULL; | ||
4369 | struct ocfs2_xattr_header *xh = xs->bucket.xh; | ||
4370 | struct ocfs2_xattr_entry *last = &xh->xh_entries[ | ||
4371 | le16_to_cpu(xh->xh_count) - 1]; | ||
4372 | int ret = 0; | ||
4373 | |||
4374 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); | ||
4375 | if (IS_ERR(handle)) { | ||
4376 | ret = PTR_ERR(handle); | ||
4377 | mlog_errno(ret); | ||
4378 | return; | ||
4379 | } | ||
4380 | |||
4381 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0], | ||
4382 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4383 | if (ret) { | ||
4384 | mlog_errno(ret); | ||
4385 | goto out_commit; | ||
4386 | } | ||
4387 | |||
4388 | /* Remove the old entry. */ | ||
4389 | memmove(xs->here, xs->here + 1, | ||
4390 | (void *)last - (void *)xs->here); | ||
4391 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
4392 | le16_add_cpu(&xh->xh_count, -1); | ||
4393 | |||
4394 | ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); | ||
4395 | if (ret < 0) | ||
4396 | mlog_errno(ret); | ||
4397 | out_commit: | ||
4398 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
4399 | } | ||
4400 | |||
4401 | /* | ||
4402 | * Set the xattr name/value in the bucket specified in xs. | ||
4403 | * | ||
4404 | * As the new value in xi may be stored in the bucket or in an outside cluster, | ||
4405 | * we divide the whole process into 3 steps: | ||
4406 | * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket) | ||
4407 | * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs) | ||
4408 | * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside) | ||
4409 | * 4. If the clusters for the new outside value can't be allocated, we need | ||
4410 | * to free the xattr we allocated in set. | ||
4411 | */ | ||
4412 | static int ocfs2_xattr_set_in_bucket(struct inode *inode, | ||
4413 | struct ocfs2_xattr_info *xi, | ||
4414 | struct ocfs2_xattr_search *xs) | ||
4415 | { | ||
4416 | int ret, local = 1; | ||
4417 | size_t value_len; | ||
4418 | char *val = (char *)xi->value; | ||
4419 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4420 | u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name, | ||
4421 | strlen(xi->name)); | ||
4422 | |||
4423 | if (!xs->not_found && !ocfs2_xattr_is_local(xe)) { | ||
4424 | /* | ||
4425 | * We need to truncate the xattr storage first. | ||
4426 | * | ||
4427 | * If both the old and new value are stored to | ||
4428 | * outside block, we only need to truncate | ||
4429 | * the storage and then set the value outside. | ||
4430 | * | ||
4431 | * If the new value should be stored within block, | ||
4432 | * we should free all the outside block first and | ||
4433 | * the modification to the xattr block will be done | ||
4434 | * by following steps. | ||
4435 | */ | ||
4436 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
4437 | value_len = xi->value_len; | ||
4438 | else | ||
4439 | value_len = 0; | ||
4440 | |||
4441 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
4442 | value_len); | ||
4443 | if (ret) | ||
4444 | goto out; | ||
4445 | |||
4446 | if (value_len) | ||
4447 | goto set_value_outside; | ||
4448 | } | ||
4449 | |||
4450 | value_len = xi->value_len; | ||
4451 | /* So we have to handle the inside block change now. */ | ||
4452 | if (value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
4453 | /* | ||
4454 | * If the new value will be stored outside of block, | ||
4455 | * initalize a new empty value root and insert it first. | ||
4456 | */ | ||
4457 | local = 0; | ||
4458 | xi->value = &def_xv; | ||
4459 | xi->value_len = OCFS2_XATTR_ROOT_SIZE; | ||
4460 | } | ||
4461 | |||
4462 | ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); | ||
4463 | if (ret) { | ||
4464 | mlog_errno(ret); | ||
4465 | goto out; | ||
4466 | } | ||
4467 | |||
4468 | if (value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
4469 | goto out; | ||
4470 | |||
4471 | /* allocate the space now for the outside block storage. */ | ||
4472 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
4473 | value_len); | ||
4474 | if (ret) { | ||
4475 | mlog_errno(ret); | ||
4476 | |||
4477 | if (xs->not_found) { | ||
4478 | /* | ||
4479 | * We can't allocate enough clusters for outside | ||
4480 | * storage and we have allocated xattr already, | ||
4481 | * so need to remove it. | ||
4482 | */ | ||
4483 | ocfs2_xattr_bucket_remove_xs(inode, xs); | ||
4484 | } | ||
4485 | goto out; | ||
4486 | } | ||
4487 | |||
4488 | set_value_outside: | ||
4489 | ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); | ||
4490 | out: | ||
4491 | return ret; | ||
4492 | } | ||
4493 | |||
4494 | /* check whether the xattr bucket is filled up with the same hash value. */ | ||
4495 | static int ocfs2_check_xattr_bucket_collision(struct inode *inode, | ||
4496 | struct ocfs2_xattr_bucket *bucket) | ||
4497 | { | ||
4498 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
4499 | |||
4500 | if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == | ||
4501 | xh->xh_entries[0].xe_name_hash) { | ||
4502 | mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " | ||
4503 | "hash = %u\n", | ||
4504 | (unsigned long long)bucket->bhs[0]->b_blocknr, | ||
4505 | le32_to_cpu(xh->xh_entries[0].xe_name_hash)); | ||
4506 | return -ENOSPC; | ||
4507 | } | ||
4508 | |||
4509 | return 0; | ||
4510 | } | ||
4511 | |||
4512 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
4513 | struct ocfs2_xattr_info *xi, | ||
4514 | struct ocfs2_xattr_search *xs) | ||
4515 | { | ||
4516 | struct ocfs2_xattr_header *xh; | ||
4517 | struct ocfs2_xattr_entry *xe; | ||
4518 | u16 count, header_size, xh_free_start; | ||
4519 | int i, free, max_free, need, old; | ||
4520 | size_t value_size = 0, name_len = strlen(xi->name); | ||
4521 | size_t blocksize = inode->i_sb->s_blocksize; | ||
4522 | int ret, allocation = 0; | ||
4523 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
4524 | |||
4525 | mlog_entry("Set xattr %s in xattr index block\n", xi->name); | ||
4526 | |||
4527 | try_again: | ||
4528 | xh = xs->header; | ||
4529 | count = le16_to_cpu(xh->xh_count); | ||
4530 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
4531 | header_size = sizeof(struct ocfs2_xattr_header) + | ||
4532 | count * sizeof(struct ocfs2_xattr_entry); | ||
4533 | max_free = OCFS2_XATTR_BUCKET_SIZE - | ||
4534 | le16_to_cpu(xh->xh_name_value_len) - header_size; | ||
4535 | |||
4536 | mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " | ||
4537 | "of %u which exceed block size\n", | ||
4538 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
4539 | header_size); | ||
4540 | |||
4541 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
4542 | value_size = OCFS2_XATTR_ROOT_SIZE; | ||
4543 | else if (xi->value) | ||
4544 | value_size = OCFS2_XATTR_SIZE(xi->value_len); | ||
4545 | |||
4546 | if (xs->not_found) | ||
4547 | need = sizeof(struct ocfs2_xattr_entry) + | ||
4548 | OCFS2_XATTR_SIZE(name_len) + value_size; | ||
4549 | else { | ||
4550 | need = value_size + OCFS2_XATTR_SIZE(name_len); | ||
4551 | |||
4552 | /* | ||
4553 | * We only replace the old value if the new length is smaller | ||
4554 | * than the old one. Otherwise we will allocate new space in the | ||
4555 | * bucket to store it. | ||
4556 | */ | ||
4557 | xe = xs->here; | ||
4558 | if (ocfs2_xattr_is_local(xe)) | ||
4559 | old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
4560 | else | ||
4561 | old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
4562 | |||
4563 | if (old >= value_size) | ||
4564 | need = 0; | ||
4565 | } | ||
4566 | |||
4567 | free = xh_free_start - header_size; | ||
4568 | /* | ||
4569 | * We need to make sure the new name/value pair | ||
4570 | * can exist in the same block. | ||
4571 | */ | ||
4572 | if (xh_free_start % blocksize < need) | ||
4573 | free -= xh_free_start % blocksize; | ||
4574 | |||
4575 | mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " | ||
4576 | "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" | ||
4577 | " %u\n", xs->not_found, | ||
4578 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
4579 | free, need, max_free, le16_to_cpu(xh->xh_free_start), | ||
4580 | le16_to_cpu(xh->xh_name_value_len)); | ||
4581 | |||
4582 | if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
4583 | if (need <= max_free && | ||
4584 | count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
4585 | /* | ||
4586 | * We can create the space by defragment. Since only the | ||
4587 | * name/value will be moved, the xe shouldn't be changed | ||
4588 | * in xs. | ||
4589 | */ | ||
4590 | ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); | ||
4591 | if (ret) { | ||
4592 | mlog_errno(ret); | ||
4593 | goto out; | ||
4594 | } | ||
4595 | |||
4596 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
4597 | free = xh_free_start - header_size; | ||
4598 | if (xh_free_start % blocksize < need) | ||
4599 | free -= xh_free_start % blocksize; | ||
4600 | |||
4601 | if (free >= need) | ||
4602 | goto xattr_set; | ||
4603 | |||
4604 | mlog(0, "Can't get enough space for xattr insert by " | ||
4605 | "defragment. Need %u bytes, but we have %d, so " | ||
4606 | "allocate new bucket for it.\n", need, free); | ||
4607 | } | ||
4608 | |||
4609 | /* | ||
4610 | * We have to add new buckets or clusters and one | ||
4611 | * allocation should leave us enough space for insert. | ||
4612 | */ | ||
4613 | BUG_ON(allocation); | ||
4614 | |||
4615 | /* | ||
4616 | * We do not allow for overlapping ranges between buckets. And | ||
4617 | * the maximum number of collisions we will allow for then is | ||
4618 | * one bucket's worth, so check it here whether we need to | ||
4619 | * add a new bucket for the insert. | ||
4620 | */ | ||
4621 | ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); | ||
4622 | if (ret) { | ||
4623 | mlog_errno(ret); | ||
4624 | goto out; | ||
4625 | } | ||
4626 | |||
4627 | ret = ocfs2_add_new_xattr_bucket(inode, | ||
4628 | xs->xattr_bh, | ||
4629 | xs->bucket.bhs[0]); | ||
4630 | if (ret) { | ||
4631 | mlog_errno(ret); | ||
4632 | goto out; | ||
4633 | } | ||
4634 | |||
4635 | for (i = 0; i < blk_per_bucket; i++) | ||
4636 | brelse(xs->bucket.bhs[i]); | ||
4637 | |||
4638 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
4639 | |||
4640 | ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, | ||
4641 | xi->name_index, | ||
4642 | xi->name, xs); | ||
4643 | if (ret && ret != -ENODATA) | ||
4644 | goto out; | ||
4645 | xs->not_found = ret; | ||
4646 | allocation = 1; | ||
4647 | goto try_again; | ||
4648 | } | ||
4649 | |||
4650 | xattr_set: | ||
4651 | ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); | ||
4652 | out: | ||
4653 | mlog_exit(ret); | ||
4654 | return ret; | ||
4655 | } | ||
4656 | |||
4657 | static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | ||
4658 | struct ocfs2_xattr_bucket *bucket, | ||
4659 | void *para) | ||
4660 | { | ||
4661 | int ret = 0; | ||
4662 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
4663 | u16 i; | ||
4664 | struct ocfs2_xattr_entry *xe; | ||
4665 | |||
4666 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
4667 | xe = &xh->xh_entries[i]; | ||
4668 | if (ocfs2_xattr_is_local(xe)) | ||
4669 | continue; | ||
4670 | |||
4671 | ret = ocfs2_xattr_bucket_value_truncate(inode, | ||
4672 | bucket->bhs[0], | ||
4673 | i, 0); | ||
4674 | if (ret) { | ||
4675 | mlog_errno(ret); | ||
4676 | break; | ||
4677 | } | ||
4678 | } | ||
4679 | |||
4680 | return ret; | ||
4681 | } | ||
4682 | |||
4683 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
4684 | struct buffer_head *xb_bh) | ||
4685 | { | ||
4686 | struct ocfs2_xattr_block *xb = | ||
4687 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
4688 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
4689 | int ret = 0; | ||
4690 | u32 name_hash = UINT_MAX, e_cpos, num_clusters; | ||
4691 | u64 p_blkno; | ||
4692 | |||
4693 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
4694 | return 0; | ||
4695 | |||
4696 | while (name_hash > 0) { | ||
4697 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
4698 | &e_cpos, &num_clusters, el); | ||
4699 | if (ret) { | ||
4700 | mlog_errno(ret); | ||
4701 | goto out; | ||
4702 | } | ||
4703 | |||
4704 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
4705 | ocfs2_delete_xattr_in_bucket, | ||
4706 | NULL); | ||
4707 | if (ret) { | ||
4708 | mlog_errno(ret); | ||
4709 | goto out; | ||
4710 | } | ||
4711 | |||
4712 | ret = ocfs2_rm_xattr_cluster(inode, xb_bh, | ||
4713 | p_blkno, e_cpos, num_clusters); | ||
4714 | if (ret) { | ||
4715 | mlog_errno(ret); | ||
4716 | break; | ||
4717 | } | ||
4718 | |||
4719 | if (e_cpos == 0) | ||
4720 | break; | ||
4721 | |||
4722 | name_hash = e_cpos - 1; | ||
4723 | } | ||
4724 | |||
4725 | out: | ||
4726 | return ret; | ||
4727 | } | ||
4728 | |||
4729 | /* | ||
4730 | * 'trusted' attributes support | ||
4731 | */ | ||
4732 | |||
#define XATTR_TRUSTED_PREFIX "trusted."

/*
 * Produce the listing name "trusted.<name>" for one attribute.
 *
 * The required size (prefix + @name_len + terminating NUL) is always
 * returned. The name is written to @list only when @list is non-NULL and
 * @list_size is large enough; otherwise @list is left untouched. @inode is
 * not used.
 */
static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
				       size_t list_size, const char *name,
				       size_t name_len)
{
	const size_t plen = sizeof(XATTR_TRUSTED_PREFIX) - 1;
	const size_t needed = plen + name_len + 1;
	char *out;

	if (!list || needed > list_size)
		return needed;

	out = list;
	memcpy(out, XATTR_TRUSTED_PREFIX, plen);
	out += plen;
	memcpy(out, name, name_len);
	out[name_len] = '\0';

	return needed;
}
4749 | |||
4750 | static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, | ||
4751 | void *buffer, size_t size) | ||
4752 | { | ||
4753 | if (strcmp(name, "") == 0) | ||
4754 | return -EINVAL; | ||
4755 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, | ||
4756 | buffer, size); | ||
4757 | } | ||
4758 | |||
4759 | static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, | ||
4760 | const void *value, size_t size, int flags) | ||
4761 | { | ||
4762 | if (strcmp(name, "") == 0) | ||
4763 | return -EINVAL; | ||
4764 | |||
4765 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, | ||
4766 | size, flags); | ||
4767 | } | ||
4768 | |||
4769 | struct xattr_handler ocfs2_xattr_trusted_handler = { | ||
4770 | .prefix = XATTR_TRUSTED_PREFIX, | ||
4771 | .list = ocfs2_xattr_trusted_list, | ||
4772 | .get = ocfs2_xattr_trusted_get, | ||
4773 | .set = ocfs2_xattr_trusted_set, | ||
4774 | }; | ||
4775 | |||
4776 | |||
4777 | /* | ||
4778 | * 'user' attributes support | ||
4779 | */ | ||
4780 | |||
4781 | #define XATTR_USER_PREFIX "user." | ||
4782 | |||
4783 | static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, | ||
4784 | size_t list_size, const char *name, | ||
4785 | size_t name_len) | ||
4786 | { | ||
4787 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; | ||
4788 | const size_t total_len = prefix_len + name_len + 1; | ||
4789 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4790 | |||
4791 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4792 | return 0; | ||
4793 | |||
4794 | if (list && total_len <= list_size) { | ||
4795 | memcpy(list, XATTR_USER_PREFIX, prefix_len); | ||
4796 | memcpy(list + prefix_len, name, name_len); | ||
4797 | list[prefix_len + name_len] = '\0'; | ||
4798 | } | ||
4799 | return total_len; | ||
4800 | } | ||
4801 | |||
4802 | static int ocfs2_xattr_user_get(struct inode *inode, const char *name, | ||
4803 | void *buffer, size_t size) | ||
4804 | { | ||
4805 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4806 | |||
4807 | if (strcmp(name, "") == 0) | ||
4808 | return -EINVAL; | ||
4809 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4810 | return -EOPNOTSUPP; | ||
4811 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, | ||
4812 | buffer, size); | ||
4813 | } | ||
4814 | |||
4815 | static int ocfs2_xattr_user_set(struct inode *inode, const char *name, | ||
4816 | const void *value, size_t size, int flags) | ||
4817 | { | ||
4818 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4819 | |||
4820 | if (strcmp(name, "") == 0) | ||
4821 | return -EINVAL; | ||
4822 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4823 | return -EOPNOTSUPP; | ||
4824 | |||
4825 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, | ||
4826 | size, flags); | ||
4827 | } | ||
4828 | |||
4829 | struct xattr_handler ocfs2_xattr_user_handler = { | ||
4830 | .prefix = XATTR_USER_PREFIX, | ||
4831 | .list = ocfs2_xattr_user_list, | ||
4832 | .get = ocfs2_xattr_user_get, | ||
4833 | .set = ocfs2_xattr_user_set, | ||
4834 | }; | ||
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h new file mode 100644 index 000000000000..c25c7c62a059 --- /dev/null +++ b/fs/ocfs2/xattr.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * xattr.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef OCFS2_XATTR_H | ||
27 | #define OCFS2_XATTR_H | ||
28 | |||
29 | #include <linux/init.h> | ||
30 | #include <linux/xattr.h> | ||
31 | |||
/*
 * Name-index values identifying which namespace an xattr belongs to.
 * Numbering starts at 1; OCFS2_XATTR_MAX is one past the last valid index.
 */
enum ocfs2_xattr_type {
	OCFS2_XATTR_INDEX_USER			= 1,
	OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS	= 2,
	OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT	= 3,
	OCFS2_XATTR_INDEX_TRUSTED		= 4,
	OCFS2_XATTR_INDEX_SECURITY		= 5,
	OCFS2_XATTR_MAX				= 6
};
40 | |||
41 | extern struct xattr_handler ocfs2_xattr_user_handler; | ||
42 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | ||
43 | |||
44 | extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | ||
45 | extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t); | ||
46 | extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, | ||
47 | size_t, int); | ||
48 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); | ||
49 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | ||
50 | |||
51 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | ||
52 | { | ||
53 | return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; | ||
54 | } | ||
55 | |||
56 | static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | ||
57 | { | ||
58 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | ||
59 | } | ||
60 | |||
61 | static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) | ||
62 | { | ||
63 | u16 len = sb->s_blocksize - | ||
64 | offsetof(struct ocfs2_xattr_header, xh_entries); | ||
65 | |||
66 | return len / sizeof(struct ocfs2_xattr_entry); | ||
67 | } | ||
68 | #endif /* OCFS2_XATTR_H */ | ||
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index 697663b01bae..e1c0ec0ae989 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c | |||
@@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block) | |||
92 | struct buffer_head *bh; | 92 | struct buffer_head *bh; |
93 | struct omfs_sb_info *sbi = OMFS_SB(sb); | 93 | struct omfs_sb_info *sbi = OMFS_SB(sb); |
94 | int bits_per_entry = 8 * sb->s_blocksize; | 94 | int bits_per_entry = 8 * sb->s_blocksize; |
95 | int map, bit; | 95 | unsigned int map, bit; |
96 | int ret = 0; | 96 | int ret = 0; |
97 | u64 tmp; | 97 | u64 tmp; |
98 | 98 | ||
@@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count) | |||
176 | struct omfs_sb_info *sbi = OMFS_SB(sb); | 176 | struct omfs_sb_info *sbi = OMFS_SB(sb); |
177 | int bits_per_entry = 8 * sb->s_blocksize; | 177 | int bits_per_entry = 8 * sb->s_blocksize; |
178 | u64 tmp; | 178 | u64 tmp; |
179 | int map, bit, ret; | 179 | unsigned int map, bit; |
180 | int ret; | ||
180 | 181 | ||
181 | tmp = block; | 182 | tmp = block; |
182 | bit = do_div(tmp, bits_per_entry); | 183 | bit = do_div(tmp, bits_per_entry); |
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 7e2499053e4d..834b2331f6b3 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -26,6 +26,13 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry, | |||
26 | return err ? -EIO : 0; | 26 | return err ? -EIO : 0; |
27 | } | 27 | } |
28 | 28 | ||
29 | static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) | ||
30 | { | ||
31 | return (sbi->s_sys_blocksize - offset - | ||
32 | sizeof(struct omfs_extent)) / | ||
33 | sizeof(struct omfs_extent_entry) + 1; | ||
34 | } | ||
35 | |||
29 | void omfs_make_empty_table(struct buffer_head *bh, int offset) | 36 | void omfs_make_empty_table(struct buffer_head *bh, int offset) |
30 | { | 37 | { |
31 | struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; | 38 | struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; |
@@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode) | |||
45 | struct buffer_head *bh; | 52 | struct buffer_head *bh; |
46 | u64 next, last; | 53 | u64 next, last; |
47 | u32 extent_count; | 54 | u32 extent_count; |
55 | u32 max_extents; | ||
48 | int ret; | 56 | int ret; |
49 | 57 | ||
50 | /* traverse extent table, freeing each entry that is greater | 58 | /* traverse extent table, freeing each entry that is greater |
@@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode) | |||
62 | goto out; | 70 | goto out; |
63 | 71 | ||
64 | oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); | 72 | oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); |
73 | max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); | ||
65 | 74 | ||
66 | for (;;) { | 75 | for (;;) { |
67 | 76 | ||
68 | if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) { | 77 | if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) |
69 | brelse(bh); | 78 | goto out_brelse; |
70 | goto out; | ||
71 | } | ||
72 | 79 | ||
73 | extent_count = be32_to_cpu(oe->e_extent_count); | 80 | extent_count = be32_to_cpu(oe->e_extent_count); |
81 | |||
82 | if (extent_count > max_extents) | ||
83 | goto out_brelse; | ||
84 | |||
74 | last = next; | 85 | last = next; |
75 | next = be64_to_cpu(oe->e_next); | 86 | next = be64_to_cpu(oe->e_next); |
76 | entry = &oe->e_entry; | 87 | entry = &oe->e_entry; |
@@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode) | |||
98 | if (!bh) | 109 | if (!bh) |
99 | goto out; | 110 | goto out; |
100 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); | 111 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); |
112 | max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); | ||
101 | } | 113 | } |
102 | ret = 0; | 114 | ret = 0; |
103 | out: | 115 | out: |
104 | return ret; | 116 | return ret; |
117 | out_brelse: | ||
118 | brelse(bh); | ||
119 | return ret; | ||
105 | } | 120 | } |
106 | 121 | ||
107 | static void omfs_truncate(struct inode *inode) | 122 | static void omfs_truncate(struct inode *inode) |
@@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, | |||
154 | goto out; | 169 | goto out; |
155 | } | 170 | } |
156 | } | 171 | } |
157 | max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START - | 172 | max_count = omfs_max_extents(sbi, OMFS_EXTENT_START); |
158 | sizeof(struct omfs_extent)) / | ||
159 | sizeof(struct omfs_extent_entry) + 1; | ||
160 | 173 | ||
161 | /* TODO: add a continuation block here */ | 174 | /* TODO: add a continuation block here */ |
162 | if (be32_to_cpu(oe->e_extent_count) > max_count-1) | 175 | if (be32_to_cpu(oe->e_extent_count) > max_count-1) |
@@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
225 | sector_t next, offset; | 238 | sector_t next, offset; |
226 | int ret; | 239 | int ret; |
227 | u64 new_block; | 240 | u64 new_block; |
241 | u32 max_extents; | ||
228 | int extent_count; | 242 | int extent_count; |
229 | struct omfs_extent *oe; | 243 | struct omfs_extent *oe; |
230 | struct omfs_extent_entry *entry; | 244 | struct omfs_extent_entry *entry; |
@@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
238 | goto out; | 252 | goto out; |
239 | 253 | ||
240 | oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); | 254 | oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); |
255 | max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); | ||
241 | next = inode->i_ino; | 256 | next = inode->i_ino; |
242 | 257 | ||
243 | for (;;) { | 258 | for (;;) { |
@@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
249 | next = be64_to_cpu(oe->e_next); | 264 | next = be64_to_cpu(oe->e_next); |
250 | entry = &oe->e_entry; | 265 | entry = &oe->e_entry; |
251 | 266 | ||
267 | if (extent_count > max_extents) | ||
268 | goto out_brelse; | ||
269 | |||
252 | offset = find_block(inode, entry, block, extent_count, &remain); | 270 | offset = find_block(inode, entry, block, extent_count, &remain); |
253 | if (offset > 0) { | 271 | if (offset > 0) { |
254 | ret = 0; | 272 | ret = 0; |
@@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
266 | if (!bh) | 284 | if (!bh) |
267 | goto out; | 285 | goto out; |
268 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); | 286 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); |
287 | max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); | ||
269 | } | 288 | } |
270 | if (create) { | 289 | if (create) { |
271 | ret = omfs_grow_extent(inode, oe, &new_block); | 290 | ret = omfs_grow_extent(inode, oe, &new_block); |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index a95fe5984f4b..cbf047a847c5 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino) | |||
232 | inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); | 232 | inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); |
233 | inode->i_op = &omfs_dir_inops; | 233 | inode->i_op = &omfs_dir_inops; |
234 | inode->i_fop = &omfs_dir_operations; | 234 | inode->i_fop = &omfs_dir_operations; |
235 | inode->i_size = be32_to_cpu(oi->i_head.h_body_size) + | 235 | inode->i_size = sbi->s_sys_blocksize; |
236 | sizeof(struct omfs_header); | ||
237 | inc_nlink(inode); | 236 | inc_nlink(inode); |
238 | break; | 237 | break; |
239 | case OMFS_FILE: | 238 | case OMFS_FILE: |
@@ -347,7 +346,7 @@ enum { | |||
347 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask | 346 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask |
348 | }; | 347 | }; |
349 | 348 | ||
350 | static match_table_t tokens = { | 349 | static const match_table_t tokens = { |
351 | {Opt_uid, "uid=%u"}, | 350 | {Opt_uid, "uid=%u"}, |
352 | {Opt_gid, "gid=%u"}, | 351 | {Opt_gid, "gid=%u"}, |
353 | {Opt_umask, "umask=%o"}, | 352 | {Opt_umask, "umask=%o"}, |
@@ -1141,8 +1141,7 @@ EXPORT_SYMBOL(sys_close); | |||
1141 | asmlinkage long sys_vhangup(void) | 1141 | asmlinkage long sys_vhangup(void) |
1142 | { | 1142 | { |
1143 | if (capable(CAP_SYS_TTY_CONFIG)) { | 1143 | if (capable(CAP_SYS_TTY_CONFIG)) { |
1144 | /* XXX: this needs locking */ | 1144 | tty_vhangup_self(); |
1145 | tty_vhangup(current->signal->tty); | ||
1146 | return 0; | 1145 | return 0; |
1147 | } | 1146 | } |
1148 | return -EPERM; | 1147 | return -EPERM; |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..7408227c49c9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = | |||
120 | * a pointer to that same buffer (for convenience). | 120 | * a pointer to that same buffer (for convenience). |
121 | */ | 121 | */ |
122 | 122 | ||
123 | char *disk_name(struct gendisk *hd, int part, char *buf) | 123 | char *disk_name(struct gendisk *hd, int partno, char *buf) |
124 | { | 124 | { |
125 | if (!part) | 125 | if (!partno) |
126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); | 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); |
127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) | 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) |
128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); | 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); |
129 | else | 129 | else |
130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); | 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); |
131 | 131 | ||
132 | return buf; | 132 | return buf; |
133 | } | 133 | } |
134 | 134 | ||
135 | const char *bdevname(struct block_device *bdev, char *buf) | 135 | const char *bdevname(struct block_device *bdev, char *buf) |
136 | { | 136 | { |
137 | int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; | 137 | return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); |
138 | return disk_name(bdev->bd_disk, part, buf); | ||
139 | } | 138 | } |
140 | 139 | ||
141 | EXPORT_SYMBOL(bdevname); | 140 | EXPORT_SYMBOL(bdevname); |
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
169 | if (isdigit(state->name[strlen(state->name)-1])) | 168 | if (isdigit(state->name[strlen(state->name)-1])) |
170 | sprintf(state->name, "p"); | 169 | sprintf(state->name, "p"); |
171 | 170 | ||
172 | state->limit = hd->minors; | 171 | state->limit = disk_max_parts(hd); |
173 | i = res = err = 0; | 172 | i = res = err = 0; |
174 | while (!res && check_part[i]) { | 173 | while (!res && check_part[i]) { |
175 | memset(&state->parts, 0, sizeof(state->parts)); | 174 | memset(&state->parts, 0, sizeof(state->parts)); |
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev, | |||
204 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | 203 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
205 | } | 204 | } |
206 | 205 | ||
207 | static ssize_t part_size_show(struct device *dev, | 206 | ssize_t part_size_show(struct device *dev, |
208 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
209 | { | 208 | { |
210 | struct hd_struct *p = dev_to_part(dev); | 209 | struct hd_struct *p = dev_to_part(dev); |
211 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 210 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
212 | } | 211 | } |
213 | 212 | ||
214 | static ssize_t part_stat_show(struct device *dev, | 213 | ssize_t part_stat_show(struct device *dev, |
215 | struct device_attribute *attr, char *buf) | 214 | struct device_attribute *attr, char *buf) |
216 | { | 215 | { |
217 | struct hd_struct *p = dev_to_part(dev); | 216 | struct hd_struct *p = dev_to_part(dev); |
217 | int cpu; | ||
218 | 218 | ||
219 | preempt_disable(); | 219 | cpu = part_stat_lock(); |
220 | part_round_stats(p); | 220 | part_round_stats(cpu, p); |
221 | preempt_enable(); | 221 | part_stat_unlock(); |
222 | return sprintf(buf, | 222 | return sprintf(buf, |
223 | "%8lu %8lu %8llu %8u " | 223 | "%8lu %8lu %8llu %8u " |
224 | "%8lu %8lu %8llu %8u " | 224 | "%8lu %8lu %8llu %8u " |
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev, | |||
238 | } | 238 | } |
239 | 239 | ||
240 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 240 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
241 | static ssize_t part_fail_show(struct device *dev, | 241 | ssize_t part_fail_show(struct device *dev, |
242 | struct device_attribute *attr, char *buf) | 242 | struct device_attribute *attr, char *buf) |
243 | { | 243 | { |
244 | struct hd_struct *p = dev_to_part(dev); | 244 | struct hd_struct *p = dev_to_part(dev); |
245 | 245 | ||
246 | return sprintf(buf, "%d\n", p->make_it_fail); | 246 | return sprintf(buf, "%d\n", p->make_it_fail); |
247 | } | 247 | } |
248 | 248 | ||
249 | static ssize_t part_fail_store(struct device *dev, | 249 | ssize_t part_fail_store(struct device *dev, |
250 | struct device_attribute *attr, | 250 | struct device_attribute *attr, |
251 | const char *buf, size_t count) | 251 | const char *buf, size_t count) |
252 | { | 252 | { |
253 | struct hd_struct *p = dev_to_part(dev); | 253 | struct hd_struct *p = dev_to_part(dev); |
254 | int i; | 254 | int i; |
@@ -300,40 +300,34 @@ struct device_type part_type = { | |||
300 | .release = part_release, | 300 | .release = part_release, |
301 | }; | 301 | }; |
302 | 302 | ||
303 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 303 | static void delete_partition_rcu_cb(struct rcu_head *head) |
304 | { | ||
305 | struct kobject *k; | ||
306 | |||
307 | k = kobject_get(&p->dev.kobj); | ||
308 | p->holder_dir = kobject_create_and_add("holders", k); | ||
309 | kobject_put(k); | ||
310 | } | ||
311 | |||
312 | static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | ||
313 | { | 304 | { |
314 | struct kobject *k; | 305 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); |
315 | 306 | ||
316 | k = kobject_get(&disk->dev.kobj); | 307 | part->start_sect = 0; |
317 | disk->holder_dir = kobject_create_and_add("holders", k); | 308 | part->nr_sects = 0; |
318 | disk->slave_dir = kobject_create_and_add("slaves", k); | 309 | part_stat_set_all(part, 0); |
319 | kobject_put(k); | 310 | put_device(part_to_dev(part)); |
320 | } | 311 | } |
321 | 312 | ||
322 | void delete_partition(struct gendisk *disk, int part) | 313 | void delete_partition(struct gendisk *disk, int partno) |
323 | { | 314 | { |
324 | struct hd_struct *p = disk->part[part-1]; | 315 | struct disk_part_tbl *ptbl = disk->part_tbl; |
316 | struct hd_struct *part; | ||
325 | 317 | ||
326 | if (!p) | 318 | if (partno >= ptbl->len) |
327 | return; | 319 | return; |
328 | if (!p->nr_sects) | 320 | |
321 | part = ptbl->part[partno]; | ||
322 | if (!part) | ||
329 | return; | 323 | return; |
330 | disk->part[part-1] = NULL; | 324 | |
331 | p->start_sect = 0; | 325 | blk_free_devt(part_devt(part)); |
332 | p->nr_sects = 0; | 326 | rcu_assign_pointer(ptbl->part[partno], NULL); |
333 | part_stat_set_all(p, 0); | 327 | kobject_put(part->holder_dir); |
334 | kobject_put(p->holder_dir); | 328 | device_del(part_to_dev(part)); |
335 | device_del(&p->dev); | 329 | |
336 | put_device(&p->dev); | 330 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
337 | } | 331 | } |
338 | 332 | ||
339 | static ssize_t whole_disk_show(struct device *dev, | 333 | static ssize_t whole_disk_show(struct device *dev, |
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev, | |||
344 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | 338 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, |
345 | whole_disk_show, NULL); | 339 | whole_disk_show, NULL); |
346 | 340 | ||
347 | int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 341 | int add_partition(struct gendisk *disk, int partno, |
342 | sector_t start, sector_t len, int flags) | ||
348 | { | 343 | { |
349 | struct hd_struct *p; | 344 | struct hd_struct *p; |
345 | dev_t devt = MKDEV(0, 0); | ||
346 | struct device *ddev = disk_to_dev(disk); | ||
347 | struct device *pdev; | ||
348 | struct disk_part_tbl *ptbl; | ||
349 | const char *dname; | ||
350 | int err; | 350 | int err; |
351 | 351 | ||
352 | err = disk_expand_part_tbl(disk, partno); | ||
353 | if (err) | ||
354 | return err; | ||
355 | ptbl = disk->part_tbl; | ||
356 | |||
357 | if (ptbl->part[partno]) | ||
358 | return -EBUSY; | ||
359 | |||
352 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 360 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
353 | if (!p) | 361 | if (!p) |
354 | return -ENOMEM; | 362 | return -ENOMEM; |
355 | 363 | ||
356 | if (!init_part_stats(p)) { | 364 | if (!init_part_stats(p)) { |
357 | err = -ENOMEM; | 365 | err = -ENOMEM; |
358 | goto out0; | 366 | goto out_free; |
359 | } | 367 | } |
368 | pdev = part_to_dev(p); | ||
369 | |||
360 | p->start_sect = start; | 370 | p->start_sect = start; |
361 | p->nr_sects = len; | 371 | p->nr_sects = len; |
362 | p->partno = part; | 372 | p->partno = partno; |
363 | p->policy = disk->policy; | 373 | p->policy = get_disk_ro(disk); |
364 | 374 | ||
365 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) | 375 | dname = dev_name(ddev); |
366 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 376 | if (isdigit(dname[strlen(dname) - 1])) |
367 | "%sp%d", disk->dev.bus_id, part); | 377 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); |
368 | else | 378 | else |
369 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 379 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); |
370 | "%s%d", disk->dev.bus_id, part); | 380 | |
381 | device_initialize(pdev); | ||
382 | pdev->class = &block_class; | ||
383 | pdev->type = &part_type; | ||
384 | pdev->parent = ddev; | ||
371 | 385 | ||
372 | device_initialize(&p->dev); | 386 | err = blk_alloc_devt(p, &devt); |
373 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); | 387 | if (err) |
374 | p->dev.class = &block_class; | 388 | goto out_free; |
375 | p->dev.type = &part_type; | 389 | pdev->devt = devt; |
376 | p->dev.parent = &disk->dev; | ||
377 | disk->part[part-1] = p; | ||
378 | 390 | ||
379 | /* delay uevent until 'holders' subdir is created */ | 391 | /* delay uevent until 'holders' subdir is created */ |
380 | p->dev.uevent_suppress = 1; | 392 | pdev->uevent_suppress = 1; |
381 | err = device_add(&p->dev); | 393 | err = device_add(pdev); |
382 | if (err) | 394 | if (err) |
383 | goto out1; | 395 | goto out_put; |
384 | partition_sysfs_add_subdir(p); | 396 | |
385 | p->dev.uevent_suppress = 0; | 397 | err = -ENOMEM; |
398 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | ||
399 | if (!p->holder_dir) | ||
400 | goto out_del; | ||
401 | |||
402 | pdev->uevent_suppress = 0; | ||
386 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 403 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
387 | err = device_create_file(&p->dev, &dev_attr_whole_disk); | 404 | err = device_create_file(pdev, &dev_attr_whole_disk); |
388 | if (err) | 405 | if (err) |
389 | goto out2; | 406 | goto out_del; |
390 | } | 407 | } |
391 | 408 | ||
409 | /* everything is up and running, commence */ | ||
410 | INIT_RCU_HEAD(&p->rcu_head); | ||
411 | rcu_assign_pointer(ptbl->part[partno], p); | ||
412 | |||
392 | /* suppress uevent if the disk supresses it */ | 413 | /* suppress uevent if the disk supresses it */ |
393 | if (!disk->dev.uevent_suppress) | 414 | if (!ddev->uevent_suppress) |
394 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | 415 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
395 | 416 | ||
396 | return 0; | 417 | return 0; |
397 | 418 | ||
398 | out2: | 419 | out_free: |
399 | device_del(&p->dev); | ||
400 | out1: | ||
401 | put_device(&p->dev); | ||
402 | free_part_stats(p); | ||
403 | out0: | ||
404 | kfree(p); | 420 | kfree(p); |
405 | return err; | 421 | return err; |
422 | out_del: | ||
423 | kobject_put(p->holder_dir); | ||
424 | device_del(pdev); | ||
425 | out_put: | ||
426 | put_device(pdev); | ||
427 | blk_free_devt(devt); | ||
428 | return err; | ||
406 | } | 429 | } |
407 | 430 | ||
408 | /* Not exported, helper to add_disk(). */ | 431 | /* Not exported, helper to add_disk(). */ |
409 | void register_disk(struct gendisk *disk) | 432 | void register_disk(struct gendisk *disk) |
410 | { | 433 | { |
434 | struct device *ddev = disk_to_dev(disk); | ||
411 | struct block_device *bdev; | 435 | struct block_device *bdev; |
436 | struct disk_part_iter piter; | ||
437 | struct hd_struct *part; | ||
412 | char *s; | 438 | char *s; |
413 | int i; | ||
414 | struct hd_struct *p; | ||
415 | int err; | 439 | int err; |
416 | 440 | ||
417 | disk->dev.parent = disk->driverfs_dev; | 441 | ddev->parent = disk->driverfs_dev; |
418 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); | ||
419 | 442 | ||
420 | strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); | 443 | strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); |
421 | /* ewww... some of these buggers have / in the name... */ | 444 | /* ewww... some of these buggers have / in the name... */ |
422 | s = strchr(disk->dev.bus_id, '/'); | 445 | s = strchr(ddev->bus_id, '/'); |
423 | if (s) | 446 | if (s) |
424 | *s = '!'; | 447 | *s = '!'; |
425 | 448 | ||
426 | /* delay uevents, until we scanned partition table */ | 449 | /* delay uevents, until we scanned partition table */ |
427 | disk->dev.uevent_suppress = 1; | 450 | ddev->uevent_suppress = 1; |
428 | 451 | ||
429 | if (device_add(&disk->dev)) | 452 | if (device_add(ddev)) |
430 | return; | 453 | return; |
431 | #ifndef CONFIG_SYSFS_DEPRECATED | 454 | #ifndef CONFIG_SYSFS_DEPRECATED |
432 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | 455 | err = sysfs_create_link(block_depr, &ddev->kobj, |
433 | kobject_name(&disk->dev.kobj)); | 456 | kobject_name(&ddev->kobj)); |
434 | if (err) { | 457 | if (err) { |
435 | device_del(&disk->dev); | 458 | device_del(ddev); |
436 | return; | 459 | return; |
437 | } | 460 | } |
438 | #endif | 461 | #endif |
439 | disk_sysfs_add_subdirs(disk); | 462 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
463 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
440 | 464 | ||
441 | /* No minors to use for partitions */ | 465 | /* No minors to use for partitions */ |
442 | if (disk->minors == 1) | 466 | if (!disk_partitionable(disk)) |
443 | goto exit; | 467 | goto exit; |
444 | 468 | ||
445 | /* No such device (e.g., media were just removed) */ | 469 | /* No such device (e.g., media were just removed) */ |
@@ -458,50 +482,66 @@ void register_disk(struct gendisk *disk) | |||
458 | 482 | ||
459 | exit: | 483 | exit: |
460 | /* announce disk after possible partitions are created */ | 484 | /* announce disk after possible partitions are created */ |
461 | disk->dev.uevent_suppress = 0; | 485 | ddev->uevent_suppress = 0; |
462 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 486 | kobject_uevent(&ddev->kobj, KOBJ_ADD); |
463 | 487 | ||
464 | /* announce possible partitions */ | 488 | /* announce possible partitions */ |
465 | for (i = 1; i < disk->minors; i++) { | 489 | disk_part_iter_init(&piter, disk, 0); |
466 | p = disk->part[i-1]; | 490 | while ((part = disk_part_iter_next(&piter))) |
467 | if (!p || !p->nr_sects) | 491 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); |
468 | continue; | 492 | disk_part_iter_exit(&piter); |
469 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
470 | } | ||
471 | } | 493 | } |
472 | 494 | ||
473 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 495 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
474 | { | 496 | { |
497 | struct disk_part_iter piter; | ||
498 | struct hd_struct *part; | ||
475 | struct parsed_partitions *state; | 499 | struct parsed_partitions *state; |
476 | int p, res; | 500 | int p, highest, res; |
477 | 501 | ||
478 | if (bdev->bd_part_count) | 502 | if (bdev->bd_part_count) |
479 | return -EBUSY; | 503 | return -EBUSY; |
480 | res = invalidate_partition(disk, 0); | 504 | res = invalidate_partition(disk, 0); |
481 | if (res) | 505 | if (res) |
482 | return res; | 506 | return res; |
483 | bdev->bd_invalidated = 0; | 507 | |
484 | for (p = 1; p < disk->minors; p++) | 508 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
485 | delete_partition(disk, p); | 509 | while ((part = disk_part_iter_next(&piter))) |
510 | delete_partition(disk, part->partno); | ||
511 | disk_part_iter_exit(&piter); | ||
512 | |||
486 | if (disk->fops->revalidate_disk) | 513 | if (disk->fops->revalidate_disk) |
487 | disk->fops->revalidate_disk(disk); | 514 | disk->fops->revalidate_disk(disk); |
515 | check_disk_size_change(disk, bdev); | ||
516 | bdev->bd_invalidated = 0; | ||
488 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 517 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
489 | return 0; | 518 | return 0; |
490 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 519 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
491 | return -EIO; | 520 | return -EIO; |
492 | 521 | ||
493 | /* tell userspace that the media / partition table may have changed */ | 522 | /* tell userspace that the media / partition table may have changed */ |
494 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | 523 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
495 | 524 | ||
525 | /* Detect the highest partition number and preallocate | ||
526 | * disk->part_tbl. This is an optimization and not strictly | ||
527 | * necessary. | ||
528 | */ | ||
529 | for (p = 1, highest = 0; p < state->limit; p++) | ||
530 | if (state->parts[p].size) | ||
531 | highest = p; | ||
532 | |||
533 | disk_expand_part_tbl(disk, highest); | ||
534 | |||
535 | /* add partitions */ | ||
496 | for (p = 1; p < state->limit; p++) { | 536 | for (p = 1; p < state->limit; p++) { |
497 | sector_t size = state->parts[p].size; | 537 | sector_t size = state->parts[p].size; |
498 | sector_t from = state->parts[p].from; | 538 | sector_t from = state->parts[p].from; |
499 | if (!size) | 539 | if (!size) |
500 | continue; | 540 | continue; |
501 | if (from + size > get_capacity(disk)) { | 541 | if (from + size > get_capacity(disk)) { |
502 | printk(KERN_ERR " %s: p%d exceeds device capacity\n", | 542 | printk(KERN_WARNING |
543 | "%s: p%d exceeds device capacity\n", | ||
503 | disk->disk_name, p); | 544 | disk->disk_name, p); |
504 | continue; | ||
505 | } | 545 | } |
506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 546 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
507 | if (res) { | 547 | if (res) { |
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector); | |||
541 | 581 | ||
542 | void del_gendisk(struct gendisk *disk) | 582 | void del_gendisk(struct gendisk *disk) |
543 | { | 583 | { |
544 | int p; | 584 | struct disk_part_iter piter; |
585 | struct hd_struct *part; | ||
545 | 586 | ||
546 | /* invalidate stuff */ | 587 | /* invalidate stuff */ |
547 | for (p = disk->minors - 1; p > 0; p--) { | 588 | disk_part_iter_init(&piter, disk, |
548 | invalidate_partition(disk, p); | 589 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
549 | delete_partition(disk, p); | 590 | while ((part = disk_part_iter_next(&piter))) { |
591 | invalidate_partition(disk, part->partno); | ||
592 | delete_partition(disk, part->partno); | ||
550 | } | 593 | } |
594 | disk_part_iter_exit(&piter); | ||
595 | |||
551 | invalidate_partition(disk, 0); | 596 | invalidate_partition(disk, 0); |
552 | disk->capacity = 0; | 597 | blk_free_devt(disk_to_dev(disk)->devt); |
598 | set_capacity(disk, 0); | ||
553 | disk->flags &= ~GENHD_FL_UP; | 599 | disk->flags &= ~GENHD_FL_UP; |
554 | unlink_gendisk(disk); | 600 | unlink_gendisk(disk); |
555 | disk_stat_set_all(disk, 0); | 601 | part_stat_set_all(&disk->part0, 0); |
556 | disk->stamp = 0; | 602 | disk->part0.stamp = 0; |
557 | 603 | ||
558 | kobject_put(disk->holder_dir); | 604 | kobject_put(disk->part0.holder_dir); |
559 | kobject_put(disk->slave_dir); | 605 | kobject_put(disk->slave_dir); |
560 | disk->driverfs_dev = NULL; | 606 | disk->driverfs_dev = NULL; |
561 | #ifndef CONFIG_SYSFS_DEPRECATED | 607 | #ifndef CONFIG_SYSFS_DEPRECATED |
562 | sysfs_remove_link(block_depr, disk->dev.bus_id); | 608 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
563 | #endif | 609 | #endif |
564 | device_del(&disk->dev); | 610 | device_del(disk_to_dev(disk)); |
565 | } | 611 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8b..98dbe1a84528 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -5,15 +5,13 @@ | |||
5 | * add_gd_partition adds a partitions details to the devices partition | 5 | * add_gd_partition adds a partitions details to the devices partition |
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | enum { MAX_PART = 256 }; | ||
9 | |||
10 | struct parsed_partitions { | 8 | struct parsed_partitions { |
11 | char name[BDEVNAME_SIZE]; | 9 | char name[BDEVNAME_SIZE]; |
12 | struct { | 10 | struct { |
13 | sector_t from; | 11 | sector_t from; |
14 | sector_t size; | 12 | sector_t size; |
15 | int flags; | 13 | int flags; |
16 | } parts[MAX_PART]; | 14 | } parts[DISK_MAX_PARTS]; |
17 | int next; | 15 | int next; |
18 | int limit; | 16 | int limit; |
19 | }; | 17 | }; |
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 73cd7a418f06..50f8f0600f06 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
@@ -57,3 +57,13 @@ config PROC_SYSCTL | |||
57 | As it is generally a good thing, you should say Y here unless | 57 | As it is generally a good thing, you should say Y here unless |
58 | building a kernel for install/rescue disks or your system is very | 58 | building a kernel for install/rescue disks or your system is very |
59 | limited in memory. | 59 | limited in memory. |
60 | |||
61 | config PROC_PAGE_MONITOR | ||
62 | default y | ||
63 | depends on PROC_FS && MMU | ||
64 | bool "Enable /proc page monitoring" if EMBEDDED | ||
65 | help | ||
66 | Various /proc files exist to monitor process memory utilization: | ||
67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, | ||
68 | /proc/kpagecount, and /proc/kpageflags. Disabling these | ||
69 | interfaces will reduce the size of the kernel by approximately 4kb. | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..f4bc0e789539 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -86,11 +86,6 @@ | |||
86 | #include <asm/processor.h> | 86 | #include <asm/processor.h> |
87 | #include "internal.h" | 87 | #include "internal.h" |
88 | 88 | ||
89 | /* Gcc optimizes away "strlen(x)" for constant x */ | ||
90 | #define ADDBUF(buffer, string) \ | ||
91 | do { memcpy(buffer, string, strlen(string)); \ | ||
92 | buffer += strlen(string); } while (0) | ||
93 | |||
94 | static inline void task_name(struct seq_file *m, struct task_struct *p) | 89 | static inline void task_name(struct seq_file *m, struct task_struct *p) |
95 | { | 90 | { |
96 | int i; | 91 | int i; |
@@ -261,7 +256,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
261 | sigemptyset(&ignored); | 256 | sigemptyset(&ignored); |
262 | sigemptyset(&caught); | 257 | sigemptyset(&caught); |
263 | 258 | ||
264 | rcu_read_lock(); | ||
265 | if (lock_task_sighand(p, &flags)) { | 259 | if (lock_task_sighand(p, &flags)) { |
266 | pending = p->pending.signal; | 260 | pending = p->pending.signal; |
267 | shpending = p->signal->shared_pending.signal; | 261 | shpending = p->signal->shared_pending.signal; |
@@ -272,7 +266,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
272 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; | 266 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; |
273 | unlock_task_sighand(p, &flags); | 267 | unlock_task_sighand(p, &flags); |
274 | } | 268 | } |
275 | rcu_read_unlock(); | ||
276 | 269 | ||
277 | seq_printf(m, "Threads:\t%d\n", num_threads); | 270 | seq_printf(m, "Threads:\t%d\n", num_threads); |
278 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); | 271 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); |
@@ -337,65 +330,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
337 | return 0; | 330 | return 0; |
338 | } | 331 | } |
339 | 332 | ||
340 | /* | ||
341 | * Use precise platform statistics if available: | ||
342 | */ | ||
343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
344 | static cputime_t task_utime(struct task_struct *p) | ||
345 | { | ||
346 | return p->utime; | ||
347 | } | ||
348 | |||
349 | static cputime_t task_stime(struct task_struct *p) | ||
350 | { | ||
351 | return p->stime; | ||
352 | } | ||
353 | #else | ||
354 | static cputime_t task_utime(struct task_struct *p) | ||
355 | { | ||
356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
357 | total = utime + cputime_to_clock_t(p->stime); | ||
358 | u64 temp; | ||
359 | |||
360 | /* | ||
361 | * Use CFS's precise accounting: | ||
362 | */ | ||
363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
364 | |||
365 | if (total) { | ||
366 | temp *= utime; | ||
367 | do_div(temp, total); | ||
368 | } | ||
369 | utime = (clock_t)temp; | ||
370 | |||
371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
372 | return p->prev_utime; | ||
373 | } | ||
374 | |||
375 | static cputime_t task_stime(struct task_struct *p) | ||
376 | { | ||
377 | clock_t stime; | ||
378 | |||
379 | /* | ||
380 | * Use CFS's precise accounting. (we subtract utime from | ||
381 | * the total, to make sure the total observed by userspace | ||
382 | * grows monotonically - apps rely on that): | ||
383 | */ | ||
384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
385 | cputime_to_clock_t(task_utime(p)); | ||
386 | |||
387 | if (stime >= 0) | ||
388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
389 | |||
390 | return p->prev_stime; | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static cputime_t task_gtime(struct task_struct *p) | ||
395 | { | ||
396 | return p->gtime; | ||
397 | } | ||
398 | |||
399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 333 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
400 | struct pid *pid, struct task_struct *task, int whole) | 334 | struct pid *pid, struct task_struct *task, int whole) |
401 | { | 335 | { |
diff --git a/fs/proc/base.c b/fs/proc/base.c index a28840b11b89..b5918ae8ca79 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -148,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | |||
148 | return count; | 148 | return count; |
149 | } | 149 | } |
150 | 150 | ||
151 | int maps_protect; | ||
152 | EXPORT_SYMBOL(maps_protect); | ||
153 | |||
154 | static struct fs_struct *get_fs_struct(struct task_struct *task) | 151 | static struct fs_struct *get_fs_struct(struct task_struct *task) |
155 | { | 152 | { |
156 | struct fs_struct *fs; | 153 | struct fs_struct *fs; |
@@ -164,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
164 | 161 | ||
165 | static int get_nr_threads(struct task_struct *tsk) | 162 | static int get_nr_threads(struct task_struct *tsk) |
166 | { | 163 | { |
167 | /* Must be called with the rcu_read_lock held */ | ||
168 | unsigned long flags; | 164 | unsigned long flags; |
169 | int count = 0; | 165 | int count = 0; |
170 | 166 | ||
@@ -471,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) | |||
471 | 467 | ||
472 | struct rlimit rlim[RLIM_NLIMITS]; | 468 | struct rlimit rlim[RLIM_NLIMITS]; |
473 | 469 | ||
474 | rcu_read_lock(); | 470 | if (!lock_task_sighand(task, &flags)) |
475 | if (!lock_task_sighand(task,&flags)) { | ||
476 | rcu_read_unlock(); | ||
477 | return 0; | 471 | return 0; |
478 | } | ||
479 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); | 472 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); |
480 | unlock_task_sighand(task, &flags); | 473 | unlock_task_sighand(task, &flags); |
481 | rcu_read_unlock(); | ||
482 | 474 | ||
483 | /* | 475 | /* |
484 | * print the file header | 476 | * print the file header |
@@ -2443,6 +2435,13 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
2443 | } | 2435 | } |
2444 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2436 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
2445 | 2437 | ||
2438 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | ||
2439 | struct pid *pid, struct task_struct *task) | ||
2440 | { | ||
2441 | seq_printf(m, "%08x\n", task->personality); | ||
2442 | return 0; | ||
2443 | } | ||
2444 | |||
2446 | /* | 2445 | /* |
2447 | * Thread groups | 2446 | * Thread groups |
2448 | */ | 2447 | */ |
@@ -2459,6 +2458,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2459 | REG("environ", S_IRUSR, environ), | 2458 | REG("environ", S_IRUSR, environ), |
2460 | INF("auxv", S_IRUSR, pid_auxv), | 2459 | INF("auxv", S_IRUSR, pid_auxv), |
2461 | ONE("status", S_IRUGO, pid_status), | 2460 | ONE("status", S_IRUGO, pid_status), |
2461 | ONE("personality", S_IRUSR, pid_personality), | ||
2462 | INF("limits", S_IRUSR, pid_limits), | 2462 | INF("limits", S_IRUSR, pid_limits), |
2463 | #ifdef CONFIG_SCHED_DEBUG | 2463 | #ifdef CONFIG_SCHED_DEBUG |
2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
@@ -2794,6 +2794,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2794 | REG("environ", S_IRUSR, environ), | 2794 | REG("environ", S_IRUSR, environ), |
2795 | INF("auxv", S_IRUSR, pid_auxv), | 2795 | INF("auxv", S_IRUSR, pid_auxv), |
2796 | ONE("status", S_IRUGO, pid_status), | 2796 | ONE("status", S_IRUGO, pid_status), |
2797 | ONE("personality", S_IRUSR, pid_personality), | ||
2797 | INF("limits", S_IRUSR, pid_limits), | 2798 | INF("limits", S_IRUSR, pid_limits), |
2798 | #ifdef CONFIG_SCHED_DEBUG | 2799 | #ifdef CONFIG_SCHED_DEBUG |
2799 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2800 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
@@ -3088,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
3088 | generic_fillattr(inode, stat); | 3089 | generic_fillattr(inode, stat); |
3089 | 3090 | ||
3090 | if (p) { | 3091 | if (p) { |
3091 | rcu_read_lock(); | ||
3092 | stat->nlink += get_nr_threads(p); | 3092 | stat->nlink += get_nr_threads(p); |
3093 | rcu_read_unlock(); | ||
3094 | put_task_struct(p); | 3093 | put_task_struct(p); |
3095 | } | 3094 | } |
3096 | 3095 | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -330,6 +330,7 @@ retry: | |||
330 | spin_lock(&proc_inum_lock); | 330 | spin_lock(&proc_inum_lock); |
331 | ida_remove(&proc_inum_ida, i); | 331 | ida_remove(&proc_inum_ida, i); |
332 | spin_unlock(&proc_inum_lock); | 332 | spin_unlock(&proc_inum_lock); |
333 | return 0; | ||
333 | } | 334 | } |
334 | return PROC_DYNAMIC_FIRST + i; | 335 | return PROC_DYNAMIC_FIRST + i; |
335 | } | 336 | } |
@@ -546,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
546 | 547 | ||
547 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
548 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
549 | printk(KERN_WARNING "proc_dir_entry '%s' already " | 550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
550 | "registered\n", dp->name); | 551 | dir->name, dp->name); |
551 | dump_stack(); | 552 | dump_stack(); |
552 | break; | 553 | break; |
553 | } | 554 | } |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8bb03f056c28..c6b4fa7e3b49 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -342,7 +342,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
342 | if (!pde->proc_fops) { | 342 | if (!pde->proc_fops) { |
343 | spin_unlock(&pde->pde_unload_lock); | 343 | spin_unlock(&pde->pde_unload_lock); |
344 | kfree(pdeo); | 344 | kfree(pdeo); |
345 | return rv; | 345 | return -EINVAL; |
346 | } | 346 | } |
347 | pde->pde_users++; | 347 | pde->pde_users++; |
348 | open = pde->proc_fops->open; | 348 | open = pde->proc_fops->open; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 442202314d53..3bfb7b8747b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -45,8 +45,6 @@ do { \ | |||
45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); | 45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | extern int maps_protect; | ||
49 | |||
50 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 48 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, |
51 | struct pid *pid, struct task_struct *task); | 49 | struct pid *pid, struct task_struct *task); |
52 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 50 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, |
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 79ecd281d2cb..3f87d2632947 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c | |||
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
52 | } | 52 | } |
53 | 53 | ||
54 | seq_printf(m, | 54 | seq_printf(m, |
55 | "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 55 | "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
56 | vma->vm_start, | 56 | vma->vm_start, |
57 | vma->vm_end, | 57 | vma->vm_end, |
58 | flags & VM_READ ? 'r' : '-', | 58 | flags & VM_READ ? 'r' : '-', |
59 | flags & VM_WRITE ? 'w' : '-', | 59 | flags & VM_WRITE ? 'w' : '-', |
60 | flags & VM_EXEC ? 'x' : '-', | 60 | flags & VM_EXEC ? 'x' : '-', |
61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
62 | vma->vm_pgoff << PAGE_SHIFT, | 62 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
63 | MAJOR(dev), MINOR(dev), ino, &len); | 63 | MAJOR(dev), MINOR(dev), ino, &len); |
64 | 64 | ||
65 | if (file) { | 65 | if (file) { |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..b675a49c1823 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/quicklist.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
@@ -67,7 +68,6 @@ | |||
67 | extern int get_hardware_list(char *); | 68 | extern int get_hardware_list(char *); |
68 | extern int get_stram_list(char *); | 69 | extern int get_stram_list(char *); |
69 | extern int get_exec_domain_list(char *); | 70 | extern int get_exec_domain_list(char *); |
70 | extern int get_dma_list(char *); | ||
71 | 71 | ||
72 | static int proc_calc_metrics(char *page, char **start, off_t off, | 72 | static int proc_calc_metrics(char *page, char **start, off_t off, |
73 | int count, int *eof, int len) | 73 | int count, int *eof, int len) |
@@ -182,6 +182,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
182 | "SReclaimable: %8lu kB\n" | 182 | "SReclaimable: %8lu kB\n" |
183 | "SUnreclaim: %8lu kB\n" | 183 | "SUnreclaim: %8lu kB\n" |
184 | "PageTables: %8lu kB\n" | 184 | "PageTables: %8lu kB\n" |
185 | #ifdef CONFIG_QUICKLIST | ||
186 | "Quicklists: %8lu kB\n" | ||
187 | #endif | ||
185 | "NFS_Unstable: %8lu kB\n" | 188 | "NFS_Unstable: %8lu kB\n" |
186 | "Bounce: %8lu kB\n" | 189 | "Bounce: %8lu kB\n" |
187 | "WritebackTmp: %8lu kB\n" | 190 | "WritebackTmp: %8lu kB\n" |
@@ -214,6 +217,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
214 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 217 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
215 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 218 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
216 | K(global_page_state(NR_PAGETABLE)), | 219 | K(global_page_state(NR_PAGETABLE)), |
220 | #ifdef CONFIG_QUICKLIST | ||
221 | K(quicklist_total_size()), | ||
222 | #endif | ||
217 | K(global_page_state(NR_UNSTABLE_NFS)), | 223 | K(global_page_state(NR_UNSTABLE_NFS)), |
218 | K(global_page_state(NR_BOUNCE)), | 224 | K(global_page_state(NR_BOUNCE)), |
219 | K(global_page_state(NR_WRITEBACK_TEMP)), | 225 | K(global_page_state(NR_WRITEBACK_TEMP)), |
@@ -677,6 +683,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, | |||
677 | return proc_calc_metrics(page, start, off, count, eof, len); | 683 | return proc_calc_metrics(page, start, off, count, eof, len); |
678 | } | 684 | } |
679 | 685 | ||
686 | #ifdef CONFIG_FILE_LOCKING | ||
680 | static int locks_open(struct inode *inode, struct file *filp) | 687 | static int locks_open(struct inode *inode, struct file *filp) |
681 | { | 688 | { |
682 | return seq_open(filp, &locks_seq_operations); | 689 | return seq_open(filp, &locks_seq_operations); |
@@ -688,6 +695,7 @@ static const struct file_operations proc_locks_operations = { | |||
688 | .llseek = seq_lseek, | 695 | .llseek = seq_lseek, |
689 | .release = seq_release, | 696 | .release = seq_release, |
690 | }; | 697 | }; |
698 | #endif /* CONFIG_FILE_LOCKING */ | ||
691 | 699 | ||
692 | static int execdomains_read_proc(char *page, char **start, off_t off, | 700 | static int execdomains_read_proc(char *page, char **start, off_t off, |
693 | int count, int *eof, void *data) | 701 | int count, int *eof, void *data) |
@@ -881,7 +889,9 @@ void __init proc_misc_init(void) | |||
881 | #ifdef CONFIG_PRINTK | 889 | #ifdef CONFIG_PRINTK |
882 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); | 890 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); |
883 | #endif | 891 | #endif |
892 | #ifdef CONFIG_FILE_LOCKING | ||
884 | proc_create("locks", 0, NULL, &proc_locks_operations); | 893 | proc_create("locks", 0, NULL, &proc_locks_operations); |
894 | #endif | ||
885 | proc_create("devices", 0, NULL, &proc_devinfo_operations); | 895 | proc_create("devices", 0, NULL, &proc_devinfo_operations); |
886 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); | 896 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); |
887 | #ifdef CONFIG_BLOCK | 897 | #ifdef CONFIG_BLOCK |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f9a8b892718f..945a81043ba2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -66,7 +66,7 @@ static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | |||
66 | return NULL; | 66 | return NULL; |
67 | } | 67 | } |
68 | 68 | ||
69 | struct ctl_table_header *grab_header(struct inode *inode) | 69 | static struct ctl_table_header *grab_header(struct inode *inode) |
70 | { | 70 | { |
71 | if (PROC_I(inode)->sysctl) | 71 | if (PROC_I(inode)->sysctl) |
72 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 72 | return sysctl_head_grab(PROC_I(inode)->sysctl); |
@@ -395,10 +395,10 @@ static struct dentry_operations proc_sys_dentry_operations = { | |||
395 | .d_compare = proc_sys_compare, | 395 | .d_compare = proc_sys_compare, |
396 | }; | 396 | }; |
397 | 397 | ||
398 | static struct proc_dir_entry *proc_sys_root; | ||
399 | |||
400 | int proc_sys_init(void) | 398 | int proc_sys_init(void) |
401 | { | 399 | { |
400 | struct proc_dir_entry *proc_sys_root; | ||
401 | |||
402 | proc_sys_root = proc_mkdir("sys", NULL); | 402 | proc_sys_root = proc_mkdir("sys", NULL); |
403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7546a918f790..4806830ea2a1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -210,23 +210,20 @@ static int show_map(struct seq_file *m, void *v) | |||
210 | dev_t dev = 0; | 210 | dev_t dev = 0; |
211 | int len; | 211 | int len; |
212 | 212 | ||
213 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
214 | return -EACCES; | ||
215 | |||
216 | if (file) { | 213 | if (file) { |
217 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 214 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
218 | dev = inode->i_sb->s_dev; | 215 | dev = inode->i_sb->s_dev; |
219 | ino = inode->i_ino; | 216 | ino = inode->i_ino; |
220 | } | 217 | } |
221 | 218 | ||
222 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 219 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
223 | vma->vm_start, | 220 | vma->vm_start, |
224 | vma->vm_end, | 221 | vma->vm_end, |
225 | flags & VM_READ ? 'r' : '-', | 222 | flags & VM_READ ? 'r' : '-', |
226 | flags & VM_WRITE ? 'w' : '-', | 223 | flags & VM_WRITE ? 'w' : '-', |
227 | flags & VM_EXEC ? 'x' : '-', | 224 | flags & VM_EXEC ? 'x' : '-', |
228 | flags & VM_MAYSHARE ? 's' : 'p', | 225 | flags & VM_MAYSHARE ? 's' : 'p', |
229 | vma->vm_pgoff << PAGE_SHIFT, | 226 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
230 | MAJOR(dev), MINOR(dev), ino, &len); | 227 | MAJOR(dev), MINOR(dev), ino, &len); |
231 | 228 | ||
232 | /* | 229 | /* |
@@ -742,22 +739,11 @@ const struct file_operations proc_pagemap_operations = { | |||
742 | #ifdef CONFIG_NUMA | 739 | #ifdef CONFIG_NUMA |
743 | extern int show_numa_map(struct seq_file *m, void *v); | 740 | extern int show_numa_map(struct seq_file *m, void *v); |
744 | 741 | ||
745 | static int show_numa_map_checked(struct seq_file *m, void *v) | ||
746 | { | ||
747 | struct proc_maps_private *priv = m->private; | ||
748 | struct task_struct *task = priv->task; | ||
749 | |||
750 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
751 | return -EACCES; | ||
752 | |||
753 | return show_numa_map(m, v); | ||
754 | } | ||
755 | |||
756 | static const struct seq_operations proc_pid_numa_maps_op = { | 742 | static const struct seq_operations proc_pid_numa_maps_op = { |
757 | .start = m_start, | 743 | .start = m_start, |
758 | .next = m_next, | 744 | .next = m_next, |
759 | .stop = m_stop, | 745 | .stop = m_stop, |
760 | .show = show_numa_map_checked | 746 | .show = show_numa_map, |
761 | }; | 747 | }; |
762 | 748 | ||
763 | static int numa_maps_open(struct inode *inode, struct file *file) | 749 | static int numa_maps_open(struct inode *inode, struct file *file) |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 5d84e7121df8..219bd79ea894 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, | |||
110 | static int show_map(struct seq_file *m, void *_vml) | 110 | static int show_map(struct seq_file *m, void *_vml) |
111 | { | 111 | { |
112 | struct vm_list_struct *vml = _vml; | 112 | struct vm_list_struct *vml = _vml; |
113 | struct proc_maps_private *priv = m->private; | ||
114 | struct task_struct *task = priv->task; | ||
115 | |||
116 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
117 | return -EACCES; | ||
118 | 113 | ||
119 | return nommu_vma_show(m, vml->vma); | 114 | return nommu_vma_show(m, vml->vma); |
120 | } | 115 | } |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9ac0f5e064e0..841368b87a29 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -165,14 +165,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
165 | return acc; | 165 | return acc; |
166 | } | 166 | } |
167 | 167 | ||
168 | static int open_vmcore(struct inode *inode, struct file *filp) | ||
169 | { | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | const struct file_operations proc_vmcore_operations = { | 168 | const struct file_operations proc_vmcore_operations = { |
174 | .read = read_vmcore, | 169 | .read = read_vmcore, |
175 | .open = open_vmcore, | ||
176 | }; | 170 | }; |
177 | 171 | ||
178 | static struct vmcore* __init get_new_element(void) | 172 | static struct vmcore* __init get_new_element(void) |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { | |||
58 | * size 0 on the assumption that it's going to be used for an mmap of shared | 58 | * size 0 on the assumption that it's going to be used for an mmap of shared |
59 | * memory | 59 | * memory |
60 | */ | 60 | */ |
61 | static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | 61 | int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) |
62 | { | 62 | { |
63 | struct pagevec lru_pvec; | 63 | struct pagevec lru_pvec; |
64 | unsigned long npages, xpages, loop, limit; | 64 | unsigned long npages, xpages, loop, limit; |
diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset | |||
80 | if (buf->result) | 80 | if (buf->result) |
81 | return -EINVAL; | 81 | return -EINVAL; |
82 | d_ino = ino; | 82 | d_ino = ino; |
83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
84 | buf->result = -EOVERFLOW; | ||
84 | return -EOVERFLOW; | 85 | return -EOVERFLOW; |
86 | } | ||
85 | buf->result++; | 87 | buf->result++; |
86 | dirent = buf->dirent; | 88 | dirent = buf->dirent; |
87 | if (!access_ok(VERIFY_WRITE, dirent, | 89 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | |||
155 | if (reclen > buf->count) | 157 | if (reclen > buf->count) |
156 | return -EINVAL; | 158 | return -EINVAL; |
157 | d_ino = ino; | 159 | d_ino = ino; |
158 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 160 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
161 | buf->error = -EOVERFLOW; | ||
159 | return -EOVERFLOW; | 162 | return -EOVERFLOW; |
163 | } | ||
160 | dirent = buf->previous; | 164 | dirent = buf->previous; |
161 | if (dirent) { | 165 | if (dirent) { |
162 | if (__put_user(offset, &dirent->d_off)) | 166 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 282a13596c70..d318c7e663fa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/mnt_namespace.h> | 27 | #include <linux/mnt_namespace.h> |
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/namei.h> | 29 | #include <linux/namei.h> |
30 | #include <linux/quotaops.h> | ||
31 | 30 | ||
32 | struct file_system_type reiserfs_fs_type; | 31 | struct file_system_type reiserfs_fs_type; |
33 | 32 | ||
diff --git a/fs/seq_file.c b/fs/seq_file.c index 3f54dbd6c49b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
108 | goto Done; | 108 | goto Done; |
109 | } | 109 | } |
110 | /* we need at least one record in buffer */ | 110 | /* we need at least one record in buffer */ |
111 | pos = m->index; | ||
112 | p = m->op->start(m, &pos); | ||
111 | while (1) { | 113 | while (1) { |
112 | pos = m->index; | ||
113 | p = m->op->start(m, &pos); | ||
114 | err = PTR_ERR(p); | 114 | err = PTR_ERR(p); |
115 | if (!p || IS_ERR(p)) | 115 | if (!p || IS_ERR(p)) |
116 | break; | 116 | break; |
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
119 | break; | 119 | break; |
120 | if (unlikely(err)) | 120 | if (unlikely(err)) |
121 | m->count = 0; | 121 | m->count = 0; |
122 | if (unlikely(!m->count)) { | ||
123 | p = m->op->next(m, p, &pos); | ||
124 | m->index = pos; | ||
125 | continue; | ||
126 | } | ||
122 | if (m->count < m->size) | 127 | if (m->count < m->size) |
123 | goto Fill; | 128 | goto Fill; |
124 | m->op->stop(m, p); | 129 | m->op->stop(m, p); |
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
128 | goto Enomem; | 133 | goto Enomem; |
129 | m->count = 0; | 134 | m->count = 0; |
130 | m->version = 0; | 135 | m->version = 0; |
136 | pos = m->index; | ||
137 | p = m->op->start(m, &pos); | ||
131 | } | 138 | } |
132 | m->op->stop(m, p); | 139 | m->op->stop(m, p); |
133 | m->count = 0; | 140 | m->count = 0; |
@@ -443,6 +450,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) | |||
443 | return -1; | 450 | return -1; |
444 | } | 451 | } |
445 | 452 | ||
453 | int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) | ||
454 | { | ||
455 | size_t len = bitmap_scnprintf_len(nr_bits); | ||
456 | |||
457 | if (m->count + len < m->size) { | ||
458 | bitmap_scnprintf(m->buf + m->count, m->size - m->count, | ||
459 | bits, nr_bits); | ||
460 | m->count += len; | ||
461 | return 0; | ||
462 | } | ||
463 | m->count = m->size; | ||
464 | return -1; | ||
465 | } | ||
466 | |||
446 | static void *single_start(struct seq_file *p, loff_t *pos) | 467 | static void *single_start(struct seq_file *p, loff_t *pos) |
447 | { | 468 | { |
448 | return NULL + (*pos == 0); | 469 | return NULL + (*pos == 0); |
diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4bb09c..a1e701c27156 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
899 | return -EBADF; | 899 | return -EBADF; |
900 | 900 | ||
901 | if (unlikely(out->f_flags & O_APPEND)) | ||
902 | return -EINVAL; | ||
903 | |||
901 | ret = rw_verify_area(WRITE, out, ppos, len); | 904 | ret = rw_verify_area(WRITE, out, ppos, len); |
902 | if (unlikely(ret < 0)) | 905 | if (unlikely(ret < 0)) |
903 | return ret; | 906 | return ret; |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index d81fb9ed2b8e..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | |||
263 | 263 | ||
264 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 264 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; |
265 | 265 | ||
266 | /* And make sure we have twice the index size of space reserved */ | 266 | /* And make sure we have thrice the index size of space reserved */ |
267 | idx_size <<= 1; | 267 | idx_size = idx_size + (idx_size << 1); |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' | 270 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' |
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -388,11 +376,11 @@ static int can_use_rp(struct ubifs_info *c) | |||
388 | * This function makes sure UBIFS has enough free eraseblocks for index growth | 376 | * This function makes sure UBIFS has enough free eraseblocks for index growth |
389 | * and data. | 377 | * and data. |
390 | * | 378 | * |
391 | * When budgeting index space, UBIFS reserves twice as more LEBs as the index | 379 | * When budgeting index space, UBIFS reserves thrice as many LEBs as the index |
392 | * would take if it was consolidated and written to the flash. This guarantees | 380 | * would take if it was consolidated and written to the flash. This guarantees |
393 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always | 381 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always |
394 | * be able to commit dirty index. So this function basically adds amount of | 382 | * be able to commit dirty index. So this function basically adds amount of |
395 | * budgeted index space to the size of the current index, multiplies this by 2, | 383 | * budgeted index space to the size of the current index, multiplies this by 3, |
396 | * and makes sure this does not exceed the amount of free eraseblocks. | 384 | * and makes sure this does not exceed the amount of free eraseblocks. |
397 | * | 385 | * |
398 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | 386 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: |
@@ -543,8 +531,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
543 | int err, idx_growth, data_growth, dd_growth; | 531 | int err, idx_growth, data_growth, dd_growth; |
544 | struct retries_info ri; | 532 | struct retries_info ri; |
545 | 533 | ||
534 | ubifs_assert(req->new_page <= 1); | ||
535 | ubifs_assert(req->dirtied_page <= 1); | ||
536 | ubifs_assert(req->new_dent <= 1); | ||
537 | ubifs_assert(req->mod_dent <= 1); | ||
538 | ubifs_assert(req->new_ino <= 1); | ||
539 | ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); | ||
546 | ubifs_assert(req->dirtied_ino <= 4); | 540 | ubifs_assert(req->dirtied_ino <= 4); |
547 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); | 541 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); |
542 | ubifs_assert(!(req->new_ino_d & 7)); | ||
543 | ubifs_assert(!(req->dirtied_ino_d & 7)); | ||
548 | 544 | ||
549 | data_growth = calc_data_growth(c, req); | 545 | data_growth = calc_data_growth(c, req); |
550 | dd_growth = calc_dd_growth(c, req); | 546 | dd_growth = calc_dd_growth(c, req); |
@@ -618,8 +614,16 @@ again: | |||
618 | */ | 614 | */ |
619 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | 615 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) |
620 | { | 616 | { |
617 | ubifs_assert(req->new_page <= 1); | ||
618 | ubifs_assert(req->dirtied_page <= 1); | ||
619 | ubifs_assert(req->new_dent <= 1); | ||
620 | ubifs_assert(req->mod_dent <= 1); | ||
621 | ubifs_assert(req->new_ino <= 1); | ||
622 | ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); | ||
621 | ubifs_assert(req->dirtied_ino <= 4); | 623 | ubifs_assert(req->dirtied_ino <= 4); |
622 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); | 624 | ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); |
625 | ubifs_assert(!(req->new_ino_d & 7)); | ||
626 | ubifs_assert(!(req->dirtied_ino_d & 7)); | ||
623 | if (!req->recalculate) { | 627 | if (!req->recalculate) { |
624 | ubifs_assert(req->idx_growth >= 0); | 628 | ubifs_assert(req->idx_growth >= 0); |
625 | ubifs_assert(req->data_growth >= 0); | 629 | ubifs_assert(req->data_growth >= 0); |
@@ -647,7 +651,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
647 | 651 | ||
648 | ubifs_assert(c->budg_idx_growth >= 0); | 652 | ubifs_assert(c->budg_idx_growth >= 0); |
649 | ubifs_assert(c->budg_data_growth >= 0); | 653 | ubifs_assert(c->budg_data_growth >= 0); |
654 | ubifs_assert(c->budg_dd_growth >= 0); | ||
650 | ubifs_assert(c->min_idx_lebs < c->main_lebs); | 655 | ubifs_assert(c->min_idx_lebs < c->main_lebs); |
656 | ubifs_assert(!(c->budg_idx_growth & 7)); | ||
657 | ubifs_assert(!(c->budg_data_growth & 7)); | ||
658 | ubifs_assert(!(c->budg_dd_growth & 7)); | ||
651 | spin_unlock(&c->space_lock); | 659 | spin_unlock(&c->space_lock); |
652 | } | 660 | } |
653 | 661 | ||
@@ -686,41 +694,114 @@ void ubifs_convert_page_budget(struct ubifs_info *c) | |||
686 | void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | 694 | void ubifs_release_dirty_inode_budget(struct ubifs_info *c, |
687 | struct ubifs_inode *ui) | 695 | struct ubifs_inode *ui) |
688 | { | 696 | { |
689 | struct ubifs_budget_req req = {.dd_growth = c->inode_budget, | 697 | struct ubifs_budget_req req; |
690 | .dirtied_ino_d = ui->data_len}; | ||
691 | 698 | ||
699 | memset(&req, 0, sizeof(struct ubifs_budget_req)); | ||
700 | req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); | ||
692 | ubifs_release_budget(c, &req); | 701 | ubifs_release_budget(c, &req); |
693 | } | 702 | } |
694 | 703 | ||
695 | /** | 704 | /** |
696 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space applications tend to expect that if the file-system | |
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexing nodes as well. This introduces additional | |
714 | * overhead, and UBIFS has to report slightly less free space to meet the | |
715 | * above expectation. | |
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because the effective UBIFS fanout is half | |
736 | * the maximum fanout, we assume that each data node | |
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | |
738 | * Note, the multiplier 3 is because UBIFS reserves three times as much space | |
739 | * for the index. | |
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
697 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
698 | * | 753 | * |
699 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real | |
758 | * amount of free flash space it has (well, because not all dirty space is | |
759 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | |
760 | * it would break user expectations about what free space is. Users seem to be | |
761 | * accustomed to assuming that if the file-system reports N bytes of free space, | |
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
700 | */ | 765 | */ |
701 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
702 | { | 767 | { |
703 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
704 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
705 | 770 | ||
706 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
707 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
708 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
709 | 774 | ||
710 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
711 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
712 | else | 777 | * space is zero. |
713 | rsvd_idx_lebs = 0; | 778 | */ |
714 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
715 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
716 | - c->lst.taken_empty_lebs) { | ||
717 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
718 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
719 | } | 782 | } |
720 | 783 | ||
721 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
722 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
723 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
724 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
725 | 806 | ||
726 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 3b516316c9b3..0a6aa2cc78f0 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
@@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c) | |||
74 | goto out_up; | 74 | goto out_up; |
75 | } | 75 | } |
76 | 76 | ||
77 | c->cmt_no += 1; | ||
77 | err = ubifs_gc_start_commit(c); | 78 | err = ubifs_gc_start_commit(c); |
78 | if (err) | 79 | if (err) |
79 | goto out_up; | 80 | goto out_up; |
@@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c) | |||
115 | goto out; | 116 | goto out; |
116 | 117 | ||
117 | mutex_lock(&c->mst_mutex); | 118 | mutex_lock(&c->mst_mutex); |
118 | c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); | 119 | c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); |
119 | c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); | 120 | c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); |
120 | c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); | 121 | c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); |
121 | c->mst_node->root_offs = cpu_to_le32(zroot.offs); | 122 | c->mst_node->root_offs = cpu_to_le32(zroot.offs); |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4e3aaeba4eca..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); |
539 | for (i = 0; i < n; i++) | 539 | for (i = 0; i < n; i++) |
540 | printk(KERN_DEBUG "\t ino %llu\n", | 540 | printk(KERN_DEBUG "\t ino %llu\n", |
541 | le64_to_cpu(orph->inos[i])); | 541 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
542 | break; | 542 | break; |
543 | } | 543 | } |
544 | default: | 544 | default: |
@@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) | |||
568 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst) | 568 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst) |
569 | { | 569 | { |
570 | spin_lock(&dbg_lock); | 570 | spin_lock(&dbg_lock); |
571 | printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", | 571 | printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " |
572 | lst->empty_lebs, lst->idx_lebs); | 572 | "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); |
573 | printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " | 573 | printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " |
574 | "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, | 574 | "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, |
575 | lst->total_dirty); | 575 | lst->total_dirty); |
@@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
587 | struct ubifs_gced_idx_leb *idx_gc; | 587 | struct ubifs_gced_idx_leb *idx_gc; |
588 | 588 | ||
589 | spin_lock(&dbg_lock); | 589 | spin_lock(&dbg_lock); |
590 | printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " | 590 | printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " |
591 | "budg_dd_growth %lld, budg_idx_growth %lld\n", | 591 | "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, |
592 | c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); | 592 | c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); |
593 | printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " | 593 | printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " |
594 | "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, | 594 | "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, |
@@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c) | |||
634 | struct ubifs_lprops lp; | 634 | struct ubifs_lprops lp; |
635 | struct ubifs_lp_stats lst; | 635 | struct ubifs_lp_stats lst; |
636 | 636 | ||
637 | printk(KERN_DEBUG "Dumping LEB properties\n"); | 637 | printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); |
638 | ubifs_get_lp_stats(c, &lst); | 638 | ubifs_get_lp_stats(c, &lst); |
639 | dbg_dump_lstats(&lst); | 639 | dbg_dump_lstats(&lst); |
640 | 640 | ||
@@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) | |||
655 | if (dbg_failure_mode) | 655 | if (dbg_failure_mode) |
656 | return; | 656 | return; |
657 | 657 | ||
658 | printk(KERN_DEBUG "Dumping LEB %d\n", lnum); | 658 | printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); |
659 | 659 | ||
660 | sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); | 660 | sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); |
661 | if (IS_ERR(sleb)) { | 661 | if (IS_ERR(sleb)) { |
@@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) | |||
720 | { | 720 | { |
721 | int i; | 721 | int i; |
722 | 722 | ||
723 | printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", | 723 | printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", |
724 | cat, heap->cnt); | 724 | current->pid, cat, heap->cnt); |
725 | for (i = 0; i < heap->cnt; i++) { | 725 | for (i = 0; i < heap->cnt; i++) { |
726 | struct ubifs_lprops *lprops = heap->arr[i]; | 726 | struct ubifs_lprops *lprops = heap->arr[i]; |
727 | 727 | ||
@@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, | |||
736 | { | 736 | { |
737 | int i; | 737 | int i; |
738 | 738 | ||
739 | printk(KERN_DEBUG "Dumping pnode:\n"); | 739 | printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); |
740 | printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", | 740 | printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", |
741 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); | 741 | (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); |
742 | printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", | 742 | printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", |
@@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c) | |||
755 | int level; | 755 | int level; |
756 | 756 | ||
757 | printk(KERN_DEBUG "\n"); | 757 | printk(KERN_DEBUG "\n"); |
758 | printk(KERN_DEBUG "Dumping the TNC tree\n"); | 758 | printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); |
759 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | 759 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); |
760 | level = znode->level; | 760 | level = znode->level; |
761 | printk(KERN_DEBUG "== Level %d ==\n", level); | 761 | printk(KERN_DEBUG "== Level %d ==\n", level); |
@@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | |||
2208 | int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | 2208 | int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, |
2209 | int offset, int len, int dtype) | 2209 | int offset, int len, int dtype) |
2210 | { | 2210 | { |
2211 | int err; | 2211 | int err, failing; |
2212 | 2212 | ||
2213 | if (in_failure_mode(desc)) | 2213 | if (in_failure_mode(desc)) |
2214 | return -EIO; | 2214 | return -EIO; |
2215 | if (do_fail(desc, lnum, 1)) | 2215 | failing = do_fail(desc, lnum, 1); |
2216 | if (failing) | ||
2216 | cut_data(buf, len); | 2217 | cut_data(buf, len); |
2217 | err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); | 2218 | err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); |
2218 | if (err) | 2219 | if (err) |
2219 | return err; | 2220 | return err; |
2220 | if (in_failure_mode(desc)) | 2221 | if (failing) |
2221 | return -EIO; | 2222 | return -EIO; |
2222 | return 0; | 2223 | return 0; |
2223 | } | 2224 | } |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 3c4f1e93c9e0..50315fc57185 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #define UBIFS_DBG(op) op | 28 | #define UBIFS_DBG(op) op |
29 | 29 | ||
30 | #define ubifs_assert(expr) do { \ | 30 | #define ubifs_assert(expr) do { \ |
31 | if (unlikely(!(expr))) { \ | 31 | if (unlikely(!(expr))) { \ |
32 | printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ | 32 | printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ |
33 | __func__, __LINE__, current->pid); \ | 33 | __func__, __LINE__, current->pid); \ |
@@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c, | |||
73 | const union ubifs_key *key); | 73 | const union ubifs_key *key); |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * DBGKEY macros require dbg_lock to be held, which it is in the dbg message | 76 | * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message |
77 | * macros. | 77 | * macros. |
78 | */ | 78 | */ |
79 | #define DBGKEY(key) dbg_key_str0(c, (key)) | 79 | #define DBGKEY(key) dbg_key_str0(c, (key)) |
80 | #define DBGKEY1(key) dbg_key_str1(c, (key)) | 80 | #define DBGKEY1(key) dbg_key_str1(c, (key)) |
81 | 81 | ||
82 | /* General messages */ | 82 | /* General messages */ |
83 | #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) | 83 | #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) |
84 | 84 | ||
85 | /* Additional journal messages */ | 85 | /* Additional journal messages */ |
86 | #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) | 86 | #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) |
87 | 87 | ||
88 | /* Additional TNC messages */ | 88 | /* Additional TNC messages */ |
89 | #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) | 89 | #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) |
90 | 90 | ||
91 | /* Additional lprops messages */ | 91 | /* Additional lprops messages */ |
92 | #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) | 92 | #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) |
93 | 93 | ||
94 | /* Additional LEB find messages */ | 94 | /* Additional LEB find messages */ |
95 | #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) | 95 | #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) |
96 | 96 | ||
97 | /* Additional mount messages */ | 97 | /* Additional mount messages */ |
98 | #define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) | 98 | #define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) |
99 | 99 | ||
100 | /* Additional I/O messages */ | 100 | /* Additional I/O messages */ |
101 | #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) | 101 | #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) |
102 | 102 | ||
103 | /* Additional commit messages */ | 103 | /* Additional commit messages */ |
104 | #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) | 104 | #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) |
105 | 105 | ||
106 | /* Additional budgeting messages */ | 106 | /* Additional budgeting messages */ |
107 | #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) | 107 | #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) |
108 | 108 | ||
109 | /* Additional log messages */ | 109 | /* Additional log messages */ |
110 | #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) | 110 | #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) |
111 | 111 | ||
112 | /* Additional gc messages */ | 112 | /* Additional gc messages */ |
113 | #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) | 113 | #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) |
114 | 114 | ||
115 | /* Additional scan messages */ | 115 | /* Additional scan messages */ |
116 | #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) | 116 | #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) |
117 | 117 | ||
118 | /* Additional recovery messages */ | 118 | /* Additional recovery messages */ |
119 | #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) | 119 | #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) |
120 | 120 | ||
121 | /* | 121 | /* |
122 | * Debugging message type flags (must match msg_type_names in debug.c). | 122 | * Debugging message type flags (must match msg_type_names in debug.c). |
@@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c, | |||
239 | struct ubifs_zbranch *zbr, void *priv); | 239 | struct ubifs_zbranch *zbr, void *priv); |
240 | typedef int (*dbg_znode_callback)(struct ubifs_info *c, | 240 | typedef int (*dbg_znode_callback)(struct ubifs_info *c, |
241 | struct ubifs_znode *znode, void *priv); | 241 | struct ubifs_znode *znode, void *priv); |
242 | |||
243 | int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, | 242 | int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, |
244 | dbg_znode_callback znode_cb, void *priv); | 243 | dbg_znode_callback znode_cb, void *priv); |
245 | 244 | ||
246 | /* Checking functions */ | 245 | /* Checking functions */ |
247 | 246 | ||
248 | int dbg_check_lprops(struct ubifs_info *c); | 247 | int dbg_check_lprops(struct ubifs_info *c); |
249 | |||
250 | int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); | 248 | int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); |
251 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); | 249 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); |
252 | |||
253 | int dbg_check_cats(struct ubifs_info *c); | 250 | int dbg_check_cats(struct ubifs_info *c); |
254 | |||
255 | int dbg_check_ltab(struct ubifs_info *c); | 251 | int dbg_check_ltab(struct ubifs_info *c); |
256 | |||
257 | int dbg_check_synced_i_size(struct inode *inode); | 252 | int dbg_check_synced_i_size(struct inode *inode); |
258 | |||
259 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); | 253 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); |
260 | |||
261 | int dbg_check_tnc(struct ubifs_info *c, int extra); | 254 | int dbg_check_tnc(struct ubifs_info *c, int extra); |
262 | |||
263 | int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); | 255 | int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); |
264 | |||
265 | int dbg_check_filesystem(struct ubifs_info *c); | 256 | int dbg_check_filesystem(struct ubifs_info *c); |
266 | |||
267 | void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | 257 | void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, |
268 | int add_pos); | 258 | int add_pos); |
269 | |||
270 | int dbg_check_lprops(struct ubifs_info *c); | 259 | int dbg_check_lprops(struct ubifs_info *c); |
271 | int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, | 260 | int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, |
272 | int row, int col); | 261 | int row, int col); |
@@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | |||
329 | #else /* !CONFIG_UBIFS_FS_DEBUG */ | 318 | #else /* !CONFIG_UBIFS_FS_DEBUG */ |
330 | 319 | ||
331 | #define UBIFS_DBG(op) | 320 | #define UBIFS_DBG(op) |
332 | #define ubifs_assert(expr) ({}) | 321 | |
333 | #define ubifs_assert_cmt_locked(c) | 322 | /* Use "if (0)" to make compiler check arguments even if debugging is off */ |
323 | #define ubifs_assert(expr) do { \ | ||
324 | if (0 && (expr)) \ | ||
325 | printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ | ||
326 | __func__, __LINE__, current->pid); \ | ||
327 | } while (0) | ||
328 | |||
329 | #define dbg_err(fmt, ...) do { \ | ||
330 | if (0) \ | ||
331 | ubifs_err(fmt, ##__VA_ARGS__); \ | ||
332 | } while (0) | ||
333 | |||
334 | #define dbg_msg(fmt, ...) do { \ | ||
335 | if (0) \ | ||
336 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ | ||
337 | current->pid, __func__, ##__VA_ARGS__); \ | ||
338 | } while (0) | ||
339 | |||
334 | #define dbg_dump_stack() | 340 | #define dbg_dump_stack() |
335 | #define dbg_err(fmt, ...) ({}) | 341 | #define ubifs_assert_cmt_locked(c) |
336 | #define dbg_msg(fmt, ...) ({}) | ||
337 | #define dbg_key(c, key, fmt, ...) ({}) | ||
338 | |||
339 | #define dbg_gen(fmt, ...) ({}) | ||
340 | #define dbg_jnl(fmt, ...) ({}) | ||
341 | #define dbg_tnc(fmt, ...) ({}) | ||
342 | #define dbg_lp(fmt, ...) ({}) | ||
343 | #define dbg_find(fmt, ...) ({}) | ||
344 | #define dbg_mnt(fmt, ...) ({}) | ||
345 | #define dbg_io(fmt, ...) ({}) | ||
346 | #define dbg_cmt(fmt, ...) ({}) | ||
347 | #define dbg_budg(fmt, ...) ({}) | ||
348 | #define dbg_log(fmt, ...) ({}) | ||
349 | #define dbg_gc(fmt, ...) ({}) | ||
350 | #define dbg_scan(fmt, ...) ({}) | ||
351 | #define dbg_rcvry(fmt, ...) ({}) | ||
352 | |||
353 | #define dbg_ntype(type) "" | ||
354 | #define dbg_cstate(cmt_state) "" | ||
355 | #define dbg_get_key_dump(c, key) ({}) | ||
356 | #define dbg_dump_inode(c, inode) ({}) | ||
357 | #define dbg_dump_node(c, node) ({}) | ||
358 | #define dbg_dump_budget_req(req) ({}) | ||
359 | #define dbg_dump_lstats(lst) ({}) | ||
360 | #define dbg_dump_budg(c) ({}) | ||
361 | #define dbg_dump_lprop(c, lp) ({}) | ||
362 | #define dbg_dump_lprops(c) ({}) | ||
363 | #define dbg_dump_leb(c, lnum) ({}) | ||
364 | #define dbg_dump_znode(c, znode) ({}) | ||
365 | #define dbg_dump_heap(c, heap, cat) ({}) | ||
366 | #define dbg_dump_pnode(c, pnode, parent, iip) ({}) | ||
367 | #define dbg_dump_tnc(c) ({}) | ||
368 | #define dbg_dump_index(c) ({}) | ||
369 | 342 | ||
370 | #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 | 343 | #define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) |
344 | #define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
345 | #define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
346 | #define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
347 | #define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
348 | #define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
349 | #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
350 | #define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
351 | #define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
352 | #define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
353 | #define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
354 | #define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
355 | #define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | ||
356 | |||
357 | #define DBGKEY(key) ((char *)(key)) | ||
358 | #define DBGKEY1(key) ((char *)(key)) | ||
359 | |||
360 | #define dbg_ntype(type) "" | ||
361 | #define dbg_cstate(cmt_state) "" | ||
362 | #define dbg_get_key_dump(c, key) ({}) | ||
363 | #define dbg_dump_inode(c, inode) ({}) | ||
364 | #define dbg_dump_node(c, node) ({}) | ||
365 | #define dbg_dump_budget_req(req) ({}) | ||
366 | #define dbg_dump_lstats(lst) ({}) | ||
367 | #define dbg_dump_budg(c) ({}) | ||
368 | #define dbg_dump_lprop(c, lp) ({}) | ||
369 | #define dbg_dump_lprops(c) ({}) | ||
370 | #define dbg_dump_leb(c, lnum) ({}) | ||
371 | #define dbg_dump_znode(c, znode) ({}) | ||
372 | #define dbg_dump_heap(c, heap, cat) ({}) | ||
373 | #define dbg_dump_pnode(c, pnode, parent, iip) ({}) | ||
374 | #define dbg_dump_tnc(c) ({}) | ||
375 | #define dbg_dump_index(c) ({}) | ||
371 | 376 | ||
377 | #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 | ||
372 | #define dbg_old_index_check_init(c, zroot) 0 | 378 | #define dbg_old_index_check_init(c, zroot) 0 |
373 | #define dbg_check_old_index(c, zroot) 0 | 379 | #define dbg_check_old_index(c, zroot) 0 |
374 | |||
375 | #define dbg_check_cats(c) 0 | 380 | #define dbg_check_cats(c) 0 |
376 | |||
377 | #define dbg_check_ltab(c) 0 | 381 | #define dbg_check_ltab(c) 0 |
378 | |||
379 | #define dbg_check_synced_i_size(inode) 0 | 382 | #define dbg_check_synced_i_size(inode) 0 |
380 | |||
381 | #define dbg_check_dir_size(c, dir) 0 | 383 | #define dbg_check_dir_size(c, dir) 0 |
382 | |||
383 | #define dbg_check_tnc(c, x) 0 | 384 | #define dbg_check_tnc(c, x) 0 |
384 | |||
385 | #define dbg_check_idx_size(c, idx_size) 0 | 385 | #define dbg_check_idx_size(c, idx_size) 0 |
386 | |||
387 | #define dbg_check_filesystem(c) 0 | 386 | #define dbg_check_filesystem(c) 0 |
388 | |||
389 | #define dbg_check_heap(c, heap, cat, add_pos) ({}) | 387 | #define dbg_check_heap(c, heap, cat, add_pos) ({}) |
390 | |||
391 | #define dbg_check_lprops(c) 0 | 388 | #define dbg_check_lprops(c) 0 |
392 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 | 389 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 |
393 | |||
394 | #define dbg_force_in_the_gaps_enabled 0 | 390 | #define dbg_force_in_the_gaps_enabled 0 |
395 | #define dbg_force_in_the_gaps() 0 | 391 | #define dbg_force_in_the_gaps() 0 |
396 | |||
397 | #define dbg_failure_mode 0 | 392 | #define dbg_failure_mode 0 |
398 | #define dbg_failure_mode_registration(c) ({}) | 393 | #define dbg_failure_mode_registration(c) ({}) |
399 | #define dbg_failure_mode_deregistration(c) ({}) | 394 | #define dbg_failure_mode_deregistration(c) ({}) |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index e90374be7d3b..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | |||
165 | } | 165 | } |
166 | 166 | ||
167 | inode->i_ino = ++c->highest_inum; | 167 | inode->i_ino = ++c->highest_inum; |
168 | inode->i_generation = ++c->vfs_gen; | ||
169 | /* | 168 | /* |
170 | * The creation sequence number remains with this inode for its | 169 | * The creation sequence number remains with this inode for its |
171 | * lifetime. All nodes for this inode have a greater sequence number, | 170 | * lifetime. All nodes for this inode have a greater sequence number, |
@@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, | |||
220 | 219 | ||
221 | err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); | 220 | err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); |
222 | if (err) { | 221 | if (err) { |
223 | /* | 222 | if (err == -ENOENT) { |
224 | * Do not hash the direntry if parent 'i_nlink' is zero, because | ||
225 | * this has side-effects - '->delete_inode()' call will not be | ||
226 | * called for the parent orphan inode, because 'd_count' of its | ||
227 | * direntry will stay 1 (it'll be negative direntry I guess) | ||
228 | * and prevent 'iput_final()' until the dentry is destroyed due | ||
229 | * to unmount or memory pressure. | ||
230 | */ | ||
231 | if (err == -ENOENT && dir->i_nlink != 0) { | ||
232 | dbg_gen("not found"); | 223 | dbg_gen("not found"); |
233 | goto done; | 224 | goto done; |
234 | } | 225 | } |
@@ -435,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
435 | 426 | ||
436 | while (1) { | 427 | while (1) { |
437 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", | 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", |
438 | dent->name, le64_to_cpu(dent->inum), | 429 | dent->name, (unsigned long long)le64_to_cpu(dent->inum), |
439 | key_hash_flash(c, &dent->key)); | 430 | key_hash_flash(c, &dent->key)); |
440 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); | 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); |
441 | 432 | ||
@@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, | |||
525 | struct ubifs_inode *dir_ui = ubifs_inode(dir); | 516 | struct ubifs_inode *dir_ui = ubifs_inode(dir); |
526 | int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); | 517 | int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); |
527 | struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, | 518 | struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, |
528 | .dirtied_ino_d = ui->data_len }; | 519 | .dirtied_ino_d = ALIGN(ui->data_len, 8) }; |
529 | 520 | ||
530 | /* | 521 | /* |
531 | * Budget request settings: new direntry, changing the target inode, | 522 | * Budget request settings: new direntry, changing the target inode, |
@@ -596,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
596 | if (err) { | 587 | if (err) { |
597 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
598 | return err; | 589 | return err; |
599 | err = 0; | ||
600 | budgeted = 0; | 590 | budgeted = 0; |
601 | } | 591 | } |
602 | 592 | ||
@@ -727,8 +717,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
727 | struct ubifs_inode *dir_ui = ubifs_inode(dir); | 717 | struct ubifs_inode *dir_ui = ubifs_inode(dir); |
728 | struct ubifs_info *c = dir->i_sb->s_fs_info; | 718 | struct ubifs_info *c = dir->i_sb->s_fs_info; |
729 | int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); | 719 | int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); |
730 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | 720 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; |
731 | .dirtied_ino_d = 1 }; | ||
732 | 721 | ||
733 | /* | 722 | /* |
734 | * Budget request settings: new inode, new direntry and changing parent | 723 | * Budget request settings: new inode, new direntry and changing parent |
@@ -789,7 +778,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, | |||
789 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); | 778 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); |
790 | int err, devlen = 0; | 779 | int err, devlen = 0; |
791 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | 780 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, |
792 | .new_ino_d = devlen, .dirtied_ino = 1 }; | 781 | .new_ino_d = ALIGN(devlen, 8), |
782 | .dirtied_ino = 1 }; | ||
793 | 783 | ||
794 | /* | 784 | /* |
795 | * Budget request settings: new inode, new direntry and changing parent | 785 | * Budget request settings: new inode, new direntry and changing parent |
@@ -863,7 +853,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, | |||
863 | int err, len = strlen(symname); | 853 | int err, len = strlen(symname); |
864 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); | 854 | int sz_change = CALC_DENT_SIZE(dentry->d_name.len); |
865 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | 855 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, |
866 | .new_ino_d = len, .dirtied_ino = 1 }; | 856 | .new_ino_d = ALIGN(len, 8), |
857 | .dirtied_ino = 1 }; | ||
867 | 858 | ||
868 | /* | 859 | /* |
869 | * Budget request settings: new inode, new direntry and changing parent | 860 | * Budget request settings: new inode, new direntry and changing parent |
@@ -1012,7 +1003,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1012 | struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, | 1003 | struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, |
1013 | .dirtied_ino = 3 }; | 1004 | .dirtied_ino = 3 }; |
1014 | struct ubifs_budget_req ino_req = { .dirtied_ino = 1, | 1005 | struct ubifs_budget_req ino_req = { .dirtied_ino = 1, |
1015 | .dirtied_ino_d = old_inode_ui->data_len }; | 1006 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; |
1016 | struct timespec time; | 1007 | struct timespec time; |
1017 | 1008 | ||
1018 | /* | 1009 | /* |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 8565e586e533..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
@@ -890,7 +902,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, | |||
890 | loff_t new_size = attr->ia_size; | 902 | loff_t new_size = attr->ia_size; |
891 | struct ubifs_inode *ui = ubifs_inode(inode); | 903 | struct ubifs_inode *ui = ubifs_inode(inode); |
892 | struct ubifs_budget_req req = { .dirtied_ino = 1, | 904 | struct ubifs_budget_req req = { .dirtied_ino = 1, |
893 | .dirtied_ino_d = ui->data_len }; | 905 | .dirtied_ino_d = ALIGN(ui->data_len, 8) }; |
894 | 906 | ||
895 | err = ubifs_budget_space(c, &req); | 907 | err = ubifs_budget_space(c, &req); |
896 | if (err) | 908 | if (err) |
@@ -941,7 +953,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) | |||
941 | struct inode *inode = dentry->d_inode; | 953 | struct inode *inode = dentry->d_inode; |
942 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 954 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
943 | 955 | ||
944 | dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); | 956 | dbg_gen("ino %lu, mode %#x, ia_valid %#x", |
957 | inode->i_ino, inode->i_mode, attr->ia_valid); | ||
945 | err = inode_change_ok(inode, attr); | 958 | err = inode_change_ok(inode, attr); |
946 | if (err) | 959 | if (err) |
947 | return err; | 960 | return err; |
@@ -1051,7 +1064,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) | |||
1051 | if (mctime_update_needed(inode, &now)) { | 1064 | if (mctime_update_needed(inode, &now)) { |
1052 | int err, release; | 1065 | int err, release; |
1053 | struct ubifs_budget_req req = { .dirtied_ino = 1, | 1066 | struct ubifs_budget_req req = { .dirtied_ino = 1, |
1054 | .dirtied_ino_d = ui->data_len }; | 1067 | .dirtied_ino_d = ALIGN(ui->data_len, 8) }; |
1055 | 1068 | ||
1056 | err = ubifs_budget_space(c, &req); | 1069 | err = ubifs_budget_space(c, &req); |
1057 | if (err) | 1070 | if (err) |
@@ -1270,6 +1283,7 @@ struct file_operations ubifs_file_operations = { | |||
1270 | .fsync = ubifs_fsync, | 1283 | .fsync = ubifs_fsync, |
1271 | .unlocked_ioctl = ubifs_ioctl, | 1284 | .unlocked_ioctl = ubifs_ioctl, |
1272 | .splice_read = generic_file_splice_read, | 1285 | .splice_read = generic_file_splice_read, |
1286 | .splice_write = generic_file_splice_write, | ||
1273 | #ifdef CONFIG_COMPAT | 1287 | #ifdef CONFIG_COMPAT |
1274 | .compat_ioctl = ubifs_compat_ioctl, | 1288 | .compat_ioctl = ubifs_compat_ioctl, |
1275 | #endif | 1289 | #endif |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 10394c548367..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must to set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -290,9 +288,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
290 | idx_lp = idx_heap->arr[0]; | 288 | idx_lp = idx_heap->arr[0]; |
291 | sum = idx_lp->free + idx_lp->dirty; | 289 | sum = idx_lp->free + idx_lp->dirty; |
292 | /* | 290 | /* |
293 | * Since we reserve twice as more space for the index than it | 291 | * Since we reserve thrice as much space for the index than it |
294 | * actually takes, it does not make sense to pick indexing LEBs | 292 | * actually takes, it does not make sense to pick indexing LEBs |
295 | * with less than half LEB of dirty space. | 293 | * with less than, say, half LEB of dirty space. May be half is |
294 | * not the optimal boundary - this should be tested and | ||
295 | * checked. This boundary should determine how much we use | ||
296 | * in-the-gaps to consolidate the index comparing to how much | ||
297 | * we use garbage collector to consolidate it. The "half" | ||
298 | * criteria just feels to be fine. | ||
296 | */ | 299 | */ |
297 | if (sum < min_space || sum < c->half_leb_size) | 300 | if (sum < min_space || sum < c->half_leb_size) |
298 | idx_lp = NULL; | 301 | idx_lp = NULL; |
@@ -312,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
312 | lp = idx_lp; | 315 | lp = idx_lp; |
313 | 316 | ||
314 | if (lp) { | 317 | if (lp) { |
315 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
316 | goto found; | 319 | goto found; |
317 | } | 320 | } |
318 | 321 | ||
@@ -504,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
504 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
505 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
506 | c->lst.taken_empty_lebs; | 509 | c->lst.taken_empty_lebs; |
507 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
508 | if (rsvd_idx_lebs < lebs) | 510 | if (rsvd_idx_lebs < lebs) |
509 | /* | 511 | /* |
510 | * OK to allocate an empty LEB, but we still don't want to go | 512 | * OK to allocate an empty LEB, but we still don't want to go |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
334 | 334 | ||
335 | err = move_nodes(c, sleb); | 335 | err = move_nodes(c, sleb); |
336 | if (err) | 336 | if (err) |
337 | goto out; | 337 | goto out_inc_seq; |
338 | 338 | ||
339 | err = gc_sync_wbufs(c); | 339 | err = gc_sync_wbufs(c); |
340 | if (err) | 340 | if (err) |
341 | goto out; | 341 | goto out_inc_seq; |
342 | 342 | ||
343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); | 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); |
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out_inc_seq; |
346 | |||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
346 | 352 | ||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
363 | out: | 369 | out: |
364 | ubifs_scan_destroy(sleb); | 370 | ubifs_scan_destroy(sleb); |
365 | return err; | 371 | return err; |
372 | |||
373 | out_inc_seq: | ||
374 | /* We may have moved at least some nodes so allow for races with TNC */ | ||
375 | c->gced_lnum = lnum; | ||
376 | smp_wmb(); | ||
377 | c->gc_seq += 1; | ||
378 | smp_wmb(); | ||
379 | goto out; | ||
366 | } | 380 | } |
367 | 381 | ||
368 | /** | 382 | /** |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3374f91b6709..054363f2b207 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -54,6 +54,20 @@ | |||
54 | #include "ubifs.h" | 54 | #include "ubifs.h" |
55 | 55 | ||
56 | /** | 56 | /** |
57 | * ubifs_ro_mode - switch UBIFS to read read-only mode. | ||
58 | * @c: UBIFS file-system description object | ||
59 | * @err: error code which is the reason of switching to R/O mode | ||
60 | */ | ||
61 | void ubifs_ro_mode(struct ubifs_info *c, int err) | ||
62 | { | ||
63 | if (!c->ro_media) { | ||
64 | c->ro_media = 1; | ||
65 | ubifs_warn("switched to read-only mode, error %d", err); | ||
66 | dbg_dump_stack(); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /** | ||
57 | * ubifs_check_node - check node. | 71 | * ubifs_check_node - check node. |
58 | * @c: UBIFS file-system description object | 72 | * @c: UBIFS file-system description object |
59 | * @buf: node to check | 73 | * @buf: node to check |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 283155abe5f5..22993f867d19 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -447,13 +447,11 @@ static int get_dent_type(int mode) | |||
447 | * @ino: buffer in which to pack inode node | 447 | * @ino: buffer in which to pack inode node |
448 | * @inode: inode to pack | 448 | * @inode: inode to pack |
449 | * @last: indicates the last node of the group | 449 | * @last: indicates the last node of the group |
450 | * @last_reference: non-zero if this is a deletion inode | ||
451 | */ | 450 | */ |
452 | static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, | 451 | static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, |
453 | const struct inode *inode, int last, | 452 | const struct inode *inode, int last) |
454 | int last_reference) | ||
455 | { | 453 | { |
456 | int data_len = 0; | 454 | int data_len = 0, last_reference = !inode->i_nlink; |
457 | struct ubifs_inode *ui = ubifs_inode(inode); | 455 | struct ubifs_inode *ui = ubifs_inode(inode); |
458 | 456 | ||
459 | ino->ch.node_type = UBIFS_INO_NODE; | 457 | ino->ch.node_type = UBIFS_INO_NODE; |
@@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | |||
596 | ubifs_prep_grp_node(c, dent, dlen, 0); | 594 | ubifs_prep_grp_node(c, dent, dlen, 0); |
597 | 595 | ||
598 | ino = (void *)dent + aligned_dlen; | 596 | ino = (void *)dent + aligned_dlen; |
599 | pack_inode(c, ino, inode, 0, last_reference); | 597 | pack_inode(c, ino, inode, 0); |
600 | ino = (void *)ino + aligned_ilen; | 598 | ino = (void *)ino + aligned_ilen; |
601 | pack_inode(c, ino, dir, 1, 0); | 599 | pack_inode(c, ino, dir, 1); |
602 | 600 | ||
603 | if (last_reference) { | 601 | if (last_reference) { |
604 | err = ubifs_add_orphan(c, inode->i_ino); | 602 | err = ubifs_add_orphan(c, inode->i_ino); |
@@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | |||
606 | release_head(c, BASEHD); | 604 | release_head(c, BASEHD); |
607 | goto out_finish; | 605 | goto out_finish; |
608 | } | 606 | } |
607 | ui->del_cmtno = c->cmt_no; | ||
609 | } | 608 | } |
610 | 609 | ||
611 | err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); | 610 | err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); |
@@ -750,30 +749,25 @@ out_free: | |||
750 | * ubifs_jnl_write_inode - flush inode to the journal. | 749 | * ubifs_jnl_write_inode - flush inode to the journal. |
751 | * @c: UBIFS file-system description object | 750 | * @c: UBIFS file-system description object |
752 | * @inode: inode to flush | 751 | * @inode: inode to flush |
753 | * @deletion: inode has been deleted | ||
754 | * | 752 | * |
755 | * This function writes inode @inode to the journal. If the inode is | 753 | * This function writes inode @inode to the journal. If the inode is |
756 | * synchronous, it also synchronizes the write-buffer. Returns zero in case of | 754 | * synchronous, it also synchronizes the write-buffer. Returns zero in case of |
757 | * success and a negative error code in case of failure. | 755 | * success and a negative error code in case of failure. |
758 | */ | 756 | */ |
759 | int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, | 757 | int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) |
760 | int deletion) | ||
761 | { | 758 | { |
762 | int err, len, lnum, offs, sync = 0; | 759 | int err, lnum, offs; |
763 | struct ubifs_ino_node *ino; | 760 | struct ubifs_ino_node *ino; |
764 | struct ubifs_inode *ui = ubifs_inode(inode); | 761 | struct ubifs_inode *ui = ubifs_inode(inode); |
762 | int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink; | ||
765 | 763 | ||
766 | dbg_jnl("ino %lu%s", inode->i_ino, | 764 | dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); |
767 | deletion ? " (last reference)" : ""); | ||
768 | if (deletion) | ||
769 | ubifs_assert(inode->i_nlink == 0); | ||
770 | 765 | ||
771 | len = UBIFS_INO_NODE_SZ; | ||
772 | /* | 766 | /* |
773 | * If the inode is being deleted, do not write the attached data. No | 767 | * If the inode is being deleted, do not write the attached data. No |
774 | * need to synchronize the write-buffer either. | 768 | * need to synchronize the write-buffer either. |
775 | */ | 769 | */ |
776 | if (!deletion) { | 770 | if (!last_reference) { |
777 | len += ui->data_len; | 771 | len += ui->data_len; |
778 | sync = IS_SYNC(inode); | 772 | sync = IS_SYNC(inode); |
779 | } | 773 | } |
@@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, | |||
786 | if (err) | 780 | if (err) |
787 | goto out_free; | 781 | goto out_free; |
788 | 782 | ||
789 | pack_inode(c, ino, inode, 1, deletion); | 783 | pack_inode(c, ino, inode, 1); |
790 | err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); | 784 | err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); |
791 | if (err) | 785 | if (err) |
792 | goto out_release; | 786 | goto out_release; |
@@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, | |||
795 | inode->i_ino); | 789 | inode->i_ino); |
796 | release_head(c, BASEHD); | 790 | release_head(c, BASEHD); |
797 | 791 | ||
798 | if (deletion) { | 792 | if (last_reference) { |
799 | err = ubifs_tnc_remove_ino(c, inode->i_ino); | 793 | err = ubifs_tnc_remove_ino(c, inode->i_ino); |
800 | if (err) | 794 | if (err) |
801 | goto out_ro; | 795 | goto out_ro; |
@@ -828,6 +822,65 @@ out_free: | |||
828 | } | 822 | } |
829 | 823 | ||
830 | /** | 824 | /** |
825 | * ubifs_jnl_delete_inode - delete an inode. | ||
826 | * @c: UBIFS file-system description object | ||
827 | * @inode: inode to delete | ||
828 | * | ||
829 | * This function deletes inode @inode which includes removing it from orphans, | ||
830 | * deleting it from TNC and, in some cases, writing a deletion inode to the | ||
831 | * journal. | ||
832 | * | ||
833 | * When regular file inodes are unlinked or a directory inode is removed, the | ||
834 | * 'ubifs_jnl_update()' function writes a corresponding deletion inode and | ||
835 | * direntry to the media, and adds the inode to orphans. After this, when the | ||
836 | * last reference to this inode has been dropped, this function is called. In | ||
837 | * general, it has to write one more deletion inode to the media, because if | ||
838 | * a commit happened between 'ubifs_jnl_update()' and | ||
839 | * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal | ||
840 | * anymore, and in fact it might not be on the flash anymore, because it might | ||
841 | * have been garbage-collected already. And for optimization reasons UBIFS does | ||
842 | * not read the orphan area if it has been unmounted cleanly, so it would have | ||
843 | * no indication in the journal that there is a deleted inode which has to be | ||
844 | * removed from TNC. | ||
845 | * | ||
846 | * However, if there was no commit between 'ubifs_jnl_update()' and | ||
847 | * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion | ||
848 | * inode to the media for the second time. And this is quite a typical case. | ||
849 | * | ||
850 | * This function returns zero in case of success and a negative error code in | ||
851 | * case of failure. | ||
852 | */ | ||
853 | int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode) | ||
854 | { | ||
855 | int err; | ||
856 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
857 | |||
858 | ubifs_assert(inode->i_nlink == 0); | ||
859 | |||
860 | if (ui->del_cmtno != c->cmt_no) | ||
861 | /* A commit happened for sure */ | ||
862 | return ubifs_jnl_write_inode(c, inode); | ||
863 | |||
864 | down_read(&c->commit_sem); | ||
865 | /* | ||
866 | * Check commit number again, because the first test has been done | ||
867 | * without @c->commit_sem, so a commit might have happened. | ||
868 | */ | ||
869 | if (ui->del_cmtno != c->cmt_no) { | ||
870 | up_read(&c->commit_sem); | ||
871 | return ubifs_jnl_write_inode(c, inode); | ||
872 | } | ||
873 | |||
874 | err = ubifs_tnc_remove_ino(c, inode->i_ino); | ||
875 | if (err) | ||
876 | ubifs_ro_mode(c, err); | ||
877 | else | ||
878 | ubifs_delete_orphan(c, inode->i_ino); | ||
879 | up_read(&c->commit_sem); | ||
880 | return err; | ||
881 | } | ||
882 | |||
883 | /** | ||
831 | * ubifs_jnl_rename - rename a directory entry. | 884 | * ubifs_jnl_rename - rename a directory entry. |
832 | * @c: UBIFS file-system description object | 885 | * @c: UBIFS file-system description object |
833 | * @old_dir: parent inode of directory entry to rename | 886 | * @old_dir: parent inode of directory entry to rename |
@@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, | |||
917 | 970 | ||
918 | p = (void *)dent2 + aligned_dlen2; | 971 | p = (void *)dent2 + aligned_dlen2; |
919 | if (new_inode) { | 972 | if (new_inode) { |
920 | pack_inode(c, p, new_inode, 0, last_reference); | 973 | pack_inode(c, p, new_inode, 0); |
921 | p += ALIGN(ilen, 8); | 974 | p += ALIGN(ilen, 8); |
922 | } | 975 | } |
923 | 976 | ||
924 | if (!move) | 977 | if (!move) |
925 | pack_inode(c, p, old_dir, 1, 0); | 978 | pack_inode(c, p, old_dir, 1); |
926 | else { | 979 | else { |
927 | pack_inode(c, p, old_dir, 0, 0); | 980 | pack_inode(c, p, old_dir, 0); |
928 | p += ALIGN(plen, 8); | 981 | p += ALIGN(plen, 8); |
929 | pack_inode(c, p, new_dir, 1, 0); | 982 | pack_inode(c, p, new_dir, 1); |
930 | } | 983 | } |
931 | 984 | ||
932 | if (last_reference) { | 985 | if (last_reference) { |
@@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, | |||
935 | release_head(c, BASEHD); | 988 | release_head(c, BASEHD); |
936 | goto out_finish; | 989 | goto out_finish; |
937 | } | 990 | } |
991 | new_ui->del_cmtno = c->cmt_no; | ||
938 | } | 992 | } |
939 | 993 | ||
940 | err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); | 994 | err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); |
@@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, | |||
1131 | if (err) | 1185 | if (err) |
1132 | goto out_free; | 1186 | goto out_free; |
1133 | 1187 | ||
1134 | pack_inode(c, ino, inode, 0, 0); | 1188 | pack_inode(c, ino, inode, 0); |
1135 | ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); | 1189 | ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); |
1136 | if (dlen) | 1190 | if (dlen) |
1137 | ubifs_prep_grp_node(c, dn, dlen, 1); | 1191 | ubifs_prep_grp_node(c, dn, dlen, 1); |
@@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, | |||
1251 | ubifs_prep_grp_node(c, xent, xlen, 0); | 1305 | ubifs_prep_grp_node(c, xent, xlen, 0); |
1252 | 1306 | ||
1253 | ino = (void *)xent + aligned_xlen; | 1307 | ino = (void *)xent + aligned_xlen; |
1254 | pack_inode(c, ino, inode, 0, 1); | 1308 | pack_inode(c, ino, inode, 0); |
1255 | ino = (void *)ino + UBIFS_INO_NODE_SZ; | 1309 | ino = (void *)ino + UBIFS_INO_NODE_SZ; |
1256 | pack_inode(c, ino, host, 1, 0); | 1310 | pack_inode(c, ino, host, 1); |
1257 | 1311 | ||
1258 | err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); | 1312 | err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); |
1259 | if (!sync && !err) | 1313 | if (!sync && !err) |
@@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, | |||
1320 | const struct inode *host) | 1374 | const struct inode *host) |
1321 | { | 1375 | { |
1322 | int err, len1, len2, aligned_len, aligned_len1, lnum, offs; | 1376 | int err, len1, len2, aligned_len, aligned_len1, lnum, offs; |
1323 | struct ubifs_inode *host_ui = ubifs_inode(inode); | 1377 | struct ubifs_inode *host_ui = ubifs_inode(host); |
1324 | struct ubifs_ino_node *ino; | 1378 | struct ubifs_ino_node *ino; |
1325 | union ubifs_key key; | 1379 | union ubifs_key key; |
1326 | int sync = IS_DIRSYNC(host); | 1380 | int sync = IS_DIRSYNC(host); |
@@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, | |||
1344 | if (err) | 1398 | if (err) |
1345 | goto out_free; | 1399 | goto out_free; |
1346 | 1400 | ||
1347 | pack_inode(c, ino, host, 0, 0); | 1401 | pack_inode(c, ino, host, 0); |
1348 | pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); | 1402 | pack_inode(c, (void *)ino + aligned_len1, inode, 1); |
1349 | 1403 | ||
1350 | err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); | 1404 | err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); |
1351 | if (!sync && !err) { | 1405 | if (!sync && !err) { |
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 36857b9ed59e..3e0aa7367556 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
317 | return 0; | 317 | return 0; |
318 | 318 | ||
319 | out_unlock: | 319 | out_unlock: |
320 | if (err != -EAGAIN) | ||
321 | ubifs_ro_mode(c, err); | ||
320 | mutex_unlock(&c->log_mutex); | 322 | mutex_unlock(&c->log_mutex); |
321 | kfree(ref); | 323 | kfree(ref); |
322 | kfree(bud); | 324 | kfree(bud); |
@@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
410 | return -ENOMEM; | 412 | return -ENOMEM; |
411 | 413 | ||
412 | cs->ch.node_type = UBIFS_CS_NODE; | 414 | cs->ch.node_type = UBIFS_CS_NODE; |
413 | cs->cmt_no = cpu_to_le64(c->cmt_no + 1); | 415 | cs->cmt_no = cpu_to_le64(c->cmt_no); |
414 | ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); | 416 | ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); |
415 | 417 | ||
416 | /* | 418 | /* |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4beccfc256d2..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) | |||
80 | } | 80 | } |
81 | 81 | ||
82 | /** | 82 | /** |
83 | * ubifs_ro_mode - switch UBIFS to read read-only mode. | ||
84 | * @c: UBIFS file-system description object | ||
85 | * @err: error code which is the reason of switching to R/O mode | ||
86 | */ | ||
87 | static inline void ubifs_ro_mode(struct ubifs_info *c, int err) | ||
88 | { | ||
89 | if (!c->ro_media) { | ||
90 | c->ro_media = 1; | ||
91 | ubifs_warn("switched to read-only mode, error %d", err); | ||
92 | dbg_dump_stack(); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * ubifs_compr_present - check if compressor was compiled in. | 83 | * ubifs_compr_present - check if compressor was compiled in. |
98 | * @compr_type: compressor type to check | 84 | * @compr_type: compressor type to check |
99 | * | 85 | * |
@@ -298,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
298 | } | 284 | } |
299 | 285 | ||
300 | /** | 286 | /** |
301 | * ubifs_reported_space - calculate reported free space. | ||
302 | * @c: the UBIFS file-system description object | ||
303 | * @free: amount of free space | ||
304 | * | ||
305 | * This function calculates amount of free space which will be reported to | ||
306 | * user-space. User-space application tend to expect that if the file-system | ||
307 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
308 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
309 | * node and it has to write indexind nodes as well. This introduces additional | ||
310 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
311 | * above expectetion. | ||
312 | * | ||
313 | * This function assumes free space is made up of uncompressed data nodes and | ||
314 | * full index nodes (one per data node, doubled because we always allow enough | ||
315 | * space to write the index twice). | ||
316 | * | ||
317 | * Note, the calculation is pessimistic, which means that most of the time | ||
318 | * UBIFS reports less space than it actually has. | ||
319 | */ | ||
320 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
321 | uint64_t free) | ||
322 | { | ||
323 | int divisor, factor; | ||
324 | |||
325 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); | ||
326 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
327 | do_div(free, divisor); | ||
328 | |||
329 | return free * factor; | ||
330 | } | ||
331 | |||
332 | /** | ||
333 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
334 | * @inode: inode | 288 | * @inode: inode |
335 | */ | 289 | */ |
@@ -339,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
339 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
340 | } | 294 | } |
341 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function look up and reads node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
342 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 3afeb9242c6a..02d3462f4d3e 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic) | |||
310 | c->cmt_orphans -= cnt; | 310 | c->cmt_orphans -= cnt; |
311 | spin_unlock(&c->orphan_lock); | 311 | spin_unlock(&c->orphan_lock); |
312 | if (c->cmt_orphans) | 312 | if (c->cmt_orphans) |
313 | orph->cmt_no = cpu_to_le64(c->cmt_no + 1); | 313 | orph->cmt_no = cpu_to_le64(c->cmt_no); |
314 | else | 314 | else |
315 | /* Mark the last node of the commit */ | 315 | /* Mark the last node of the commit */ |
316 | orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); | 316 | orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); |
317 | ubifs_assert(c->ohead_offs + len <= c->leb_size); | 317 | ubifs_assert(c->ohead_offs + len <= c->leb_size); |
318 | ubifs_assert(c->ohead_lnum >= c->orph_first); | 318 | ubifs_assert(c->ohead_lnum >= c->orph_first); |
319 | ubifs_assert(c->ohead_lnum <= c->orph_last); | 319 | ubifs_assert(c->ohead_lnum <= c->orph_last); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ca1e2d4e03cc..9a9220333b3b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/ctype.h> | 32 | #include <linux/ctype.h> |
33 | #include <linux/random.h> | ||
34 | #include <linux/kthread.h> | 33 | #include <linux/kthread.h> |
35 | #include <linux/parser.h> | 34 | #include <linux/parser.h> |
36 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
@@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) | |||
149 | if (err) | 148 | if (err) |
150 | goto out_invalid; | 149 | goto out_invalid; |
151 | 150 | ||
152 | /* Disable readahead */ | 151 | /* Disable read-ahead */ |
153 | inode->i_mapping->backing_dev_info = &c->bdi; | 152 | inode->i_mapping->backing_dev_info = &c->bdi; |
154 | 153 | ||
155 | switch (inode->i_mode & S_IFMT) { | 154 | switch (inode->i_mode & S_IFMT) { |
@@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode) | |||
278 | */ | 277 | */ |
279 | static int ubifs_write_inode(struct inode *inode, int wait) | 278 | static int ubifs_write_inode(struct inode *inode, int wait) |
280 | { | 279 | { |
281 | int err; | 280 | int err = 0; |
282 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 281 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
283 | struct ubifs_inode *ui = ubifs_inode(inode); | 282 | struct ubifs_inode *ui = ubifs_inode(inode); |
284 | 283 | ||
@@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait) | |||
299 | return 0; | 298 | return 0; |
300 | } | 299 | } |
301 | 300 | ||
302 | dbg_gen("inode %lu", inode->i_ino); | 301 | /* |
303 | err = ubifs_jnl_write_inode(c, inode, 0); | 302 | * As an optimization, do not write orphan inodes to the media just |
304 | if (err) | 303 | * because this is not needed. |
305 | ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); | 304 | */ |
305 | dbg_gen("inode %lu, mode %#x, nlink %u", | ||
306 | inode->i_ino, (int)inode->i_mode, inode->i_nlink); | ||
307 | if (inode->i_nlink) { | ||
308 | err = ubifs_jnl_write_inode(c, inode); | ||
309 | if (err) | ||
310 | ubifs_err("can't write inode %lu, error %d", | ||
311 | inode->i_ino, err); | ||
312 | } | ||
306 | 313 | ||
307 | ui->dirty = 0; | 314 | ui->dirty = 0; |
308 | mutex_unlock(&ui->ui_mutex); | 315 | mutex_unlock(&ui->ui_mutex); |
@@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode) | |||
314 | { | 321 | { |
315 | int err; | 322 | int err; |
316 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 323 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
324 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
317 | 325 | ||
318 | if (ubifs_inode(inode)->xattr) | 326 | if (ui->xattr) |
319 | /* | 327 | /* |
320 | * Extended attribute inode deletions are fully handled in | 328 | * Extended attribute inode deletions are fully handled in |
321 | * 'ubifs_removexattr()'. These inodes are special and have | 329 | * 'ubifs_removexattr()'. These inodes are special and have |
@@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode) | |||
323 | */ | 331 | */ |
324 | goto out; | 332 | goto out; |
325 | 333 | ||
326 | dbg_gen("inode %lu", inode->i_ino); | 334 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); |
327 | ubifs_assert(!atomic_read(&inode->i_count)); | 335 | ubifs_assert(!atomic_read(&inode->i_count)); |
328 | ubifs_assert(inode->i_nlink == 0); | 336 | ubifs_assert(inode->i_nlink == 0); |
329 | 337 | ||
@@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode) | |||
331 | if (is_bad_inode(inode)) | 339 | if (is_bad_inode(inode)) |
332 | goto out; | 340 | goto out; |
333 | 341 | ||
334 | ubifs_inode(inode)->ui_size = inode->i_size = 0; | 342 | ui->ui_size = inode->i_size = 0; |
335 | err = ubifs_jnl_write_inode(c, inode, 1); | 343 | err = ubifs_jnl_delete_inode(c, inode); |
336 | if (err) | 344 | if (err) |
337 | /* | 345 | /* |
338 | * Worst case we have a lost orphan inode wasting space, so a | 346 | * Worst case we have a lost orphan inode wasting space, so a |
339 | * simple error message is ok here. | 347 | * simple error message is OK here. |
340 | */ | 348 | */ |
341 | ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); | 349 | ubifs_err("can't delete inode %lu, error %d", |
350 | inode->i_ino, err); | ||
351 | |||
342 | out: | 352 | out: |
353 | if (ui->dirty) | ||
354 | ubifs_release_dirty_inode_budget(c, ui); | ||
343 | clear_inode(inode); | 355 | clear_inode(inode); |
344 | } | 356 | } |
345 | 357 | ||
@@ -358,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
358 | { | 370 | { |
359 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
360 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
361 | 374 | ||
362 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
363 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
364 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
365 | 378 | ||
@@ -374,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
374 | buf->f_files = 0; | 387 | buf->f_files = 0; |
375 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
376 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
377 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
378 | return 0; | 392 | return 0; |
379 | } | 393 | } |
380 | 394 | ||
@@ -518,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
518 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
519 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
520 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
521 | return 0; | 541 | return 0; |
522 | } | 542 | } |
523 | 543 | ||
@@ -635,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
635 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
636 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
637 | * | 657 | * |
638 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
639 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
640 | * | ||
641 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
642 | */ | 660 | */ |
643 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
644 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
645 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
646 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
647 | 665 | ||
@@ -830,7 +848,7 @@ enum { | |||
830 | Opt_err, | 848 | Opt_err, |
831 | }; | 849 | }; |
832 | 850 | ||
833 | static match_table_t tokens = { | 851 | static const match_table_t tokens = { |
834 | {Opt_fast_unmount, "fast_unmount"}, | 852 | {Opt_fast_unmount, "fast_unmount"}, |
835 | {Opt_norm_unmount, "norm_unmount"}, | 853 | {Opt_norm_unmount, "norm_unmount"}, |
836 | {Opt_err, NULL}, | 854 | {Opt_err, NULL}, |
@@ -1006,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1006 | goto out_dereg; | 1024 | goto out_dereg; |
1007 | } | 1025 | } |
1008 | 1026 | ||
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | ||
1009 | if (!mounted_read_only) { | 1028 | if (!mounted_read_only) { |
1010 | err = alloc_wbufs(c); | 1029 | err = alloc_wbufs(c); |
1011 | if (err) | 1030 | if (err) |
1012 | goto out_cbuf; | 1031 | goto out_cbuf; |
1013 | 1032 | ||
1014 | /* Create background thread */ | 1033 | /* Create background thread */ |
1015 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
1016 | c->vi.vol_id); | ||
1017 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1018 | if (!c->bgt) | 1035 | if (!c->bgt) |
1019 | c->bgt = ERR_PTR(-EINVAL); | 1036 | c->bgt = ERR_PTR(-EINVAL); |
@@ -1122,8 +1139,8 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1122 | if (err) | 1139 | if (err) |
1123 | goto out_infos; | 1140 | goto out_infos; |
1124 | 1141 | ||
1125 | ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, | 1142 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", |
1126 | c->vi.vol_id); | 1143 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); |
1127 | if (mounted_read_only) | 1144 | if (mounted_read_only) |
1128 | ubifs_msg("mounted read-only"); | 1145 | ubifs_msg("mounted read-only"); |
1129 | x = (long long)c->main_lebs * c->leb_size; | 1146 | x = (long long)c->main_lebs * c->leb_size; |
@@ -1469,6 +1486,7 @@ static void ubifs_put_super(struct super_block *sb) | |||
1469 | */ | 1486 | */ |
1470 | ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); | 1487 | ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); |
1471 | ubifs_assert(c->budg_idx_growth == 0); | 1488 | ubifs_assert(c->budg_idx_growth == 0); |
1489 | ubifs_assert(c->budg_dd_growth == 0); | ||
1472 | ubifs_assert(c->budg_data_growth == 0); | 1490 | ubifs_assert(c->budg_data_growth == 0); |
1473 | 1491 | ||
1474 | /* | 1492 | /* |
@@ -1657,7 +1675,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1657 | INIT_LIST_HEAD(&c->orph_new); | 1675 | INIT_LIST_HEAD(&c->orph_new); |
1658 | 1676 | ||
1659 | c->highest_inum = UBIFS_FIRST_INO; | 1677 | c->highest_inum = UBIFS_FIRST_INO; |
1660 | get_random_bytes(&c->vfs_gen, sizeof(int)); | ||
1661 | c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; | 1678 | c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; |
1662 | 1679 | ||
1663 | ubi_get_volume_info(ubi, &c->vi); | 1680 | ubi_get_volume_info(ubi, &c->vi); |
@@ -1671,10 +1688,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1671 | } | 1688 | } |
1672 | 1689 | ||
1673 | /* | 1690 | /* |
1674 | * UBIFS provids 'backing_dev_info' in order to disable readahead. For | 1691 | * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For |
1675 | * UBIFS, I/O is not deferred, it is done immediately in readpage, | 1692 | * UBIFS, I/O is not deferred, it is done immediately in readpage, |
1676 | * which means the user would have to wait not just for their own I/O | 1693 | * which means the user would have to wait not just for their own I/O |
1677 | * but the readahead I/O as well i.e. completely pointless. | 1694 | * but the read-ahead I/O as well i.e. completely pointless. |
1678 | * | 1695 | * |
1679 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. | 1696 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. |
1680 | */ | 1697 | */ |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function look up and reads node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 8117e65ba2e9..8ac76b1c2d55 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c | |||
@@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) | |||
372 | written = layout_leb_in_gaps(c, p); | 372 | written = layout_leb_in_gaps(c, p); |
373 | if (written < 0) { | 373 | if (written < 0) { |
374 | err = written; | 374 | err = written; |
375 | if (err == -ENOSPC) { | 375 | if (err != -ENOSPC) { |
376 | if (!dbg_force_in_the_gaps_enabled) { | 376 | kfree(c->gap_lebs); |
377 | /* | 377 | c->gap_lebs = NULL; |
378 | * Do not print scary warnings if the | 378 | return err; |
379 | * debugging option which forces | ||
380 | * in-the-gaps is enabled. | ||
381 | */ | ||
382 | ubifs_err("out of space"); | ||
383 | spin_lock(&c->space_lock); | ||
384 | dbg_dump_budg(c); | ||
385 | spin_unlock(&c->space_lock); | ||
386 | dbg_dump_lprops(c); | ||
387 | } | ||
388 | /* Try to commit anyway */ | ||
389 | err = 0; | ||
390 | break; | ||
391 | } | 379 | } |
392 | kfree(c->gap_lebs); | 380 | if (!dbg_force_in_the_gaps_enabled) { |
393 | c->gap_lebs = NULL; | 381 | /* |
394 | return err; | 382 | * Do not print scary warnings if the debugging |
383 | * option which forces in-the-gaps is enabled. | ||
384 | */ | ||
385 | ubifs_err("out of space"); | ||
386 | spin_lock(&c->space_lock); | ||
387 | dbg_dump_budg(c); | ||
388 | spin_unlock(&c->space_lock); | ||
389 | dbg_dump_lprops(c); | ||
390 | } | ||
391 | /* Try to commit anyway */ | ||
392 | err = 0; | ||
393 | break; | ||
395 | } | 394 | } |
396 | p++; | 395 | p++; |
397 | cnt -= written; | 396 | cnt -= written; |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 0cc7da9bed47..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
@@ -228,10 +228,10 @@ enum { | |||
228 | /* Minimum number of orphan area logical eraseblocks */ | 228 | /* Minimum number of orphan area logical eraseblocks */ |
229 | #define UBIFS_MIN_ORPH_LEBS 1 | 229 | #define UBIFS_MIN_ORPH_LEBS 1 |
230 | /* | 230 | /* |
231 | * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 | 231 | * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 |
232 | * for GC, 1 for deletions, and at least 1 for committed data). | 232 | * for GC, 1 for deletions, and at least 1 for committed data). |
233 | */ | 233 | */ |
234 | #define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) | 234 | #define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) |
235 | 235 | ||
236 | /* Minimum number of logical eraseblocks */ | 236 | /* Minimum number of logical eraseblocks */ |
237 | #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ | 237 | #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e4f89f271827..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -20,8 +20,6 @@ | |||
20 | * Adrian Hunter | 20 | * Adrian Hunter |
21 | */ | 21 | */ |
22 | 22 | ||
23 | /* Implementation version 0.7 */ | ||
24 | |||
25 | #ifndef __UBIFS_H__ | 23 | #ifndef __UBIFS_H__ |
26 | #define __UBIFS_H__ | 24 | #define __UBIFS_H__ |
27 | 25 | ||
@@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb { | |||
322 | * struct ubifs_inode - UBIFS in-memory inode description. | 320 | * struct ubifs_inode - UBIFS in-memory inode description. |
323 | * @vfs_inode: VFS inode description object | 321 | * @vfs_inode: VFS inode description object |
324 | * @creat_sqnum: sequence number at time of creation | 322 | * @creat_sqnum: sequence number at time of creation |
323 | * @del_cmtno: commit number corresponding to the time the inode was deleted, | ||
324 | * protected by @c->commit_sem; | ||
325 | * @xattr_size: summarized size of all extended attributes in bytes | 325 | * @xattr_size: summarized size of all extended attributes in bytes |
326 | * @xattr_cnt: count of extended attributes this inode has | 326 | * @xattr_cnt: count of extended attributes this inode has |
327 | * @xattr_names: sum of lengths of all extended attribute names belonging to | 327 | * @xattr_names: sum of lengths of all extended attribute names belonging to |
@@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb { | |||
373 | struct ubifs_inode { | 373 | struct ubifs_inode { |
374 | struct inode vfs_inode; | 374 | struct inode vfs_inode; |
375 | unsigned long long creat_sqnum; | 375 | unsigned long long creat_sqnum; |
376 | unsigned long long del_cmtno; | ||
376 | unsigned int xattr_size; | 377 | unsigned int xattr_size; |
377 | unsigned int xattr_cnt; | 378 | unsigned int xattr_cnt; |
378 | unsigned int xattr_names; | 379 | unsigned int xattr_names; |
@@ -779,7 +780,7 @@ struct ubifs_compressor { | |||
779 | /** | 780 | /** |
780 | * struct ubifs_budget_req - budget requirements of an operation. | 781 | * struct ubifs_budget_req - budget requirements of an operation. |
781 | * | 782 | * |
782 | * @fast: non-zero if the budgeting should try to aquire budget quickly and | 783 | * @fast: non-zero if the budgeting should try to acquire budget quickly and |
783 | * should not try to call write-back | 784 | * should not try to call write-back |
784 | * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields | 785 | * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields |
785 | * have to be re-calculated | 786 | * have to be re-calculated |
@@ -805,21 +806,31 @@ struct ubifs_compressor { | |||
805 | * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d | 806 | * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d |
806 | * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made | 807 | * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made |
807 | * dirty by the re-name operation. | 808 | * dirty by the re-name operation. |
809 | * | ||
810 | * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to | ||
811 | * make sure the amount of inode data which contribute to @new_ino_d and | ||
812 | * @dirtied_ino_d fields are aligned. | ||
808 | */ | 813 | */ |
809 | struct ubifs_budget_req { | 814 | struct ubifs_budget_req { |
810 | unsigned int fast:1; | 815 | unsigned int fast:1; |
811 | unsigned int recalculate:1; | 816 | unsigned int recalculate:1; |
817 | #ifndef UBIFS_DEBUG | ||
812 | unsigned int new_page:1; | 818 | unsigned int new_page:1; |
813 | unsigned int dirtied_page:1; | 819 | unsigned int dirtied_page:1; |
814 | unsigned int new_dent:1; | 820 | unsigned int new_dent:1; |
815 | unsigned int mod_dent:1; | 821 | unsigned int mod_dent:1; |
816 | unsigned int new_ino:1; | 822 | unsigned int new_ino:1; |
817 | unsigned int new_ino_d:13; | 823 | unsigned int new_ino_d:13; |
818 | #ifndef UBIFS_DEBUG | ||
819 | unsigned int dirtied_ino:4; | 824 | unsigned int dirtied_ino:4; |
820 | unsigned int dirtied_ino_d:15; | 825 | unsigned int dirtied_ino_d:15; |
821 | #else | 826 | #else |
822 | /* Not bit-fields to check for overflows */ | 827 | /* Not bit-fields to check for overflows */ |
828 | unsigned int new_page; | ||
829 | unsigned int dirtied_page; | ||
830 | unsigned int new_dent; | ||
831 | unsigned int mod_dent; | ||
832 | unsigned int new_ino; | ||
833 | unsigned int new_ino_d; | ||
823 | unsigned int dirtied_ino; | 834 | unsigned int dirtied_ino; |
824 | unsigned int dirtied_ino_d; | 835 | unsigned int dirtied_ino_d; |
825 | #endif | 836 | #endif |
@@ -860,13 +871,13 @@ struct ubifs_mount_opts { | |||
860 | * struct ubifs_info - UBIFS file-system description data structure | 871 | * struct ubifs_info - UBIFS file-system description data structure |
861 | * (per-superblock). | 872 | * (per-superblock). |
862 | * @vfs_sb: VFS @struct super_block object | 873 | * @vfs_sb: VFS @struct super_block object |
863 | * @bdi: backing device info object to make VFS happy and disable readahead | 874 | * @bdi: backing device info object to make VFS happy and disable read-ahead |
864 | * | 875 | * |
865 | * @highest_inum: highest used inode number | 876 | * @highest_inum: highest used inode number |
866 | * @vfs_gen: VFS inode generation counter | ||
867 | * @max_sqnum: current global sequence number | 877 | * @max_sqnum: current global sequence number |
868 | * @cmt_no: commit number (last successfully completed commit) | 878 | * @cmt_no: commit number of the last successfully completed commit, protected |
869 | * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters | 879 | * by @commit_sem |
880 | * @cnt_lock: protects @highest_inum and @max_sqnum counters | ||
870 | * @fmt_version: UBIFS on-flash format version | 881 | * @fmt_version: UBIFS on-flash format version |
871 | * @uuid: UUID from super block | 882 | * @uuid: UUID from super block |
872 | * | 883 | * |
@@ -984,6 +995,9 @@ struct ubifs_mount_opts { | |||
984 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
985 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
986 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
987 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
988 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
989 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1017,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1017 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1018 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1019 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1020 | * | 1036 | * |
1021 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1022 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1103,7 +1119,6 @@ struct ubifs_info { | |||
1103 | struct backing_dev_info bdi; | 1119 | struct backing_dev_info bdi; |
1104 | 1120 | ||
1105 | ino_t highest_inum; | 1121 | ino_t highest_inum; |
1106 | unsigned int vfs_gen; | ||
1107 | unsigned long long max_sqnum; | 1122 | unsigned long long max_sqnum; |
1108 | unsigned long long cmt_no; | 1123 | unsigned long long cmt_no; |
1109 | spinlock_t cnt_lock; | 1124 | spinlock_t cnt_lock; |
@@ -1214,6 +1229,8 @@ struct ubifs_info { | |||
1214 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1215 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1216 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1217 | int dead_wm; | 1234 | int dead_wm; |
1218 | int dark_wm; | 1235 | int dark_wm; |
1219 | int block_cnt; | 1236 | int block_cnt; |
@@ -1247,6 +1264,8 @@ struct ubifs_info { | |||
1247 | void *sbuf; | 1264 | void *sbuf; |
1248 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1249 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1250 | 1269 | ||
1251 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1252 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1346,6 +1365,7 @@ extern struct backing_dev_info ubifs_backing_dev_info; | |||
1346 | extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; | 1365 | extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; |
1347 | 1366 | ||
1348 | /* io.c */ | 1367 | /* io.c */ |
1368 | void ubifs_ro_mode(struct ubifs_info *c, int err); | ||
1349 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); | 1369 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); |
1350 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | 1370 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, |
1351 | int dtype); | 1371 | int dtype); |
@@ -1399,8 +1419,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, | |||
1399 | int deletion, int xent); | 1419 | int deletion, int xent); |
1400 | int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | 1420 | int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, |
1401 | const union ubifs_key *key, const void *buf, int len); | 1421 | const union ubifs_key *key, const void *buf, int len); |
1402 | int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, | 1422 | int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); |
1403 | int last_reference); | 1423 | int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); |
1404 | int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, | 1424 | int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, |
1405 | const struct dentry *old_dentry, | 1425 | const struct dentry *old_dentry, |
1406 | const struct inode *new_dir, | 1426 | const struct inode *new_dir, |
@@ -1423,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1423 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1424 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1425 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1426 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1427 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1428 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1429 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1430 | 1451 | ||
1431 | /* find.c */ | 1452 | /* find.c */ |
@@ -1440,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1440 | /* tnc.c */ | 1461 | /* tnc.c */ |
1441 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1442 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1443 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1444 | void *node); | ||
1445 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1446 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1447 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 1388a078e1a9..649bec78b645 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -61,7 +61,7 @@ | |||
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Limit the number of extended attributes per inode so that the total size | 63 | * Limit the number of extended attributes per inode so that the total size |
64 | * (xattr_size) is guaranteed to fit in an 'unsigned int'. | 64 | * (@xattr_size) is guaranteed to fit in an 'unsigned int'. |
65 | */ | 65 | */ |
66 | #define MAX_XATTRS_PER_INODE 65535 | 66 | #define MAX_XATTRS_PER_INODE 65535 |
67 | 67 | ||
@@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
103 | struct inode *inode; | 103 | struct inode *inode; |
104 | struct ubifs_inode *ui, *host_ui = ubifs_inode(host); | 104 | struct ubifs_inode *ui, *host_ui = ubifs_inode(host); |
105 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | 105 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, |
106 | .new_ino_d = size, .dirtied_ino = 1, | 106 | .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, |
107 | .dirtied_ino_d = host_ui->data_len}; | 107 | .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; |
108 | 108 | ||
109 | if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) | 109 | if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) |
110 | return -ENOSPC; | 110 | return -ENOSPC; |
111 | /* | 111 | /* |
112 | * Linux limits the maximum size of the extended attribute names list | 112 | * Linux limits the maximum size of the extended attribute names list |
113 | * to %XATTR_LIST_MAX. This means we should not allow creating more* | 113 | * to %XATTR_LIST_MAX. This means we should not allow creating more |
114 | * extended attributes if the name list becomes larger. This limitation | 114 | * extended attributes if the name list becomes larger. This limitation |
115 | * is artificial for UBIFS, though. | 115 | * is artificial for UBIFS, though. |
116 | */ | 116 | */ |
@@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
128 | goto out_budg; | 128 | goto out_budg; |
129 | } | 129 | } |
130 | 130 | ||
131 | mutex_lock(&host_ui->ui_mutex); | ||
132 | /* Re-define all operations to be "nothing" */ | 131 | /* Re-define all operations to be "nothing" */ |
133 | inode->i_mapping->a_ops = &none_address_operations; | 132 | inode->i_mapping->a_ops = &none_address_operations; |
134 | inode->i_op = &none_inode_operations; | 133 | inode->i_op = &none_inode_operations; |
@@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
141 | ui->data = kmalloc(size, GFP_NOFS); | 140 | ui->data = kmalloc(size, GFP_NOFS); |
142 | if (!ui->data) { | 141 | if (!ui->data) { |
143 | err = -ENOMEM; | 142 | err = -ENOMEM; |
144 | goto out_unlock; | 143 | goto out_free; |
145 | } | 144 | } |
146 | |||
147 | memcpy(ui->data, value, size); | 145 | memcpy(ui->data, value, size); |
146 | inode->i_size = ui->ui_size = size; | ||
147 | ui->data_len = size; | ||
148 | |||
149 | mutex_lock(&host_ui->ui_mutex); | ||
148 | host->i_ctime = ubifs_current_time(host); | 150 | host->i_ctime = ubifs_current_time(host); |
149 | host_ui->xattr_cnt += 1; | 151 | host_ui->xattr_cnt += 1; |
150 | host_ui->xattr_size += CALC_DENT_SIZE(nm->len); | 152 | host_ui->xattr_size += CALC_DENT_SIZE(nm->len); |
151 | host_ui->xattr_size += CALC_XATTR_BYTES(size); | 153 | host_ui->xattr_size += CALC_XATTR_BYTES(size); |
152 | host_ui->xattr_names += nm->len; | 154 | host_ui->xattr_names += nm->len; |
153 | 155 | ||
154 | /* | ||
155 | * We do not use i_size_write() because nobody can race with us as we | ||
156 | * are holding host @host->i_mutex - every xattr operation for this | ||
157 | * inode is serialized by it. | ||
158 | */ | ||
159 | inode->i_size = ui->ui_size = size; | ||
160 | ui->data_len = size; | ||
161 | err = ubifs_jnl_update(c, host, nm, inode, 0, 1); | 156 | err = ubifs_jnl_update(c, host, nm, inode, 0, 1); |
162 | if (err) | 157 | if (err) |
163 | goto out_cancel; | 158 | goto out_cancel; |
@@ -172,8 +167,8 @@ out_cancel: | |||
172 | host_ui->xattr_cnt -= 1; | 167 | host_ui->xattr_cnt -= 1; |
173 | host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); | 168 | host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); |
174 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); | 169 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); |
175 | out_unlock: | ||
176 | mutex_unlock(&host_ui->ui_mutex); | 170 | mutex_unlock(&host_ui->ui_mutex); |
171 | out_free: | ||
177 | make_bad_inode(inode); | 172 | make_bad_inode(inode); |
178 | iput(inode); | 173 | iput(inode); |
179 | out_budg: | 174 | out_budg: |
@@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, | |||
200 | struct ubifs_inode *host_ui = ubifs_inode(host); | 195 | struct ubifs_inode *host_ui = ubifs_inode(host); |
201 | struct ubifs_inode *ui = ubifs_inode(inode); | 196 | struct ubifs_inode *ui = ubifs_inode(inode); |
202 | struct ubifs_budget_req req = { .dirtied_ino = 2, | 197 | struct ubifs_budget_req req = { .dirtied_ino = 2, |
203 | .dirtied_ino_d = size + host_ui->data_len }; | 198 | .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; |
204 | 199 | ||
205 | ubifs_assert(ui->data_len == inode->i_size); | 200 | ubifs_assert(ui->data_len == inode->i_size); |
206 | err = ubifs_budget_space(c, &req); | 201 | err = ubifs_budget_space(c, &req); |
207 | if (err) | 202 | if (err) |
208 | return err; | 203 | return err; |
209 | 204 | ||
210 | mutex_lock(&host_ui->ui_mutex); | ||
211 | host->i_ctime = ubifs_current_time(host); | ||
212 | host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); | ||
213 | host_ui->xattr_size += CALC_XATTR_BYTES(size); | ||
214 | |||
215 | kfree(ui->data); | 205 | kfree(ui->data); |
216 | ui->data = kmalloc(size, GFP_NOFS); | 206 | ui->data = kmalloc(size, GFP_NOFS); |
217 | if (!ui->data) { | 207 | if (!ui->data) { |
218 | err = -ENOMEM; | 208 | err = -ENOMEM; |
219 | goto out_unlock; | 209 | goto out_free; |
220 | } | 210 | } |
221 | |||
222 | memcpy(ui->data, value, size); | 211 | memcpy(ui->data, value, size); |
223 | inode->i_size = ui->ui_size = size; | 212 | inode->i_size = ui->ui_size = size; |
224 | ui->data_len = size; | 213 | ui->data_len = size; |
225 | 214 | ||
215 | mutex_lock(&host_ui->ui_mutex); | ||
216 | host->i_ctime = ubifs_current_time(host); | ||
217 | host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); | ||
218 | host_ui->xattr_size += CALC_XATTR_BYTES(size); | ||
219 | |||
226 | /* | 220 | /* |
227 | * It is important to write the host inode after the xattr inode | 221 | * It is important to write the host inode after the xattr inode |
228 | * because if the host inode gets synchronized (via 'fsync()'), then | 222 | * because if the host inode gets synchronized (via 'fsync()'), then |
@@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, | |||
240 | out_cancel: | 234 | out_cancel: |
241 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); | 235 | host_ui->xattr_size -= CALC_XATTR_BYTES(size); |
242 | host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); | 236 | host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); |
243 | make_bad_inode(inode); | ||
244 | out_unlock: | ||
245 | mutex_unlock(&host_ui->ui_mutex); | 237 | mutex_unlock(&host_ui->ui_mutex); |
238 | make_bad_inode(inode); | ||
239 | out_free: | ||
246 | ubifs_release_budget(c, &req); | 240 | ubifs_release_budget(c, &req); |
247 | return err; | 241 | return err; |
248 | } | 242 | } |
@@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, | |||
312 | 306 | ||
313 | dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, | 307 | dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, |
314 | host->i_ino, dentry->d_name.len, dentry->d_name.name, size); | 308 | host->i_ino, dentry->d_name.len, dentry->d_name.name, size); |
309 | ubifs_assert(mutex_is_locked(&host->i_mutex)); | ||
315 | 310 | ||
316 | if (size > UBIFS_MAX_INO_DATA) | 311 | if (size > UBIFS_MAX_INO_DATA) |
317 | return -ERANGE; | 312 | return -ERANGE; |
@@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | |||
384 | if (!xent) | 379 | if (!xent) |
385 | return -ENOMEM; | 380 | return -ENOMEM; |
386 | 381 | ||
387 | mutex_lock(&host->i_mutex); | ||
388 | xent_key_init(c, &key, host->i_ino, &nm); | 382 | xent_key_init(c, &key, host->i_ino, &nm); |
389 | err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); | 383 | err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); |
390 | if (err) { | 384 | if (err) { |
@@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | |||
419 | out_iput: | 413 | out_iput: |
420 | iput(inode); | 414 | iput(inode); |
421 | out_unlock: | 415 | out_unlock: |
422 | mutex_unlock(&host->i_mutex); | ||
423 | kfree(xent); | 416 | kfree(xent); |
424 | return err; | 417 | return err; |
425 | } | 418 | } |
@@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
449 | return -ERANGE; | 442 | return -ERANGE; |
450 | 443 | ||
451 | lowest_xent_key(c, &key, host->i_ino); | 444 | lowest_xent_key(c, &key, host->i_ino); |
452 | |||
453 | mutex_lock(&host->i_mutex); | ||
454 | while (1) { | 445 | while (1) { |
455 | int type; | 446 | int type; |
456 | 447 | ||
@@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
479 | pxent = xent; | 470 | pxent = xent; |
480 | key_read(c, &xent->key, &key); | 471 | key_read(c, &xent->key, &key); |
481 | } | 472 | } |
482 | mutex_unlock(&host->i_mutex); | ||
483 | 473 | ||
484 | kfree(pxent); | 474 | kfree(pxent); |
485 | if (err != -ENOENT) { | 475 | if (err != -ENOENT) { |
@@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, | |||
497 | int err; | 487 | int err; |
498 | struct ubifs_inode *host_ui = ubifs_inode(host); | 488 | struct ubifs_inode *host_ui = ubifs_inode(host); |
499 | struct ubifs_inode *ui = ubifs_inode(inode); | 489 | struct ubifs_inode *ui = ubifs_inode(inode); |
500 | struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, | 490 | struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1, |
501 | .dirtied_ino_d = host_ui->data_len }; | 491 | .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; |
502 | 492 | ||
503 | ubifs_assert(ui->data_len == inode->i_size); | 493 | ubifs_assert(ui->data_len == inode->i_size); |
504 | 494 | ||
diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { | |||
211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
212 | .fsync = udf_fsync_file, | 212 | .fsync = udf_fsync_file, |
213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
214 | .llseek = generic_file_llseek, | ||
214 | }; | 215 | }; |
215 | 216 | ||
216 | const struct inode_operations udf_file_inode_operations = { | 217 | const struct inode_operations udf_file_inode_operations = { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
77 | 77 | ||
78 | iinfo = UDF_I(inode); | 78 | iinfo = UDF_I(inode); |
79 | iinfo->i_unique = 0; | 79 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
80 | iinfo->i_lenExtents = 0; | 80 | iinfo->i_efe = 1; |
81 | iinfo->i_next_alloc_block = 0; | 81 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) |
82 | iinfo->i_next_alloc_goal = 0; | 82 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; |
83 | iinfo->i_strat4096 = 0; | 83 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - |
84 | sizeof(struct extendedFileEntry), | ||
85 | GFP_KERNEL); | ||
86 | } else { | ||
87 | iinfo->i_efe = 0; | ||
88 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
89 | sizeof(struct fileEntry), | ||
90 | GFP_KERNEL); | ||
91 | } | ||
92 | if (!iinfo->i_ext.i_data) { | ||
93 | iput(inode); | ||
94 | *err = -ENOMEM; | ||
95 | return NULL; | ||
96 | } | ||
84 | 97 | ||
85 | block = udf_new_block(dir->i_sb, NULL, | 98 | block = udf_new_block(dir->i_sb, NULL, |
86 | dinfo->i_location.partitionReferenceNum, | 99 | dinfo->i_location.partitionReferenceNum, |
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
111 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 124 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
112 | mark_buffer_dirty(sbi->s_lvid_bh); | 125 | mark_buffer_dirty(sbi->s_lvid_bh); |
113 | } | 126 | } |
127 | mutex_unlock(&sbi->s_alloc_mutex); | ||
114 | inode->i_mode = mode; | 128 | inode->i_mode = mode; |
115 | inode->i_uid = current->fsuid; | 129 | inode->i_uid = current->fsuid; |
116 | if (dir->i_mode & S_ISGID) { | 130 | if (dir->i_mode & S_ISGID) { |
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
129 | iinfo->i_lenEAttr = 0; | 143 | iinfo->i_lenEAttr = 0; |
130 | iinfo->i_lenAlloc = 0; | 144 | iinfo->i_lenAlloc = 0; |
131 | iinfo->i_use = 0; | 145 | iinfo->i_use = 0; |
132 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | ||
133 | iinfo->i_efe = 1; | ||
134 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) | ||
135 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; | ||
136 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
137 | sizeof(struct extendedFileEntry), | ||
138 | GFP_KERNEL); | ||
139 | } else { | ||
140 | iinfo->i_efe = 0; | ||
141 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
142 | sizeof(struct fileEntry), | ||
143 | GFP_KERNEL); | ||
144 | } | ||
145 | if (!iinfo->i_ext.i_data) { | ||
146 | iput(inode); | ||
147 | *err = -ENOMEM; | ||
148 | mutex_unlock(&sbi->s_alloc_mutex); | ||
149 | return NULL; | ||
150 | } | ||
151 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
152 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 147 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
153 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
158 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 153 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
159 | insert_inode_hash(inode); | 154 | insert_inode_hash(inode); |
160 | mark_inode_dirty(inode); | 155 | mark_inode_dirty(inode); |
161 | mutex_unlock(&sbi->s_alloc_mutex); | ||
162 | 156 | ||
163 | if (DQUOT_ALLOC_INODE(inode)) { | 157 | if (DQUOT_ALLOC_INODE(inode)) { |
164 | DQUOT_DROP(inode); | 158 | DQUOT_DROP(inode); |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 5698bbf83bbf..e25e7010627b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -369,7 +369,7 @@ enum { | |||
369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore | 369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore |
370 | }; | 370 | }; |
371 | 371 | ||
372 | static match_table_t tokens = { | 372 | static const match_table_t tokens = { |
373 | {Opt_novrs, "novrs"}, | 373 | {Opt_novrs, "novrs"}, |
374 | {Opt_nostrict, "nostrict"}, | 374 | {Opt_nostrict, "nostrict"}, |
375 | {Opt_bs, "bs=%u"}, | 375 | {Opt_bs, "bs=%u"}, |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3141969b456d..e65212dfb60e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -309,7 +309,7 @@ enum { | |||
309 | Opt_err | 309 | Opt_err |
310 | }; | 310 | }; |
311 | 311 | ||
312 | static match_table_t tokens = { | 312 | static const match_table_t tokens = { |
313 | {Opt_type_old, "ufstype=old"}, | 313 | {Opt_type_old, "ufstype=old"}, |
314 | {Opt_type_sunx86, "ufstype=sunx86"}, | 314 | {Opt_type_sunx86, "ufstype=sunx86"}, |
315 | {Opt_type_sun, "ufstype=sun"}, | 315 | {Opt_type_sun, "ufstype=sun"}, |
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1233 | { | 1233 | { |
1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); | 1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); |
1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; | 1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; |
1236 | struct match_token *tp = tokens; | 1236 | const struct match_token *tp = tokens; |
1237 | 1237 | ||
1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) | 1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) |
1239 | ++tp; | 1239 | ++tp; |
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h deleted file mode 100644 index 3abe7e9ceb33..000000000000 --- a/fs/xfs/linux-2.6/sema.h +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SUPPORT_SEMA_H__ | ||
19 | #define __XFS_SUPPORT_SEMA_H__ | ||
20 | |||
21 | #include <linux/time.h> | ||
22 | #include <linux/wait.h> | ||
23 | #include <linux/semaphore.h> | ||
24 | #include <asm/atomic.h> | ||
25 | |||
26 | /* | ||
27 | * sema_t structure just maps to struct semaphore in Linux kernel. | ||
28 | */ | ||
29 | |||
30 | typedef struct semaphore sema_t; | ||
31 | |||
32 | #define initnsema(sp, val, name) sema_init(sp, val) | ||
33 | #define psema(sp, b) down(sp) | ||
34 | #define vsema(sp) up(sp) | ||
35 | #define freesema(sema) do { } while (0) | ||
36 | |||
37 | static inline int issemalocked(sema_t *sp) | ||
38 | { | ||
39 | return down_trylock(sp) || (up(sp), 0); | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Map cpsema (try to get the sema) to down_trylock. We need to switch | ||
44 | * the return values since cpsema returns 1 (acquired) 0 (failed) and | ||
45 | * down_trylock returns the reverse 0 (acquired) 1 (failed). | ||
46 | */ | ||
47 | static inline int cpsema(sema_t *sp) | ||
48 | { | ||
49 | return down_trylock(sp) ? 0 : 1; | ||
50 | } | ||
51 | |||
52 | #endif /* __XFS_SUPPORT_SEMA_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index fa47e43b8b41..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -73,7 +73,6 @@ xfs_page_trace( | |||
73 | unsigned long pgoff) | 73 | unsigned long pgoff) |
74 | { | 74 | { |
75 | xfs_inode_t *ip; | 75 | xfs_inode_t *ip; |
76 | bhv_vnode_t *vp = vn_from_inode(inode); | ||
77 | loff_t isize = i_size_read(inode); | 76 | loff_t isize = i_size_read(inode); |
78 | loff_t offset = page_offset(page); | 77 | loff_t offset = page_offset(page); |
79 | int delalloc = -1, unmapped = -1, unwritten = -1; | 78 | int delalloc = -1, unmapped = -1, unwritten = -1; |
@@ -81,7 +80,7 @@ xfs_page_trace( | |||
81 | if (page_has_buffers(page)) | 80 | if (page_has_buffers(page)) |
82 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | 81 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); |
83 | 82 | ||
84 | ip = xfs_vtoi(vp); | 83 | ip = XFS_I(inode); |
85 | if (!ip->i_rwtrace) | 84 | if (!ip->i_rwtrace) |
86 | return; | 85 | return; |
87 | 86 | ||
@@ -1339,6 +1338,10 @@ __xfs_get_blocks( | |||
1339 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1340 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1341 | size = bh_result->b_size; | 1340 | size = bh_result->b_size; |
1341 | |||
1342 | if (!create && direct && offset >= i_size_read(inode)) | ||
1343 | return 0; | ||
1344 | |||
1342 | error = xfs_iomap(XFS_I(inode), offset, size, | 1345 | error = xfs_iomap(XFS_I(inode), offset, size, |
1343 | create ? flags : BMAPI_READ, &iomap, &niomap); | 1346 | create ? flags : BMAPI_READ, &iomap, &niomap); |
1344 | if (error) | 1347 | if (error) |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9cc8f0213095..36d5fcd3f593 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -58,7 +58,7 @@ xfs_buf_trace( | |||
58 | bp, id, | 58 | bp, id, |
59 | (void *)(unsigned long)bp->b_flags, | 59 | (void *)(unsigned long)bp->b_flags, |
60 | (void *)(unsigned long)bp->b_hold.counter, | 60 | (void *)(unsigned long)bp->b_hold.counter, |
61 | (void *)(unsigned long)bp->b_sema.count.counter, | 61 | (void *)(unsigned long)bp->b_sema.count, |
62 | (void *)current, | 62 | (void *)current, |
63 | data, ra, | 63 | data, ra, |
64 | (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), | 64 | (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), |
@@ -253,7 +253,7 @@ _xfs_buf_initialize( | |||
253 | 253 | ||
254 | memset(bp, 0, sizeof(xfs_buf_t)); | 254 | memset(bp, 0, sizeof(xfs_buf_t)); |
255 | atomic_set(&bp->b_hold, 1); | 255 | atomic_set(&bp->b_hold, 1); |
256 | init_MUTEX_LOCKED(&bp->b_iodonesema); | 256 | init_completion(&bp->b_iowait); |
257 | INIT_LIST_HEAD(&bp->b_list); | 257 | INIT_LIST_HEAD(&bp->b_list); |
258 | INIT_LIST_HEAD(&bp->b_hash_list); | 258 | INIT_LIST_HEAD(&bp->b_hash_list); |
259 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ | 259 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ |
@@ -838,6 +838,7 @@ xfs_buf_rele( | |||
838 | return; | 838 | return; |
839 | } | 839 | } |
840 | 840 | ||
841 | ASSERT(atomic_read(&bp->b_hold) > 0); | ||
841 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 842 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { |
842 | if (bp->b_relse) { | 843 | if (bp->b_relse) { |
843 | atomic_inc(&bp->b_hold); | 844 | atomic_inc(&bp->b_hold); |
@@ -851,11 +852,6 @@ xfs_buf_rele( | |||
851 | spin_unlock(&hash->bh_lock); | 852 | spin_unlock(&hash->bh_lock); |
852 | xfs_buf_free(bp); | 853 | xfs_buf_free(bp); |
853 | } | 854 | } |
854 | } else { | ||
855 | /* | ||
856 | * Catch reference count leaks | ||
857 | */ | ||
858 | ASSERT(atomic_read(&bp->b_hold) >= 0); | ||
859 | } | 855 | } |
860 | } | 856 | } |
861 | 857 | ||
@@ -1005,12 +1001,13 @@ xfs_buf_iodone_work( | |||
1005 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the |
1006 | * ordered flag and reissue them. Because we can't tell the higher | 1002 | * ordered flag and reissue them. Because we can't tell the higher |
1007 | * layers directly that they should not issue ordered I/O anymore, they | 1003 | * layers directly that they should not issue ordered I/O anymore, they |
1008 | * need to check if the ordered flag was cleared during I/O completion. | 1004 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
1009 | */ | 1005 | */ |
1010 | if ((bp->b_error == EOPNOTSUPP) && | 1006 | if ((bp->b_error == EOPNOTSUPP) && |
1011 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
1012 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
1013 | bp->b_flags &= ~XBF_ORDERED; | 1009 | bp->b_flags &= ~XBF_ORDERED; |
1010 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
1014 | xfs_buf_iorequest(bp); | 1011 | xfs_buf_iorequest(bp); |
1015 | } else if (bp->b_iodone) | 1012 | } else if (bp->b_iodone) |
1016 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
@@ -1037,7 +1034,7 @@ xfs_buf_ioend( | |||
1037 | xfs_buf_iodone_work(&bp->b_iodone_work); | 1034 | xfs_buf_iodone_work(&bp->b_iodone_work); |
1038 | } | 1035 | } |
1039 | } else { | 1036 | } else { |
1040 | up(&bp->b_iodonesema); | 1037 | complete(&bp->b_iowait); |
1041 | } | 1038 | } |
1042 | } | 1039 | } |
1043 | 1040 | ||
@@ -1275,7 +1272,7 @@ xfs_buf_iowait( | |||
1275 | XB_TRACE(bp, "iowait", 0); | 1272 | XB_TRACE(bp, "iowait", 0); |
1276 | if (atomic_read(&bp->b_io_remaining)) | 1273 | if (atomic_read(&bp->b_io_remaining)) |
1277 | blk_run_address_space(bp->b_target->bt_mapping); | 1274 | blk_run_address_space(bp->b_target->bt_mapping); |
1278 | down(&bp->b_iodonesema); | 1275 | wait_for_completion(&bp->b_iowait); |
1279 | XB_TRACE(bp, "iowaited", (long)bp->b_error); | 1276 | XB_TRACE(bp, "iowaited", (long)bp->b_error); |
1280 | return bp->b_error; | 1277 | return bp->b_error; |
1281 | } | 1278 | } |
@@ -1799,7 +1796,7 @@ int __init | |||
1799 | xfs_buf_init(void) | 1796 | xfs_buf_init(void) |
1800 | { | 1797 | { |
1801 | #ifdef XFS_BUF_TRACE | 1798 | #ifdef XFS_BUF_TRACE |
1802 | xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); | 1799 | xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); |
1803 | #endif | 1800 | #endif |
1804 | 1801 | ||
1805 | xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", | 1802 | xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 29d1d4adc078..456519a088c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -85,6 +85,14 @@ typedef enum { | |||
85 | * modifications being lost. | 85 | * modifications being lost. |
86 | */ | 86 | */ |
87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | _XFS_BARRIER_FAILED = (1 << 23), | ||
88 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
89 | 97 | ||
90 | typedef enum { | 98 | typedef enum { |
@@ -157,7 +165,7 @@ typedef struct xfs_buf { | |||
157 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 165 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
158 | xfs_buf_relse_t b_relse; /* releasing function */ | 166 | xfs_buf_relse_t b_relse; /* releasing function */ |
159 | xfs_buf_bdstrat_t b_strat; /* pre-write function */ | 167 | xfs_buf_bdstrat_t b_strat; /* pre-write function */ |
160 | struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ | 168 | struct completion b_iowait; /* queue for I/O waiters */ |
161 | void *b_fspriv; | 169 | void *b_fspriv; |
162 | void *b_fspriv2; | 170 | void *b_fspriv2; |
163 | void *b_fspriv3; | 171 | void *b_fspriv3; |
@@ -352,7 +360,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); | |||
352 | #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) | 360 | #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) |
353 | #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) | 361 | #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) |
354 | #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) | 362 | #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) |
355 | #define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); | 363 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); |
356 | 364 | ||
357 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) | 365 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) |
358 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) | 366 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 987fe84f7b13..24fd598af846 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -139,7 +139,7 @@ xfs_nfs_get_inode( | |||
139 | } | 139 | } |
140 | 140 | ||
141 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 141 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
142 | return ip->i_vnode; | 142 | return VFS_I(ip); |
143 | } | 143 | } |
144 | 144 | ||
145 | STATIC struct dentry * | 145 | STATIC struct dentry * |
@@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
167 | if (!inode) | 167 | if (!inode) |
168 | return NULL; | 168 | return NULL; |
169 | if (IS_ERR(inode)) | 169 | if (IS_ERR(inode)) |
170 | return ERR_PTR(PTR_ERR(inode)); | 170 | return ERR_CAST(inode); |
171 | result = d_alloc_anon(inode); | 171 | result = d_alloc_anon(inode); |
172 | if (!result) { | 172 | if (!result) { |
173 | iput(inode); | 173 | iput(inode); |
@@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
198 | if (!inode) | 198 | if (!inode) |
199 | return NULL; | 199 | return NULL; |
200 | if (IS_ERR(inode)) | 200 | if (IS_ERR(inode)) |
201 | return ERR_PTR(PTR_ERR(inode)); | 201 | return ERR_CAST(inode); |
202 | result = d_alloc_anon(inode); | 202 | result = d_alloc_anon(inode); |
203 | if (!result) { | 203 | if (!result) { |
204 | iput(inode); | 204 | iput(inode); |
@@ -219,9 +219,9 @@ xfs_fs_get_parent( | |||
219 | if (unlikely(error)) | 219 | if (unlikely(error)) |
220 | return ERR_PTR(-error); | 220 | return ERR_PTR(-error); |
221 | 221 | ||
222 | parent = d_alloc_anon(cip->i_vnode); | 222 | parent = d_alloc_anon(VFS_I(cip)); |
223 | if (unlikely(!parent)) { | 223 | if (unlikely(!parent)) { |
224 | iput(cip->i_vnode); | 224 | iput(VFS_I(cip)); |
225 | return ERR_PTR(-ENOMEM); | 225 | return ERR_PTR(-ENOMEM); |
226 | } | 226 | } |
227 | return parent; | 227 | return parent; |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { | |||
475 | const struct file_operations xfs_dir_file_operations = { | 475 | const struct file_operations xfs_dir_file_operations = { |
476 | .read = generic_read_dir, | 476 | .read = generic_read_dir, |
477 | .readdir = xfs_file_readdir, | 477 | .readdir = xfs_file_readdir, |
478 | .llseek = generic_file_llseek, | ||
478 | .unlocked_ioctl = xfs_file_ioctl, | 479 | .unlocked_ioctl = xfs_file_ioctl, |
479 | #ifdef CONFIG_COMPAT | 480 | #ifdef CONFIG_COMPAT |
480 | .compat_ioctl = xfs_file_compat_ioctl, | 481 | .compat_ioctl = xfs_file_compat_ioctl, |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1eefe61f0e10..36caa6d957df 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -31,7 +31,7 @@ xfs_tosspages( | |||
31 | xfs_off_t last, | 31 | xfs_off_t last, |
32 | int fiopt) | 32 | int fiopt) |
33 | { | 33 | { |
34 | struct address_space *mapping = ip->i_vnode->i_mapping; | 34 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
35 | 35 | ||
36 | if (mapping->nrpages) | 36 | if (mapping->nrpages) |
37 | truncate_inode_pages(mapping, first); | 37 | truncate_inode_pages(mapping, first); |
@@ -44,7 +44,7 @@ xfs_flushinval_pages( | |||
44 | xfs_off_t last, | 44 | xfs_off_t last, |
45 | int fiopt) | 45 | int fiopt) |
46 | { | 46 | { |
47 | struct address_space *mapping = ip->i_vnode->i_mapping; | 47 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
48 | int ret = 0; | 48 | int ret = 0; |
49 | 49 | ||
50 | if (mapping->nrpages) { | 50 | if (mapping->nrpages) { |
@@ -64,7 +64,7 @@ xfs_flush_pages( | |||
64 | uint64_t flags, | 64 | uint64_t flags, |
65 | int fiopt) | 65 | int fiopt) |
66 | { | 66 | { |
67 | struct address_space *mapping = ip->i_vnode->i_mapping; | 67 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
68 | int ret = 0; | 68 | int ret = 0; |
69 | int ret2; | 69 | int ret2; |
70 | 70 | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index acb978d9d085..48799ba7e3e6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq( | |||
245 | 245 | ||
246 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 246 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
247 | 247 | ||
248 | *inode = XFS_ITOV(ip); | 248 | *inode = VFS_I(ip); |
249 | return 0; | 249 | return 0; |
250 | } | 250 | } |
251 | 251 | ||
@@ -927,7 +927,7 @@ STATIC void | |||
927 | xfs_diflags_to_linux( | 927 | xfs_diflags_to_linux( |
928 | struct xfs_inode *ip) | 928 | struct xfs_inode *ip) |
929 | { | 929 | { |
930 | struct inode *inode = XFS_ITOV(ip); | 930 | struct inode *inode = VFS_I(ip); |
931 | unsigned int xflags = xfs_ip2xflags(ip); | 931 | unsigned int xflags = xfs_ip2xflags(ip); |
932 | 932 | ||
933 | if (xflags & XFS_XFLAG_IMMUTABLE) | 933 | if (xflags & XFS_XFLAG_IMMUTABLE) |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index e88f51028086..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -62,7 +62,7 @@ void | |||
62 | xfs_synchronize_atime( | 62 | xfs_synchronize_atime( |
63 | xfs_inode_t *ip) | 63 | xfs_inode_t *ip) |
64 | { | 64 | { |
65 | struct inode *inode = ip->i_vnode; | 65 | struct inode *inode = VFS_I(ip); |
66 | 66 | ||
67 | if (inode) { | 67 | if (inode) { |
68 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; | 68 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; |
@@ -79,7 +79,7 @@ void | |||
79 | xfs_mark_inode_dirty_sync( | 79 | xfs_mark_inode_dirty_sync( |
80 | xfs_inode_t *ip) | 80 | xfs_inode_t *ip) |
81 | { | 81 | { |
82 | struct inode *inode = ip->i_vnode; | 82 | struct inode *inode = VFS_I(ip); |
83 | 83 | ||
84 | if (inode) | 84 | if (inode) |
85 | mark_inode_dirty_sync(inode); | 85 | mark_inode_dirty_sync(inode); |
@@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync( | |||
89 | * Change the requested timestamp in the given inode. | 89 | * Change the requested timestamp in the given inode. |
90 | * We don't lock across timestamp updates, and we don't log them but | 90 | * We don't lock across timestamp updates, and we don't log them but |
91 | * we do record the fact that there is dirty information in core. | 91 | * we do record the fact that there is dirty information in core. |
92 | * | ||
93 | * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG | ||
94 | * with XFS_ICHGTIME_ACC to be sure that access time | ||
95 | * update will take. Calling first with XFS_ICHGTIME_ACC | ||
96 | * and then XFS_ICHGTIME_MOD may fail to modify the access | ||
97 | * timestamp if the filesystem is mounted noacctm. | ||
98 | */ | 92 | */ |
99 | void | 93 | void |
100 | xfs_ichgtime( | 94 | xfs_ichgtime( |
101 | xfs_inode_t *ip, | 95 | xfs_inode_t *ip, |
102 | int flags) | 96 | int flags) |
103 | { | 97 | { |
104 | struct inode *inode = vn_to_inode(XFS_ITOV(ip)); | 98 | struct inode *inode = VFS_I(ip); |
105 | timespec_t tv; | 99 | timespec_t tv; |
100 | int sync_it = 0; | ||
101 | |||
102 | tv = current_fs_time(inode->i_sb); | ||
106 | 103 | ||
107 | nanotime(&tv); | 104 | if ((flags & XFS_ICHGTIME_MOD) && |
108 | if (flags & XFS_ICHGTIME_MOD) { | 105 | !timespec_equal(&inode->i_mtime, &tv)) { |
109 | inode->i_mtime = tv; | 106 | inode->i_mtime = tv; |
110 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; | 107 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; |
111 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; | 108 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; |
109 | sync_it = 1; | ||
112 | } | 110 | } |
113 | if (flags & XFS_ICHGTIME_ACC) { | 111 | if ((flags & XFS_ICHGTIME_CHG) && |
114 | inode->i_atime = tv; | 112 | !timespec_equal(&inode->i_ctime, &tv)) { |
115 | ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec; | ||
116 | ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec; | ||
117 | } | ||
118 | if (flags & XFS_ICHGTIME_CHG) { | ||
119 | inode->i_ctime = tv; | 113 | inode->i_ctime = tv; |
120 | ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; | 114 | ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; |
121 | ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; | 115 | ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; |
116 | sync_it = 1; | ||
122 | } | 117 | } |
123 | 118 | ||
124 | /* | 119 | /* |
@@ -130,55 +125,11 @@ xfs_ichgtime( | |||
130 | * ensure that the compiler does not reorder the update | 125 | * ensure that the compiler does not reorder the update |
131 | * of i_update_core above the timestamp updates above. | 126 | * of i_update_core above the timestamp updates above. |
132 | */ | 127 | */ |
133 | SYNCHRONIZE(); | 128 | if (sync_it) { |
134 | ip->i_update_core = 1; | 129 | SYNCHRONIZE(); |
135 | if (!(inode->i_state & I_NEW)) | 130 | ip->i_update_core = 1; |
136 | mark_inode_dirty_sync(inode); | 131 | mark_inode_dirty_sync(inode); |
137 | } | ||
138 | |||
139 | /* | ||
140 | * Variant on the above which avoids querying the system clock | ||
141 | * in situations where we know the Linux inode timestamps have | ||
142 | * just been updated (and so we can update our inode cheaply). | ||
143 | */ | ||
144 | void | ||
145 | xfs_ichgtime_fast( | ||
146 | xfs_inode_t *ip, | ||
147 | struct inode *inode, | ||
148 | int flags) | ||
149 | { | ||
150 | timespec_t *tvp; | ||
151 | |||
152 | /* | ||
153 | * Atime updates for read() & friends are handled lazily now, and | ||
154 | * explicit updates must go through xfs_ichgtime() | ||
155 | */ | ||
156 | ASSERT((flags & XFS_ICHGTIME_ACC) == 0); | ||
157 | |||
158 | if (flags & XFS_ICHGTIME_MOD) { | ||
159 | tvp = &inode->i_mtime; | ||
160 | ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; | ||
161 | ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; | ||
162 | } | 132 | } |
163 | if (flags & XFS_ICHGTIME_CHG) { | ||
164 | tvp = &inode->i_ctime; | ||
165 | ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; | ||
166 | ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * We update the i_update_core field _after_ changing | ||
171 | * the timestamps in order to coordinate properly with | ||
172 | * xfs_iflush() so that we don't lose timestamp updates. | ||
173 | * This keeps us from having to hold the inode lock | ||
174 | * while doing this. We use the SYNCHRONIZE macro to | ||
175 | * ensure that the compiler does not reorder the update | ||
176 | * of i_update_core above the timestamp updates above. | ||
177 | */ | ||
178 | SYNCHRONIZE(); | ||
179 | ip->i_update_core = 1; | ||
180 | if (!(inode->i_state & I_NEW)) | ||
181 | mark_inode_dirty_sync(inode); | ||
182 | } | 133 | } |
183 | 134 | ||
184 | /* | 135 | /* |
@@ -299,7 +250,7 @@ xfs_vn_mknod( | |||
299 | if (unlikely(error)) | 250 | if (unlikely(error)) |
300 | goto out_free_acl; | 251 | goto out_free_acl; |
301 | 252 | ||
302 | inode = ip->i_vnode; | 253 | inode = VFS_I(ip); |
303 | 254 | ||
304 | error = xfs_init_security(inode, dir); | 255 | error = xfs_init_security(inode, dir); |
305 | if (unlikely(error)) | 256 | if (unlikely(error)) |
@@ -366,7 +317,7 @@ xfs_vn_lookup( | |||
366 | return NULL; | 317 | return NULL; |
367 | } | 318 | } |
368 | 319 | ||
369 | return d_splice_alias(cip->i_vnode, dentry); | 320 | return d_splice_alias(VFS_I(cip), dentry); |
370 | } | 321 | } |
371 | 322 | ||
372 | STATIC struct dentry * | 323 | STATIC struct dentry * |
@@ -399,12 +350,12 @@ xfs_vn_ci_lookup( | |||
399 | 350 | ||
400 | /* if exact match, just splice and exit */ | 351 | /* if exact match, just splice and exit */ |
401 | if (!ci_name.name) | 352 | if (!ci_name.name) |
402 | return d_splice_alias(ip->i_vnode, dentry); | 353 | return d_splice_alias(VFS_I(ip), dentry); |
403 | 354 | ||
404 | /* else case-insensitive match... */ | 355 | /* else case-insensitive match... */ |
405 | dname.name = ci_name.name; | 356 | dname.name = ci_name.name; |
406 | dname.len = ci_name.len; | 357 | dname.len = ci_name.len; |
407 | dentry = d_add_ci(ip->i_vnode, dentry, &dname); | 358 | dentry = d_add_ci(dentry, VFS_I(ip), &dname); |
408 | kmem_free(ci_name.name); | 359 | kmem_free(ci_name.name); |
409 | return dentry; | 360 | return dentry; |
410 | } | 361 | } |
@@ -478,7 +429,7 @@ xfs_vn_symlink( | |||
478 | if (unlikely(error)) | 429 | if (unlikely(error)) |
479 | goto out; | 430 | goto out; |
480 | 431 | ||
481 | inode = cip->i_vnode; | 432 | inode = VFS_I(cip); |
482 | 433 | ||
483 | error = xfs_init_security(inode, dir); | 434 | error = xfs_init_security(inode, dir); |
484 | if (unlikely(error)) | 435 | if (unlikely(error)) |
@@ -710,7 +661,7 @@ out_error: | |||
710 | return error; | 661 | return error; |
711 | } | 662 | } |
712 | 663 | ||
713 | const struct inode_operations xfs_inode_operations = { | 664 | static const struct inode_operations xfs_inode_operations = { |
714 | .permission = xfs_vn_permission, | 665 | .permission = xfs_vn_permission, |
715 | .truncate = xfs_vn_truncate, | 666 | .truncate = xfs_vn_truncate, |
716 | .getattr = xfs_vn_getattr, | 667 | .getattr = xfs_vn_getattr, |
@@ -722,7 +673,7 @@ const struct inode_operations xfs_inode_operations = { | |||
722 | .fallocate = xfs_vn_fallocate, | 673 | .fallocate = xfs_vn_fallocate, |
723 | }; | 674 | }; |
724 | 675 | ||
725 | const struct inode_operations xfs_dir_inode_operations = { | 676 | static const struct inode_operations xfs_dir_inode_operations = { |
726 | .create = xfs_vn_create, | 677 | .create = xfs_vn_create, |
727 | .lookup = xfs_vn_lookup, | 678 | .lookup = xfs_vn_lookup, |
728 | .link = xfs_vn_link, | 679 | .link = xfs_vn_link, |
@@ -747,7 +698,7 @@ const struct inode_operations xfs_dir_inode_operations = { | |||
747 | .listxattr = xfs_vn_listxattr, | 698 | .listxattr = xfs_vn_listxattr, |
748 | }; | 699 | }; |
749 | 700 | ||
750 | const struct inode_operations xfs_dir_ci_inode_operations = { | 701 | static const struct inode_operations xfs_dir_ci_inode_operations = { |
751 | .create = xfs_vn_create, | 702 | .create = xfs_vn_create, |
752 | .lookup = xfs_vn_ci_lookup, | 703 | .lookup = xfs_vn_ci_lookup, |
753 | .link = xfs_vn_link, | 704 | .link = xfs_vn_link, |
@@ -772,7 +723,7 @@ const struct inode_operations xfs_dir_ci_inode_operations = { | |||
772 | .listxattr = xfs_vn_listxattr, | 723 | .listxattr = xfs_vn_listxattr, |
773 | }; | 724 | }; |
774 | 725 | ||
775 | const struct inode_operations xfs_symlink_inode_operations = { | 726 | static const struct inode_operations xfs_symlink_inode_operations = { |
776 | .readlink = generic_readlink, | 727 | .readlink = generic_readlink, |
777 | .follow_link = xfs_vn_follow_link, | 728 | .follow_link = xfs_vn_follow_link, |
778 | .put_link = xfs_vn_put_link, | 729 | .put_link = xfs_vn_put_link, |
@@ -784,3 +735,98 @@ const struct inode_operations xfs_symlink_inode_operations = { | |||
784 | .removexattr = generic_removexattr, | 735 | .removexattr = generic_removexattr, |
785 | .listxattr = xfs_vn_listxattr, | 736 | .listxattr = xfs_vn_listxattr, |
786 | }; | 737 | }; |
738 | |||
739 | STATIC void | ||
740 | xfs_diflags_to_iflags( | ||
741 | struct inode *inode, | ||
742 | struct xfs_inode *ip) | ||
743 | { | ||
744 | if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) | ||
745 | inode->i_flags |= S_IMMUTABLE; | ||
746 | else | ||
747 | inode->i_flags &= ~S_IMMUTABLE; | ||
748 | if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) | ||
749 | inode->i_flags |= S_APPEND; | ||
750 | else | ||
751 | inode->i_flags &= ~S_APPEND; | ||
752 | if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) | ||
753 | inode->i_flags |= S_SYNC; | ||
754 | else | ||
755 | inode->i_flags &= ~S_SYNC; | ||
756 | if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) | ||
757 | inode->i_flags |= S_NOATIME; | ||
758 | else | ||
759 | inode->i_flags &= ~S_NOATIME; | ||
760 | } | ||
761 | |||
762 | /* | ||
763 | * Initialize the Linux inode, set up the operation vectors and | ||
764 | * unlock the inode. | ||
765 | * | ||
766 | * When reading existing inodes from disk this is called directly | ||
767 | * from xfs_iget, when creating a new inode it is called from | ||
768 | * xfs_ialloc after setting up the inode. | ||
769 | */ | ||
770 | void | ||
771 | xfs_setup_inode( | ||
772 | struct xfs_inode *ip) | ||
773 | { | ||
774 | struct inode *inode = ip->i_vnode; | ||
775 | |||
776 | inode->i_mode = ip->i_d.di_mode; | ||
777 | inode->i_nlink = ip->i_d.di_nlink; | ||
778 | inode->i_uid = ip->i_d.di_uid; | ||
779 | inode->i_gid = ip->i_d.di_gid; | ||
780 | |||
781 | switch (inode->i_mode & S_IFMT) { | ||
782 | case S_IFBLK: | ||
783 | case S_IFCHR: | ||
784 | inode->i_rdev = | ||
785 | MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, | ||
786 | sysv_minor(ip->i_df.if_u2.if_rdev)); | ||
787 | break; | ||
788 | default: | ||
789 | inode->i_rdev = 0; | ||
790 | break; | ||
791 | } | ||
792 | |||
793 | inode->i_generation = ip->i_d.di_gen; | ||
794 | i_size_write(inode, ip->i_d.di_size); | ||
795 | inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; | ||
796 | inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; | ||
797 | inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; | ||
798 | inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; | ||
799 | inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; | ||
800 | inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; | ||
801 | xfs_diflags_to_iflags(inode, ip); | ||
802 | xfs_iflags_clear(ip, XFS_IMODIFIED); | ||
803 | |||
804 | switch (inode->i_mode & S_IFMT) { | ||
805 | case S_IFREG: | ||
806 | inode->i_op = &xfs_inode_operations; | ||
807 | inode->i_fop = &xfs_file_operations; | ||
808 | inode->i_mapping->a_ops = &xfs_address_space_operations; | ||
809 | break; | ||
810 | case S_IFDIR: | ||
811 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) | ||
812 | inode->i_op = &xfs_dir_ci_inode_operations; | ||
813 | else | ||
814 | inode->i_op = &xfs_dir_inode_operations; | ||
815 | inode->i_fop = &xfs_dir_file_operations; | ||
816 | break; | ||
817 | case S_IFLNK: | ||
818 | inode->i_op = &xfs_symlink_inode_operations; | ||
819 | if (!(ip->i_df.if_flags & XFS_IFINLINE)) | ||
820 | inode->i_mapping->a_ops = &xfs_address_space_operations; | ||
821 | break; | ||
822 | default: | ||
823 | inode->i_op = &xfs_inode_operations; | ||
824 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
825 | break; | ||
826 | } | ||
827 | |||
828 | xfs_iflags_clear(ip, XFS_INEW); | ||
829 | barrier(); | ||
830 | |||
831 | unlock_new_inode(inode); | ||
832 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index d97ba934a2ac..8b1a1e31dc21 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h | |||
@@ -18,10 +18,7 @@ | |||
18 | #ifndef __XFS_IOPS_H__ | 18 | #ifndef __XFS_IOPS_H__ |
19 | #define __XFS_IOPS_H__ | 19 | #define __XFS_IOPS_H__ |
20 | 20 | ||
21 | extern const struct inode_operations xfs_inode_operations; | 21 | struct xfs_inode; |
22 | extern const struct inode_operations xfs_dir_inode_operations; | ||
23 | extern const struct inode_operations xfs_dir_ci_inode_operations; | ||
24 | extern const struct inode_operations xfs_symlink_inode_operations; | ||
25 | 22 | ||
26 | extern const struct file_operations xfs_file_operations; | 23 | extern const struct file_operations xfs_file_operations; |
27 | extern const struct file_operations xfs_dir_file_operations; | 24 | extern const struct file_operations xfs_dir_file_operations; |
@@ -29,14 +26,6 @@ extern const struct file_operations xfs_invis_file_operations; | |||
29 | 26 | ||
30 | extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); | 27 | extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); |
31 | 28 | ||
32 | struct xfs_inode; | 29 | extern void xfs_setup_inode(struct xfs_inode *); |
33 | extern void xfs_ichgtime(struct xfs_inode *, int); | ||
34 | extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int); | ||
35 | |||
36 | #define xfs_vtoi(vp) \ | ||
37 | ((struct xfs_inode *)vn_to_inode(vp)->i_private) | ||
38 | |||
39 | #define XFS_I(inode) \ | ||
40 | ((struct xfs_inode *)(inode)->i_private) | ||
41 | 30 | ||
42 | #endif /* __XFS_IOPS_H__ */ | 31 | #endif /* __XFS_IOPS_H__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 4d45d9351a6c..cc0f7b3a9795 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -45,13 +45,13 @@ | |||
45 | #include <mrlock.h> | 45 | #include <mrlock.h> |
46 | #include <sv.h> | 46 | #include <sv.h> |
47 | #include <mutex.h> | 47 | #include <mutex.h> |
48 | #include <sema.h> | ||
49 | #include <time.h> | 48 | #include <time.h> |
50 | 49 | ||
51 | #include <support/ktrace.h> | 50 | #include <support/ktrace.h> |
52 | #include <support/debug.h> | 51 | #include <support/debug.h> |
53 | #include <support/uuid.h> | 52 | #include <support/uuid.h> |
54 | 53 | ||
54 | #include <linux/semaphore.h> | ||
55 | #include <linux/mm.h> | 55 | #include <linux/mm.h> |
56 | #include <linux/kernel.h> | 56 | #include <linux/kernel.h> |
57 | #include <linux/blkdev.h> | 57 | #include <linux/blkdev.h> |
@@ -126,8 +126,6 @@ | |||
126 | 126 | ||
127 | #define current_cpu() (raw_smp_processor_id()) | 127 | #define current_cpu() (raw_smp_processor_id()) |
128 | #define current_pid() (current->pid) | 128 | #define current_pid() (current->pid) |
129 | #define current_fsuid(cred) (current->fsuid) | ||
130 | #define current_fsgid(cred) (current->fsgid) | ||
131 | #define current_test_flags(f) (current->flags & (f)) | 129 | #define current_test_flags(f) (current->flags & (f)) |
132 | #define current_set_flags_nested(sp, f) \ | 130 | #define current_set_flags_nested(sp, f) \ |
133 | (*(sp) = current->flags, current->flags |= (f)) | 131 | (*(sp) = current->flags, current->flags |= (f)) |
@@ -180,7 +178,7 @@ | |||
180 | #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) | 178 | #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) |
181 | #define xfs_stack_trace() dump_stack() | 179 | #define xfs_stack_trace() dump_stack() |
182 | #define xfs_itruncate_data(ip, off) \ | 180 | #define xfs_itruncate_data(ip, off) \ |
183 | (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) | 181 | (-vmtruncate(VFS_I(ip), (off))) |
184 | 182 | ||
185 | 183 | ||
186 | /* Move the kernel do_div definition off to one side */ | 184 | /* Move the kernel do_div definition off to one side */ |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 82333b3e118e..1957e5357d04 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -137,7 +137,7 @@ xfs_iozero( | |||
137 | struct address_space *mapping; | 137 | struct address_space *mapping; |
138 | int status; | 138 | int status; |
139 | 139 | ||
140 | mapping = ip->i_vnode->i_mapping; | 140 | mapping = VFS_I(ip)->i_mapping; |
141 | do { | 141 | do { |
142 | unsigned offset, bytes; | 142 | unsigned offset, bytes; |
143 | void *fsdata; | 143 | void *fsdata; |
@@ -674,9 +674,7 @@ start: | |||
674 | */ | 674 | */ |
675 | if (likely(!(ioflags & IO_INVIS) && | 675 | if (likely(!(ioflags & IO_INVIS) && |
676 | !mnt_want_write(file->f_path.mnt))) { | 676 | !mnt_want_write(file->f_path.mnt))) { |
677 | file_update_time(file); | 677 | xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
678 | xfs_ichgtime_fast(xip, inode, | ||
679 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
680 | mnt_drop_write(file->f_path.mnt); | 678 | mnt_drop_write(file->f_path.mnt); |
681 | } | 679 | } |
682 | 680 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 30ae96397e31..7227b2efef22 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -158,7 +158,7 @@ enum { | |||
158 | Opt_barrier, Opt_nobarrier, Opt_err | 158 | Opt_barrier, Opt_nobarrier, Opt_err |
159 | }; | 159 | }; |
160 | 160 | ||
161 | static match_table_t tokens = { | 161 | static const match_table_t tokens = { |
162 | {Opt_barrier, "barrier"}, | 162 | {Opt_barrier, "barrier"}, |
163 | {Opt_nobarrier, "nobarrier"}, | 163 | {Opt_nobarrier, "nobarrier"}, |
164 | {Opt_err, NULL} | 164 | {Opt_err, NULL} |
@@ -581,118 +581,6 @@ xfs_max_file_offset( | |||
581 | return (((__uint64_t)pagefactor) << bitshift) - 1; | 581 | return (((__uint64_t)pagefactor) << bitshift) - 1; |
582 | } | 582 | } |
583 | 583 | ||
584 | STATIC_INLINE void | ||
585 | xfs_set_inodeops( | ||
586 | struct inode *inode) | ||
587 | { | ||
588 | switch (inode->i_mode & S_IFMT) { | ||
589 | case S_IFREG: | ||
590 | inode->i_op = &xfs_inode_operations; | ||
591 | inode->i_fop = &xfs_file_operations; | ||
592 | inode->i_mapping->a_ops = &xfs_address_space_operations; | ||
593 | break; | ||
594 | case S_IFDIR: | ||
595 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) | ||
596 | inode->i_op = &xfs_dir_ci_inode_operations; | ||
597 | else | ||
598 | inode->i_op = &xfs_dir_inode_operations; | ||
599 | inode->i_fop = &xfs_dir_file_operations; | ||
600 | break; | ||
601 | case S_IFLNK: | ||
602 | inode->i_op = &xfs_symlink_inode_operations; | ||
603 | if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE)) | ||
604 | inode->i_mapping->a_ops = &xfs_address_space_operations; | ||
605 | break; | ||
606 | default: | ||
607 | inode->i_op = &xfs_inode_operations; | ||
608 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
609 | break; | ||
610 | } | ||
611 | } | ||
612 | |||
613 | STATIC_INLINE void | ||
614 | xfs_revalidate_inode( | ||
615 | xfs_mount_t *mp, | ||
616 | bhv_vnode_t *vp, | ||
617 | xfs_inode_t *ip) | ||
618 | { | ||
619 | struct inode *inode = vn_to_inode(vp); | ||
620 | |||
621 | inode->i_mode = ip->i_d.di_mode; | ||
622 | inode->i_nlink = ip->i_d.di_nlink; | ||
623 | inode->i_uid = ip->i_d.di_uid; | ||
624 | inode->i_gid = ip->i_d.di_gid; | ||
625 | |||
626 | switch (inode->i_mode & S_IFMT) { | ||
627 | case S_IFBLK: | ||
628 | case S_IFCHR: | ||
629 | inode->i_rdev = | ||
630 | MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, | ||
631 | sysv_minor(ip->i_df.if_u2.if_rdev)); | ||
632 | break; | ||
633 | default: | ||
634 | inode->i_rdev = 0; | ||
635 | break; | ||
636 | } | ||
637 | |||
638 | inode->i_generation = ip->i_d.di_gen; | ||
639 | i_size_write(inode, ip->i_d.di_size); | ||
640 | inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; | ||
641 | inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; | ||
642 | inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; | ||
643 | inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; | ||
644 | inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; | ||
645 | inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; | ||
646 | if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) | ||
647 | inode->i_flags |= S_IMMUTABLE; | ||
648 | else | ||
649 | inode->i_flags &= ~S_IMMUTABLE; | ||
650 | if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) | ||
651 | inode->i_flags |= S_APPEND; | ||
652 | else | ||
653 | inode->i_flags &= ~S_APPEND; | ||
654 | if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) | ||
655 | inode->i_flags |= S_SYNC; | ||
656 | else | ||
657 | inode->i_flags &= ~S_SYNC; | ||
658 | if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) | ||
659 | inode->i_flags |= S_NOATIME; | ||
660 | else | ||
661 | inode->i_flags &= ~S_NOATIME; | ||
662 | xfs_iflags_clear(ip, XFS_IMODIFIED); | ||
663 | } | ||
664 | |||
665 | void | ||
666 | xfs_initialize_vnode( | ||
667 | struct xfs_mount *mp, | ||
668 | bhv_vnode_t *vp, | ||
669 | struct xfs_inode *ip) | ||
670 | { | ||
671 | struct inode *inode = vn_to_inode(vp); | ||
672 | |||
673 | if (!ip->i_vnode) { | ||
674 | ip->i_vnode = vp; | ||
675 | inode->i_private = ip; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * We need to set the ops vectors, and unlock the inode, but if | ||
680 | * we have been called during the new inode create process, it is | ||
681 | * too early to fill in the Linux inode. We will get called a | ||
682 | * second time once the inode is properly set up, and then we can | ||
683 | * finish our work. | ||
684 | */ | ||
685 | if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) { | ||
686 | xfs_revalidate_inode(mp, vp, ip); | ||
687 | xfs_set_inodeops(inode); | ||
688 | |||
689 | xfs_iflags_clear(ip, XFS_INEW); | ||
690 | barrier(); | ||
691 | |||
692 | unlock_new_inode(inode); | ||
693 | } | ||
694 | } | ||
695 | |||
696 | int | 584 | int |
697 | xfs_blkdev_get( | 585 | xfs_blkdev_get( |
698 | xfs_mount_t *mp, | 586 | xfs_mount_t *mp, |
@@ -982,26 +870,21 @@ STATIC struct inode * | |||
982 | xfs_fs_alloc_inode( | 870 | xfs_fs_alloc_inode( |
983 | struct super_block *sb) | 871 | struct super_block *sb) |
984 | { | 872 | { |
985 | bhv_vnode_t *vp; | 873 | return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); |
986 | |||
987 | vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); | ||
988 | if (unlikely(!vp)) | ||
989 | return NULL; | ||
990 | return vn_to_inode(vp); | ||
991 | } | 874 | } |
992 | 875 | ||
993 | STATIC void | 876 | STATIC void |
994 | xfs_fs_destroy_inode( | 877 | xfs_fs_destroy_inode( |
995 | struct inode *inode) | 878 | struct inode *inode) |
996 | { | 879 | { |
997 | kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); | 880 | kmem_zone_free(xfs_vnode_zone, inode); |
998 | } | 881 | } |
999 | 882 | ||
1000 | STATIC void | 883 | STATIC void |
1001 | xfs_fs_inode_init_once( | 884 | xfs_fs_inode_init_once( |
1002 | void *vnode) | 885 | void *vnode) |
1003 | { | 886 | { |
1004 | inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); | 887 | inode_init_once((struct inode *)vnode); |
1005 | } | 888 | } |
1006 | 889 | ||
1007 | /* | 890 | /* |
@@ -1106,7 +989,7 @@ void | |||
1106 | xfs_flush_inode( | 989 | xfs_flush_inode( |
1107 | xfs_inode_t *ip) | 990 | xfs_inode_t *ip) |
1108 | { | 991 | { |
1109 | struct inode *inode = ip->i_vnode; | 992 | struct inode *inode = VFS_I(ip); |
1110 | 993 | ||
1111 | igrab(inode); | 994 | igrab(inode); |
1112 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); | 995 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); |
@@ -1131,7 +1014,7 @@ void | |||
1131 | xfs_flush_device( | 1014 | xfs_flush_device( |
1132 | xfs_inode_t *ip) | 1015 | xfs_inode_t *ip) |
1133 | { | 1016 | { |
1134 | struct inode *inode = vn_to_inode(XFS_ITOV(ip)); | 1017 | struct inode *inode = VFS_I(ip); |
1135 | 1018 | ||
1136 | igrab(inode); | 1019 | igrab(inode); |
1137 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); | 1020 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); |
@@ -1201,6 +1084,15 @@ xfssyncd( | |||
1201 | } | 1084 | } |
1202 | 1085 | ||
1203 | STATIC void | 1086 | STATIC void |
1087 | xfs_free_fsname( | ||
1088 | struct xfs_mount *mp) | ||
1089 | { | ||
1090 | kfree(mp->m_fsname); | ||
1091 | kfree(mp->m_rtname); | ||
1092 | kfree(mp->m_logname); | ||
1093 | } | ||
1094 | |||
1095 | STATIC void | ||
1204 | xfs_fs_put_super( | 1096 | xfs_fs_put_super( |
1205 | struct super_block *sb) | 1097 | struct super_block *sb) |
1206 | { | 1098 | { |
@@ -1239,8 +1131,6 @@ xfs_fs_put_super( | |||
1239 | error = xfs_unmount_flush(mp, 0); | 1131 | error = xfs_unmount_flush(mp, 0); |
1240 | WARN_ON(error); | 1132 | WARN_ON(error); |
1241 | 1133 | ||
1242 | IRELE(rip); | ||
1243 | |||
1244 | /* | 1134 | /* |
1245 | * If we're forcing a shutdown, typically because of a media error, | 1135 | * If we're forcing a shutdown, typically because of a media error, |
1246 | * we want to make sure we invalidate dirty pages that belong to | 1136 | * we want to make sure we invalidate dirty pages that belong to |
@@ -1257,10 +1147,12 @@ xfs_fs_put_super( | |||
1257 | } | 1147 | } |
1258 | 1148 | ||
1259 | xfs_unmountfs(mp); | 1149 | xfs_unmountfs(mp); |
1150 | xfs_freesb(mp); | ||
1260 | xfs_icsb_destroy_counters(mp); | 1151 | xfs_icsb_destroy_counters(mp); |
1261 | xfs_close_devices(mp); | 1152 | xfs_close_devices(mp); |
1262 | xfs_qmops_put(mp); | 1153 | xfs_qmops_put(mp); |
1263 | xfs_dmops_put(mp); | 1154 | xfs_dmops_put(mp); |
1155 | xfs_free_fsname(mp); | ||
1264 | kfree(mp); | 1156 | kfree(mp); |
1265 | } | 1157 | } |
1266 | 1158 | ||
@@ -1410,9 +1302,29 @@ xfs_fs_remount( | |||
1410 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1411 | break; | 1303 | break; |
1412 | default: | 1304 | default: |
1305 | /* | ||
1306 | * Logically we would return an error here to prevent | ||
1307 | * users from believing they might have changed | ||
1308 | * mount options using remount which can't be changed. | ||
1309 | * | ||
1310 | * But unfortunately mount(8) adds all options from | ||
1311 | * mtab and fstab to the mount arguments in some cases | ||
1312 | * so we can't blindly reject options, but have to | ||
1313 | * check for each specified option if it actually | ||
1314 | * differs from the currently set option and only | ||
1315 | * reject it if that's the case. | ||
1316 | * | ||
1317 | * Until that is implemented we return success for | ||
1318 | * every remount request, and silently ignore all | ||
1319 | * options that we can't actually change. | ||
1320 | */ | ||
1321 | #if 0 | ||
1413 | printk(KERN_INFO | 1322 | printk(KERN_INFO |
1414 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
1415 | return -EINVAL; | 1324 | return -EINVAL; |
1325 | #else | ||
1326 | return 0; | ||
1327 | #endif | ||
1416 | } | 1328 | } |
1417 | } | 1329 | } |
1418 | 1330 | ||
@@ -1517,6 +1429,8 @@ xfs_start_flags( | |||
1517 | struct xfs_mount_args *ap, | 1429 | struct xfs_mount_args *ap, |
1518 | struct xfs_mount *mp) | 1430 | struct xfs_mount *mp) |
1519 | { | 1431 | { |
1432 | int error; | ||
1433 | |||
1520 | /* Values are in BBs */ | 1434 | /* Values are in BBs */ |
1521 | if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { | 1435 | if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { |
1522 | /* | 1436 | /* |
@@ -1549,17 +1463,27 @@ xfs_start_flags( | |||
1549 | ap->logbufsize); | 1463 | ap->logbufsize); |
1550 | return XFS_ERROR(EINVAL); | 1464 | return XFS_ERROR(EINVAL); |
1551 | } | 1465 | } |
1466 | |||
1467 | error = ENOMEM; | ||
1468 | |||
1552 | mp->m_logbsize = ap->logbufsize; | 1469 | mp->m_logbsize = ap->logbufsize; |
1553 | mp->m_fsname_len = strlen(ap->fsname) + 1; | 1470 | mp->m_fsname_len = strlen(ap->fsname) + 1; |
1554 | mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); | 1471 | |
1555 | strcpy(mp->m_fsname, ap->fsname); | 1472 | mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); |
1473 | if (!mp->m_fsname) | ||
1474 | goto out; | ||
1475 | |||
1556 | if (ap->rtname[0]) { | 1476 | if (ap->rtname[0]) { |
1557 | mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP); | 1477 | mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); |
1558 | strcpy(mp->m_rtname, ap->rtname); | 1478 | if (!mp->m_rtname) |
1479 | goto out_free_fsname; | ||
1480 | |||
1559 | } | 1481 | } |
1482 | |||
1560 | if (ap->logname[0]) { | 1483 | if (ap->logname[0]) { |
1561 | mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP); | 1484 | mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); |
1562 | strcpy(mp->m_logname, ap->logname); | 1485 | if (!mp->m_logname) |
1486 | goto out_free_rtname; | ||
1563 | } | 1487 | } |
1564 | 1488 | ||
1565 | if (ap->flags & XFSMNT_WSYNC) | 1489 | if (ap->flags & XFSMNT_WSYNC) |
@@ -1632,6 +1556,14 @@ xfs_start_flags( | |||
1632 | if (ap->flags & XFSMNT_DMAPI) | 1556 | if (ap->flags & XFSMNT_DMAPI) |
1633 | mp->m_flags |= XFS_MOUNT_DMAPI; | 1557 | mp->m_flags |= XFS_MOUNT_DMAPI; |
1634 | return 0; | 1558 | return 0; |
1559 | |||
1560 | |||
1561 | out_free_rtname: | ||
1562 | kfree(mp->m_rtname); | ||
1563 | out_free_fsname: | ||
1564 | kfree(mp->m_fsname); | ||
1565 | out: | ||
1566 | return error; | ||
1635 | } | 1567 | } |
1636 | 1568 | ||
1637 | /* | 1569 | /* |
@@ -1792,10 +1724,10 @@ xfs_fs_fill_super( | |||
1792 | */ | 1724 | */ |
1793 | error = xfs_start_flags(args, mp); | 1725 | error = xfs_start_flags(args, mp); |
1794 | if (error) | 1726 | if (error) |
1795 | goto out_destroy_counters; | 1727 | goto out_free_fsname; |
1796 | error = xfs_readsb(mp, flags); | 1728 | error = xfs_readsb(mp, flags); |
1797 | if (error) | 1729 | if (error) |
1798 | goto out_destroy_counters; | 1730 | goto out_free_fsname; |
1799 | error = xfs_finish_flags(args, mp); | 1731 | error = xfs_finish_flags(args, mp); |
1800 | if (error) | 1732 | if (error) |
1801 | goto out_free_sb; | 1733 | goto out_free_sb; |
@@ -1811,7 +1743,7 @@ xfs_fs_fill_super( | |||
1811 | if (error) | 1743 | if (error) |
1812 | goto out_free_sb; | 1744 | goto out_free_sb; |
1813 | 1745 | ||
1814 | error = xfs_mountfs(mp, flags); | 1746 | error = xfs_mountfs(mp); |
1815 | if (error) | 1747 | if (error) |
1816 | goto out_filestream_unmount; | 1748 | goto out_filestream_unmount; |
1817 | 1749 | ||
@@ -1825,7 +1757,7 @@ xfs_fs_fill_super( | |||
1825 | sb->s_time_gran = 1; | 1757 | sb->s_time_gran = 1; |
1826 | set_posix_acl_flag(sb); | 1758 | set_posix_acl_flag(sb); |
1827 | 1759 | ||
1828 | root = igrab(mp->m_rootip->i_vnode); | 1760 | root = igrab(VFS_I(mp->m_rootip)); |
1829 | if (!root) { | 1761 | if (!root) { |
1830 | error = ENOENT; | 1762 | error = ENOENT; |
1831 | goto fail_unmount; | 1763 | goto fail_unmount; |
@@ -1857,7 +1789,8 @@ xfs_fs_fill_super( | |||
1857 | xfs_filestream_unmount(mp); | 1789 | xfs_filestream_unmount(mp); |
1858 | out_free_sb: | 1790 | out_free_sb: |
1859 | xfs_freesb(mp); | 1791 | xfs_freesb(mp); |
1860 | out_destroy_counters: | 1792 | out_free_fsname: |
1793 | xfs_free_fsname(mp); | ||
1861 | xfs_icsb_destroy_counters(mp); | 1794 | xfs_icsb_destroy_counters(mp); |
1862 | xfs_close_devices(mp); | 1795 | xfs_close_devices(mp); |
1863 | out_put_qmops: | 1796 | out_put_qmops: |
@@ -1890,10 +1823,8 @@ xfs_fs_fill_super( | |||
1890 | error = xfs_unmount_flush(mp, 0); | 1823 | error = xfs_unmount_flush(mp, 0); |
1891 | WARN_ON(error); | 1824 | WARN_ON(error); |
1892 | 1825 | ||
1893 | IRELE(mp->m_rootip); | ||
1894 | |||
1895 | xfs_unmountfs(mp); | 1826 | xfs_unmountfs(mp); |
1896 | goto out_destroy_counters; | 1827 | goto out_free_sb; |
1897 | } | 1828 | } |
1898 | 1829 | ||
1899 | STATIC int | 1830 | STATIC int |
@@ -2014,7 +1945,7 @@ xfs_free_trace_bufs(void) | |||
2014 | STATIC int __init | 1945 | STATIC int __init |
2015 | xfs_init_zones(void) | 1946 | xfs_init_zones(void) |
2016 | { | 1947 | { |
2017 | xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", | 1948 | xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", |
2018 | KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | | 1949 | KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | |
2019 | KM_ZONE_SPREAD, | 1950 | KM_ZONE_SPREAD, |
2020 | xfs_fs_inode_init_once); | 1951 | xfs_fs_inode_init_once); |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index b7d13da01bd6..fe2ef4e6a0f9 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -101,9 +101,6 @@ struct block_device; | |||
101 | 101 | ||
102 | extern __uint64_t xfs_max_file_offset(unsigned int); | 102 | extern __uint64_t xfs_max_file_offset(unsigned int); |
103 | 103 | ||
104 | extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp, | ||
105 | struct xfs_inode *ip); | ||
106 | |||
107 | extern void xfs_flush_inode(struct xfs_inode *); | 104 | extern void xfs_flush_inode(struct xfs_inode *); |
108 | extern void xfs_flush_device(struct xfs_inode *); | 105 | extern void xfs_flush_device(struct xfs_inode *); |
109 | 106 | ||
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 25488b6d9881..b52528bbbfff 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c | |||
@@ -33,7 +33,7 @@ | |||
33 | 33 | ||
34 | 34 | ||
35 | /* | 35 | /* |
36 | * Dedicated vnode inactive/reclaim sync semaphores. | 36 | * Dedicated vnode inactive/reclaim sync wait queues. |
37 | * Prime number of hash buckets since address is used as the key. | 37 | * Prime number of hash buckets since address is used as the key. |
38 | */ | 38 | */ |
39 | #define NVSYNC 37 | 39 | #define NVSYNC 37 |
@@ -82,24 +82,6 @@ vn_ioerror( | |||
82 | xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); | 82 | xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); |
83 | } | 83 | } |
84 | 84 | ||
85 | |||
86 | /* | ||
87 | * Add a reference to a referenced vnode. | ||
88 | */ | ||
89 | bhv_vnode_t * | ||
90 | vn_hold( | ||
91 | bhv_vnode_t *vp) | ||
92 | { | ||
93 | struct inode *inode; | ||
94 | |||
95 | XFS_STATS_INC(vn_hold); | ||
96 | |||
97 | inode = igrab(vn_to_inode(vp)); | ||
98 | ASSERT(inode); | ||
99 | |||
100 | return vp; | ||
101 | } | ||
102 | |||
103 | #ifdef XFS_INODE_TRACE | 85 | #ifdef XFS_INODE_TRACE |
104 | 86 | ||
105 | /* | 87 | /* |
@@ -108,7 +90,7 @@ vn_hold( | |||
108 | */ | 90 | */ |
109 | static inline int xfs_icount(struct xfs_inode *ip) | 91 | static inline int xfs_icount(struct xfs_inode *ip) |
110 | { | 92 | { |
111 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); | 93 | struct inode *vp = VFS_I(ip); |
112 | 94 | ||
113 | if (vp) | 95 | if (vp) |
114 | return vn_count(vp); | 96 | return vn_count(vp); |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 41ca2cec5d31..683ce16210ff 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -22,20 +22,6 @@ struct file; | |||
22 | struct xfs_iomap; | 22 | struct xfs_iomap; |
23 | struct attrlist_cursor_kern; | 23 | struct attrlist_cursor_kern; |
24 | 24 | ||
25 | typedef struct inode bhv_vnode_t; | ||
26 | |||
27 | /* | ||
28 | * Vnode to Linux inode mapping. | ||
29 | */ | ||
30 | static inline bhv_vnode_t *vn_from_inode(struct inode *inode) | ||
31 | { | ||
32 | return inode; | ||
33 | } | ||
34 | static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) | ||
35 | { | ||
36 | return vnode; | ||
37 | } | ||
38 | |||
39 | /* | 25 | /* |
40 | * Return values for xfs_inactive. A return value of | 26 | * Return values for xfs_inactive. A return value of |
41 | * VN_INACTIVE_NOCACHE implies that the file system behavior | 27 | * VN_INACTIVE_NOCACHE implies that the file system behavior |
@@ -76,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip); | |||
76 | extern void vn_iowake(struct xfs_inode *ip); | 62 | extern void vn_iowake(struct xfs_inode *ip); |
77 | extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); | 63 | extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); |
78 | 64 | ||
79 | static inline int vn_count(bhv_vnode_t *vp) | 65 | static inline int vn_count(struct inode *vp) |
80 | { | 66 | { |
81 | return atomic_read(&vn_to_inode(vp)->i_count); | 67 | return atomic_read(&vp->i_count); |
82 | } | 68 | } |
83 | 69 | ||
84 | /* | 70 | #define IHOLD(ip) \ |
85 | * Vnode reference counting functions (and macros for compatibility). | 71 | do { \ |
86 | */ | 72 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
87 | extern bhv_vnode_t *vn_hold(bhv_vnode_t *); | 73 | atomic_inc(&(VFS_I(ip)->i_count)); \ |
74 | xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ | ||
75 | } while (0) | ||
88 | 76 | ||
89 | #if defined(XFS_INODE_TRACE) | 77 | #define IRELE(ip) \ |
90 | #define VN_HOLD(vp) \ | 78 | do { \ |
91 | ((void)vn_hold(vp), \ | 79 | xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ |
92 | xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) | 80 | iput(VFS_I(ip)); \ |
93 | #define VN_RELE(vp) \ | 81 | } while (0) |
94 | (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ | ||
95 | iput(vn_to_inode(vp))) | ||
96 | #else | ||
97 | #define VN_HOLD(vp) ((void)vn_hold(vp)) | ||
98 | #define VN_RELE(vp) (iput(vn_to_inode(vp))) | ||
99 | #endif | ||
100 | 82 | ||
101 | static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) | 83 | static inline struct inode *vn_grab(struct inode *vp) |
102 | { | 84 | { |
103 | struct inode *inode = igrab(vn_to_inode(vp)); | 85 | return igrab(vp); |
104 | return inode ? vn_from_inode(inode) : NULL; | ||
105 | } | 86 | } |
106 | 87 | ||
107 | /* | 88 | /* |
108 | * Dealing with bad inodes | 89 | * Dealing with bad inodes |
109 | */ | 90 | */ |
110 | static inline int VN_BAD(bhv_vnode_t *vp) | 91 | static inline int VN_BAD(struct inode *vp) |
111 | { | 92 | { |
112 | return is_bad_inode(vn_to_inode(vp)); | 93 | return is_bad_inode(vp); |
113 | } | 94 | } |
114 | 95 | ||
115 | /* | 96 | /* |
116 | * Extracting atime values in various formats | 97 | * Extracting atime values in various formats |
117 | */ | 98 | */ |
118 | static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) | 99 | static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime) |
119 | { | 100 | { |
120 | bs_atime->tv_sec = vp->i_atime.tv_sec; | 101 | bs_atime->tv_sec = vp->i_atime.tv_sec; |
121 | bs_atime->tv_nsec = vp->i_atime.tv_nsec; | 102 | bs_atime->tv_nsec = vp->i_atime.tv_nsec; |
122 | } | 103 | } |
123 | 104 | ||
124 | static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) | 105 | static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts) |
125 | { | 106 | { |
126 | *ts = vp->i_atime; | 107 | *ts = vp->i_atime; |
127 | } | 108 | } |
128 | 109 | ||
129 | static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) | 110 | static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) |
130 | { | 111 | { |
131 | *tt = vp->i_atime.tv_sec; | 112 | *tt = vp->i_atime.tv_sec; |
132 | } | 113 | } |
@@ -134,9 +115,9 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) | |||
134 | /* | 115 | /* |
135 | * Some useful predicates. | 116 | * Some useful predicates. |
136 | */ | 117 | */ |
137 | #define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) | 118 | #define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) |
138 | #define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) | 119 | #define VN_CACHED(vp) (vp->i_mapping->nrpages) |
139 | #define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ | 120 | #define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ |
140 | PAGECACHE_TAG_DIRTY) | 121 | PAGECACHE_TAG_DIRTY) |
141 | 122 | ||
142 | 123 | ||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index fc9f3fb39b7b..f2705f2fd43c 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -101,11 +101,18 @@ xfs_qm_dqinit( | |||
101 | if (brandnewdquot) { | 101 | if (brandnewdquot) { |
102 | dqp->dq_flnext = dqp->dq_flprev = dqp; | 102 | dqp->dq_flnext = dqp->dq_flprev = dqp; |
103 | mutex_init(&dqp->q_qlock); | 103 | mutex_init(&dqp->q_qlock); |
104 | initnsema(&dqp->q_flock, 1, "fdq"); | ||
105 | sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); | 104 | sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); |
106 | 105 | ||
106 | /* | ||
107 | * Because we want to use a counting completion, complete | ||
108 | * the flush completion once to allow a single access to | ||
109 | * the flush completion without blocking. | ||
110 | */ | ||
111 | init_completion(&dqp->q_flush); | ||
112 | complete(&dqp->q_flush); | ||
113 | |||
107 | #ifdef XFS_DQUOT_TRACE | 114 | #ifdef XFS_DQUOT_TRACE |
108 | dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP); | 115 | dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS); |
109 | xfs_dqtrace_entry(dqp, "DQINIT"); | 116 | xfs_dqtrace_entry(dqp, "DQINIT"); |
110 | #endif | 117 | #endif |
111 | } else { | 118 | } else { |
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy( | |||
150 | ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); | 157 | ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); |
151 | 158 | ||
152 | mutex_destroy(&dqp->q_qlock); | 159 | mutex_destroy(&dqp->q_qlock); |
153 | freesema(&dqp->q_flock); | ||
154 | sv_destroy(&dqp->q_pinwait); | 160 | sv_destroy(&dqp->q_pinwait); |
155 | 161 | ||
156 | #ifdef XFS_DQUOT_TRACE | 162 | #ifdef XFS_DQUOT_TRACE |
@@ -431,7 +437,7 @@ xfs_qm_dqalloc( | |||
431 | * when it unlocks the inode. Since we want to keep the quota | 437 | * when it unlocks the inode. Since we want to keep the quota |
432 | * inode around, we bump the vnode ref count now. | 438 | * inode around, we bump the vnode ref count now. |
433 | */ | 439 | */ |
434 | VN_HOLD(XFS_ITOV(quotip)); | 440 | IHOLD(quotip); |
435 | 441 | ||
436 | xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); | 442 | xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); |
437 | nmaps = 1; | 443 | nmaps = 1; |
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush( | |||
1211 | int error; | 1217 | int error; |
1212 | 1218 | ||
1213 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1219 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
1214 | ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); | 1220 | ASSERT(!completion_done(&dqp->q_flush)); |
1215 | xfs_dqtrace_entry(dqp, "DQFLUSH"); | 1221 | xfs_dqtrace_entry(dqp, "DQFLUSH"); |
1216 | 1222 | ||
1217 | /* | 1223 | /* |
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done( | |||
1348 | xfs_dqfunlock(dqp); | 1354 | xfs_dqfunlock(dqp); |
1349 | } | 1355 | } |
1350 | 1356 | ||
1351 | |||
1352 | int | ||
1353 | xfs_qm_dqflock_nowait( | ||
1354 | xfs_dquot_t *dqp) | ||
1355 | { | ||
1356 | int locked; | ||
1357 | |||
1358 | locked = cpsema(&((dqp)->q_flock)); | ||
1359 | |||
1360 | /* XXX ifdef these out */ | ||
1361 | if (locked) | ||
1362 | (dqp)->dq_flags |= XFS_DQ_FLOCKED; | ||
1363 | return (locked); | ||
1364 | } | ||
1365 | |||
1366 | |||
1367 | int | 1357 | int |
1368 | xfs_qm_dqlock_nowait( | 1358 | xfs_qm_dqlock_nowait( |
1369 | xfs_dquot_t *dqp) | 1359 | xfs_dquot_t *dqp) |
1370 | { | 1360 | { |
1371 | return (mutex_trylock(&((dqp)->q_qlock))); | 1361 | return mutex_trylock(&dqp->q_qlock); |
1372 | } | 1362 | } |
1373 | 1363 | ||
1374 | void | 1364 | void |
1375 | xfs_dqlock( | 1365 | xfs_dqlock( |
1376 | xfs_dquot_t *dqp) | 1366 | xfs_dquot_t *dqp) |
1377 | { | 1367 | { |
1378 | mutex_lock(&(dqp->q_qlock)); | 1368 | mutex_lock(&dqp->q_qlock); |
1379 | } | 1369 | } |
1380 | 1370 | ||
1381 | void | 1371 | void |
@@ -1468,7 +1458,7 @@ xfs_qm_dqpurge( | |||
1468 | * if we're turning off quotas. Basically, we need this flush | 1458 | * if we're turning off quotas. Basically, we need this flush |
1469 | * lock, and are willing to block on it. | 1459 | * lock, and are willing to block on it. |
1470 | */ | 1460 | */ |
1471 | if (! xfs_qm_dqflock_nowait(dqp)) { | 1461 | if (!xfs_dqflock_nowait(dqp)) { |
1472 | /* | 1462 | /* |
1473 | * Block on the flush lock after nudging dquot buffer, | 1463 | * Block on the flush lock after nudging dquot buffer, |
1474 | * if it is incore. | 1464 | * if it is incore. |
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index f7393bba4e95..8958d0faf8d3 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h | |||
@@ -82,7 +82,7 @@ typedef struct xfs_dquot { | |||
82 | xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ | 82 | xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ |
83 | xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ | 83 | xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ |
84 | mutex_t q_qlock; /* quota lock */ | 84 | mutex_t q_qlock; /* quota lock */ |
85 | sema_t q_flock; /* flush lock */ | 85 | struct completion q_flush; /* flush completion queue */ |
86 | uint q_pincount; /* pin count for this dquot */ | 86 | uint q_pincount; /* pin count for this dquot */ |
87 | sv_t q_pinwait; /* sync var for pinning */ | 87 | sv_t q_pinwait; /* sync var for pinning */ |
88 | #ifdef XFS_DQUOT_TRACE | 88 | #ifdef XFS_DQUOT_TRACE |
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) | |||
113 | 113 | ||
114 | 114 | ||
115 | /* | 115 | /* |
116 | * The following three routines simply manage the q_flock | 116 | * Manage the q_flush completion queue embedded in the dquot. This completion |
117 | * semaphore embedded in the dquot. This semaphore synchronizes | 117 | * queue synchronizes processes attempting to flush the in-core dquot back to |
118 | * processes attempting to flush the in-core dquot back to disk. | 118 | * disk. |
119 | */ | 119 | */ |
120 | #define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ | 120 | static inline void xfs_dqflock(xfs_dquot_t *dqp) |
121 | (dqp)->dq_flags |= XFS_DQ_FLOCKED; } | 121 | { |
122 | #define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ | 122 | wait_for_completion(&dqp->q_flush); |
123 | vsema(&((dqp)->q_flock)); \ | 123 | } |
124 | (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } | 124 | |
125 | static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) | ||
126 | { | ||
127 | return try_wait_for_completion(&dqp->q_flush); | ||
128 | } | ||
129 | |||
130 | static inline void xfs_dqfunlock(xfs_dquot_t *dqp) | ||
131 | { | ||
132 | complete(&dqp->q_flush); | ||
133 | } | ||
125 | 134 | ||
126 | #define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) | ||
127 | #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) | 135 | #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) |
128 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) | 136 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) |
129 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 137 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
@@ -167,7 +175,6 @@ extern int xfs_qm_dqflush(xfs_dquot_t *, uint); | |||
167 | extern int xfs_qm_dqpurge(xfs_dquot_t *); | 175 | extern int xfs_qm_dqpurge(xfs_dquot_t *); |
168 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); | 176 | extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); |
169 | extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); | 177 | extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); |
170 | extern int xfs_qm_dqflock_nowait(xfs_dquot_t *); | ||
171 | extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); | 178 | extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); |
172 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, | 179 | extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, |
173 | xfs_disk_dquot_t *); | 180 | xfs_disk_dquot_t *); |
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 08d2fc89e6a1..f028644caa5e 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push( | |||
151 | dqp = logitem->qli_dquot; | 151 | dqp = logitem->qli_dquot; |
152 | 152 | ||
153 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 153 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
154 | ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); | 154 | ASSERT(!completion_done(&dqp->q_flush)); |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Since we were able to lock the dquot's flush lock and | 157 | * Since we were able to lock the dquot's flush lock and |
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf( | |||
245 | * inode flush completed and the inode was taken off the AIL. | 245 | * inode flush completed and the inode was taken off the AIL. |
246 | * So, just get out. | 246 | * So, just get out. |
247 | */ | 247 | */ |
248 | if (!issemalocked(&(dqp->q_flock)) || | 248 | if (completion_done(&dqp->q_flush) || |
249 | ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { | 249 | ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { |
250 | qip->qli_pushbuf_flag = 0; | 250 | qip->qli_pushbuf_flag = 0; |
251 | xfs_dqunlock(dqp); | 251 | xfs_dqunlock(dqp); |
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf( | |||
258 | if (bp != NULL) { | 258 | if (bp != NULL) { |
259 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 259 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
260 | dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && | 260 | dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && |
261 | issemalocked(&(dqp->q_flock))); | 261 | !completion_done(&dqp->q_flush)); |
262 | qip->qli_pushbuf_flag = 0; | 262 | qip->qli_pushbuf_flag = 0; |
263 | xfs_dqunlock(dqp); | 263 | xfs_dqunlock(dqp); |
264 | 264 | ||
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock( | |||
317 | return (XFS_ITEM_LOCKED); | 317 | return (XFS_ITEM_LOCKED); |
318 | 318 | ||
319 | retval = XFS_ITEM_SUCCESS; | 319 | retval = XFS_ITEM_SUCCESS; |
320 | if (! xfs_qm_dqflock_nowait(dqp)) { | 320 | if (!xfs_dqflock_nowait(dqp)) { |
321 | /* | 321 | /* |
322 | * The dquot is already being flushed. It may have been | 322 | * The dquot is already being flushed. It may have been |
323 | * flushed delayed write, however, and we don't want to | 323 | * flushed delayed write, however, and we don't want to |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 021934a3d456..df0ffef9775a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy( | |||
310 | */ | 310 | */ |
311 | void | 311 | void |
312 | xfs_qm_mount_quotas( | 312 | xfs_qm_mount_quotas( |
313 | xfs_mount_t *mp, | 313 | xfs_mount_t *mp) |
314 | int mfsi_flags) | ||
315 | { | 314 | { |
316 | int error = 0; | 315 | int error = 0; |
317 | uint sbf; | 316 | uint sbf; |
@@ -346,8 +345,7 @@ xfs_qm_mount_quotas( | |||
346 | /* | 345 | /* |
347 | * If any of the quotas are not consistent, do a quotacheck. | 346 | * If any of the quotas are not consistent, do a quotacheck. |
348 | */ | 347 | */ |
349 | if (XFS_QM_NEED_QUOTACHECK(mp) && | 348 | if (XFS_QM_NEED_QUOTACHECK(mp)) { |
350 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { | ||
351 | error = xfs_qm_quotacheck(mp); | 349 | error = xfs_qm_quotacheck(mp); |
352 | if (error) { | 350 | if (error) { |
353 | /* Quotacheck failed and disabled quotas. */ | 351 | /* Quotacheck failed and disabled quotas. */ |
@@ -484,7 +482,7 @@ again: | |||
484 | xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); | 482 | xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); |
485 | /* XXX a sentinel would be better */ | 483 | /* XXX a sentinel would be better */ |
486 | recl = XFS_QI_MPLRECLAIMS(mp); | 484 | recl = XFS_QI_MPLRECLAIMS(mp); |
487 | if (! xfs_qm_dqflock_nowait(dqp)) { | 485 | if (!xfs_dqflock_nowait(dqp)) { |
488 | /* | 486 | /* |
489 | * If we can't grab the flush lock then check | 487 | * If we can't grab the flush lock then check |
490 | * to see if the dquot has been flushed delayed | 488 | * to see if the dquot has been flushed delayed |
@@ -1062,7 +1060,7 @@ xfs_qm_sync( | |||
1062 | 1060 | ||
1063 | /* XXX a sentinel would be better */ | 1061 | /* XXX a sentinel would be better */ |
1064 | recl = XFS_QI_MPLRECLAIMS(mp); | 1062 | recl = XFS_QI_MPLRECLAIMS(mp); |
1065 | if (! xfs_qm_dqflock_nowait(dqp)) { | 1063 | if (!xfs_dqflock_nowait(dqp)) { |
1066 | if (nowait) { | 1064 | if (nowait) { |
1067 | xfs_dqunlock(dqp); | 1065 | xfs_dqunlock(dqp); |
1068 | continue; | 1066 | continue; |
@@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist( | |||
2079 | * Try to grab the flush lock. If this dquot is in the process of | 2077 | * Try to grab the flush lock. If this dquot is in the process of |
2080 | * getting flushed to disk, we don't want to reclaim it. | 2078 | * getting flushed to disk, we don't want to reclaim it. |
2081 | */ | 2079 | */ |
2082 | if (! xfs_qm_dqflock_nowait(dqp)) { | 2080 | if (!xfs_dqflock_nowait(dqp)) { |
2083 | xfs_dqunlock(dqp); | 2081 | xfs_dqunlock(dqp); |
2084 | dqp = dqp->dq_flnext; | 2082 | dqp = dqp->dq_flnext; |
2085 | continue; | 2083 | continue; |
@@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void) | |||
2257 | * Try to grab the flush lock. If this dquot is in the process of | 2255 | * Try to grab the flush lock. If this dquot is in the process of |
2258 | * getting flushed to disk, we don't want to reclaim it. | 2256 | * getting flushed to disk, we don't want to reclaim it. |
2259 | */ | 2257 | */ |
2260 | if (! xfs_qm_dqflock_nowait(dqp)) { | 2258 | if (!xfs_dqflock_nowait(dqp)) { |
2261 | xfs_dqunlock(dqp); | 2259 | xfs_dqunlock(dqp); |
2262 | continue; | 2260 | continue; |
2263 | } | 2261 | } |
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index cd2300e374af..44f25349e478 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { | |||
165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) | 165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) |
166 | 166 | ||
167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
168 | extern void xfs_qm_mount_quotas(xfs_mount_t *, int); | 168 | extern void xfs_qm_mount_quotas(xfs_mount_t *); |
169 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 169 | extern int xfs_qm_quotacheck(xfs_mount_t *); |
170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); | 170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); |
171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); | 171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); |
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index f4f6c4c861d7..eea2e60b456b 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -162,7 +162,7 @@ xfs_qm_newmount( | |||
162 | * mounting, and get on with the boring life | 162 | * mounting, and get on with the boring life |
163 | * without disk quotas. | 163 | * without disk quotas. |
164 | */ | 164 | */ |
165 | xfs_qm_mount_quotas(mp, 0); | 165 | xfs_qm_mount_quotas(mp); |
166 | } else { | 166 | } else { |
167 | /* | 167 | /* |
168 | * Clear the quota flags, but remember them. This | 168 | * Clear the quota flags, but remember them. This |
@@ -184,13 +184,12 @@ STATIC int | |||
184 | xfs_qm_endmount( | 184 | xfs_qm_endmount( |
185 | xfs_mount_t *mp, | 185 | xfs_mount_t *mp, |
186 | uint needquotamount, | 186 | uint needquotamount, |
187 | uint quotaflags, | 187 | uint quotaflags) |
188 | int mfsi_flags) | ||
189 | { | 188 | { |
190 | if (needquotamount) { | 189 | if (needquotamount) { |
191 | ASSERT(mp->m_qflags == 0); | 190 | ASSERT(mp->m_qflags == 0); |
192 | mp->m_qflags = quotaflags; | 191 | mp->m_qflags = quotaflags; |
193 | xfs_qm_mount_quotas(mp, mfsi_flags); | 192 | xfs_qm_mount_quotas(mp); |
194 | } | 193 | } |
195 | 194 | ||
196 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) | 195 | #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index adfb8723f65a..1a3b803dfa55 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes( | |||
1034 | { | 1034 | { |
1035 | xfs_inode_t *ip, *topino; | 1035 | xfs_inode_t *ip, *topino; |
1036 | uint ireclaims; | 1036 | uint ireclaims; |
1037 | bhv_vnode_t *vp; | 1037 | struct inode *vp; |
1038 | boolean_t vnode_refd; | 1038 | boolean_t vnode_refd; |
1039 | 1039 | ||
1040 | ASSERT(mp->m_quotainfo); | 1040 | ASSERT(mp->m_quotainfo); |
@@ -1059,7 +1059,7 @@ again: | |||
1059 | ip = ip->i_mnext; | 1059 | ip = ip->i_mnext; |
1060 | continue; | 1060 | continue; |
1061 | } | 1061 | } |
1062 | vp = XFS_ITOV_NULL(ip); | 1062 | vp = VFS_I(ip); |
1063 | if (!vp) { | 1063 | if (!vp) { |
1064 | ASSERT(ip->i_udquot == NULL); | 1064 | ASSERT(ip->i_udquot == NULL); |
1065 | ASSERT(ip->i_gdquot == NULL); | 1065 | ASSERT(ip->i_gdquot == NULL); |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 3e4648ad9cfc..b2f639a1416f 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -37,15 +37,15 @@ | |||
37 | #include <linux/capability.h> | 37 | #include <linux/capability.h> |
38 | #include <linux/posix_acl_xattr.h> | 38 | #include <linux/posix_acl_xattr.h> |
39 | 39 | ||
40 | STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); | 40 | STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); |
41 | STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); | 41 | STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); |
42 | STATIC void xfs_acl_get_endian(xfs_acl_t *); | 42 | STATIC void xfs_acl_get_endian(xfs_acl_t *); |
43 | STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); | 43 | STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); |
44 | STATIC int xfs_acl_invalid(xfs_acl_t *); | 44 | STATIC int xfs_acl_invalid(xfs_acl_t *); |
45 | STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); | 45 | STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); |
46 | STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); | 46 | STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); |
47 | STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); | 47 | STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); |
48 | STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); | 48 | STATIC int xfs_acl_allow_set(struct inode *, int); |
49 | 49 | ||
50 | kmem_zone_t *xfs_acl_zone; | 50 | kmem_zone_t *xfs_acl_zone; |
51 | 51 | ||
@@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone; | |||
55 | */ | 55 | */ |
56 | int | 56 | int |
57 | xfs_acl_vhasacl_access( | 57 | xfs_acl_vhasacl_access( |
58 | bhv_vnode_t *vp) | 58 | struct inode *vp) |
59 | { | 59 | { |
60 | int error; | 60 | int error; |
61 | 61 | ||
@@ -68,7 +68,7 @@ xfs_acl_vhasacl_access( | |||
68 | */ | 68 | */ |
69 | int | 69 | int |
70 | xfs_acl_vhasacl_default( | 70 | xfs_acl_vhasacl_default( |
71 | bhv_vnode_t *vp) | 71 | struct inode *vp) |
72 | { | 72 | { |
73 | int error; | 73 | int error; |
74 | 74 | ||
@@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr( | |||
207 | 207 | ||
208 | int | 208 | int |
209 | xfs_acl_vget( | 209 | xfs_acl_vget( |
210 | bhv_vnode_t *vp, | 210 | struct inode *vp, |
211 | void *acl, | 211 | void *acl, |
212 | size_t size, | 212 | size_t size, |
213 | int kind) | 213 | int kind) |
@@ -217,7 +217,6 @@ xfs_acl_vget( | |||
217 | posix_acl_xattr_header *ext_acl = acl; | 217 | posix_acl_xattr_header *ext_acl = acl; |
218 | int flags = 0; | 218 | int flags = 0; |
219 | 219 | ||
220 | VN_HOLD(vp); | ||
221 | if(size) { | 220 | if(size) { |
222 | if (!(_ACL_ALLOC(xfs_acl))) { | 221 | if (!(_ACL_ALLOC(xfs_acl))) { |
223 | error = ENOMEM; | 222 | error = ENOMEM; |
@@ -239,11 +238,10 @@ xfs_acl_vget( | |||
239 | goto out; | 238 | goto out; |
240 | } | 239 | } |
241 | if (kind == _ACL_TYPE_ACCESS) | 240 | if (kind == _ACL_TYPE_ACCESS) |
242 | xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); | 241 | xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); |
243 | error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); | 242 | error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); |
244 | } | 243 | } |
245 | out: | 244 | out: |
246 | VN_RELE(vp); | ||
247 | if(xfs_acl) | 245 | if(xfs_acl) |
248 | _ACL_FREE(xfs_acl); | 246 | _ACL_FREE(xfs_acl); |
249 | return -error; | 247 | return -error; |
@@ -251,28 +249,26 @@ out: | |||
251 | 249 | ||
252 | int | 250 | int |
253 | xfs_acl_vremove( | 251 | xfs_acl_vremove( |
254 | bhv_vnode_t *vp, | 252 | struct inode *vp, |
255 | int kind) | 253 | int kind) |
256 | { | 254 | { |
257 | int error; | 255 | int error; |
258 | 256 | ||
259 | VN_HOLD(vp); | ||
260 | error = xfs_acl_allow_set(vp, kind); | 257 | error = xfs_acl_allow_set(vp, kind); |
261 | if (!error) { | 258 | if (!error) { |
262 | error = xfs_attr_remove(xfs_vtoi(vp), | 259 | error = xfs_attr_remove(XFS_I(vp), |
263 | kind == _ACL_TYPE_DEFAULT? | 260 | kind == _ACL_TYPE_DEFAULT? |
264 | SGI_ACL_DEFAULT: SGI_ACL_FILE, | 261 | SGI_ACL_DEFAULT: SGI_ACL_FILE, |
265 | ATTR_ROOT); | 262 | ATTR_ROOT); |
266 | if (error == ENOATTR) | 263 | if (error == ENOATTR) |
267 | error = 0; /* 'scool */ | 264 | error = 0; /* 'scool */ |
268 | } | 265 | } |
269 | VN_RELE(vp); | ||
270 | return -error; | 266 | return -error; |
271 | } | 267 | } |
272 | 268 | ||
273 | int | 269 | int |
274 | xfs_acl_vset( | 270 | xfs_acl_vset( |
275 | bhv_vnode_t *vp, | 271 | struct inode *vp, |
276 | void *acl, | 272 | void *acl, |
277 | size_t size, | 273 | size_t size, |
278 | int kind) | 274 | int kind) |
@@ -298,7 +294,6 @@ xfs_acl_vset( | |||
298 | return 0; | 294 | return 0; |
299 | } | 295 | } |
300 | 296 | ||
301 | VN_HOLD(vp); | ||
302 | error = xfs_acl_allow_set(vp, kind); | 297 | error = xfs_acl_allow_set(vp, kind); |
303 | 298 | ||
304 | /* Incoming ACL exists, set file mode based on its value */ | 299 | /* Incoming ACL exists, set file mode based on its value */ |
@@ -321,7 +316,6 @@ xfs_acl_vset( | |||
321 | } | 316 | } |
322 | 317 | ||
323 | out: | 318 | out: |
324 | VN_RELE(vp); | ||
325 | _ACL_FREE(xfs_acl); | 319 | _ACL_FREE(xfs_acl); |
326 | return -error; | 320 | return -error; |
327 | } | 321 | } |
@@ -363,7 +357,7 @@ xfs_acl_iaccess( | |||
363 | 357 | ||
364 | STATIC int | 358 | STATIC int |
365 | xfs_acl_allow_set( | 359 | xfs_acl_allow_set( |
366 | bhv_vnode_t *vp, | 360 | struct inode *vp, |
367 | int kind) | 361 | int kind) |
368 | { | 362 | { |
369 | if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) | 363 | if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) |
@@ -372,7 +366,7 @@ xfs_acl_allow_set( | |||
372 | return ENOTDIR; | 366 | return ENOTDIR; |
373 | if (vp->i_sb->s_flags & MS_RDONLY) | 367 | if (vp->i_sb->s_flags & MS_RDONLY) |
374 | return EROFS; | 368 | return EROFS; |
375 | if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) | 369 | if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) |
376 | return EPERM; | 370 | return EPERM; |
377 | return 0; | 371 | return 0; |
378 | } | 372 | } |
@@ -566,7 +560,7 @@ xfs_acl_get_endian( | |||
566 | */ | 560 | */ |
567 | STATIC void | 561 | STATIC void |
568 | xfs_acl_get_attr( | 562 | xfs_acl_get_attr( |
569 | bhv_vnode_t *vp, | 563 | struct inode *vp, |
570 | xfs_acl_t *aclp, | 564 | xfs_acl_t *aclp, |
571 | int kind, | 565 | int kind, |
572 | int flags, | 566 | int flags, |
@@ -576,7 +570,7 @@ xfs_acl_get_attr( | |||
576 | 570 | ||
577 | ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); | 571 | ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); |
578 | flags |= ATTR_ROOT; | 572 | flags |= ATTR_ROOT; |
579 | *error = xfs_attr_get(xfs_vtoi(vp), | 573 | *error = xfs_attr_get(XFS_I(vp), |
580 | kind == _ACL_TYPE_ACCESS ? | 574 | kind == _ACL_TYPE_ACCESS ? |
581 | SGI_ACL_FILE : SGI_ACL_DEFAULT, | 575 | SGI_ACL_FILE : SGI_ACL_DEFAULT, |
582 | (char *)aclp, &len, flags); | 576 | (char *)aclp, &len, flags); |
@@ -590,7 +584,7 @@ xfs_acl_get_attr( | |||
590 | */ | 584 | */ |
591 | STATIC void | 585 | STATIC void |
592 | xfs_acl_set_attr( | 586 | xfs_acl_set_attr( |
593 | bhv_vnode_t *vp, | 587 | struct inode *vp, |
594 | xfs_acl_t *aclp, | 588 | xfs_acl_t *aclp, |
595 | int kind, | 589 | int kind, |
596 | int *error) | 590 | int *error) |
@@ -615,7 +609,7 @@ xfs_acl_set_attr( | |||
615 | INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); | 609 | INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); |
616 | } | 610 | } |
617 | INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); | 611 | INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); |
618 | *error = xfs_attr_set(xfs_vtoi(vp), | 612 | *error = xfs_attr_set(XFS_I(vp), |
619 | kind == _ACL_TYPE_ACCESS ? | 613 | kind == _ACL_TYPE_ACCESS ? |
620 | SGI_ACL_FILE: SGI_ACL_DEFAULT, | 614 | SGI_ACL_FILE: SGI_ACL_DEFAULT, |
621 | (char *)newacl, len, ATTR_ROOT); | 615 | (char *)newacl, len, ATTR_ROOT); |
@@ -624,7 +618,7 @@ xfs_acl_set_attr( | |||
624 | 618 | ||
625 | int | 619 | int |
626 | xfs_acl_vtoacl( | 620 | xfs_acl_vtoacl( |
627 | bhv_vnode_t *vp, | 621 | struct inode *vp, |
628 | xfs_acl_t *access_acl, | 622 | xfs_acl_t *access_acl, |
629 | xfs_acl_t *default_acl) | 623 | xfs_acl_t *default_acl) |
630 | { | 624 | { |
@@ -639,7 +633,7 @@ xfs_acl_vtoacl( | |||
639 | if (error) | 633 | if (error) |
640 | access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; | 634 | access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; |
641 | else /* We have a good ACL and the file mode, synchronize. */ | 635 | else /* We have a good ACL and the file mode, synchronize. */ |
642 | xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); | 636 | xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); |
643 | } | 637 | } |
644 | 638 | ||
645 | if (default_acl) { | 639 | if (default_acl) { |
@@ -656,7 +650,7 @@ xfs_acl_vtoacl( | |||
656 | */ | 650 | */ |
657 | int | 651 | int |
658 | xfs_acl_inherit( | 652 | xfs_acl_inherit( |
659 | bhv_vnode_t *vp, | 653 | struct inode *vp, |
660 | mode_t mode, | 654 | mode_t mode, |
661 | xfs_acl_t *pdaclp) | 655 | xfs_acl_t *pdaclp) |
662 | { | 656 | { |
@@ -715,7 +709,7 @@ out_error: | |||
715 | */ | 709 | */ |
716 | STATIC int | 710 | STATIC int |
717 | xfs_acl_setmode( | 711 | xfs_acl_setmode( |
718 | bhv_vnode_t *vp, | 712 | struct inode *vp, |
719 | xfs_acl_t *acl, | 713 | xfs_acl_t *acl, |
720 | int *basicperms) | 714 | int *basicperms) |
721 | { | 715 | { |
@@ -734,7 +728,7 @@ xfs_acl_setmode( | |||
734 | * mode. The m:: bits take precedence over the g:: bits. | 728 | * mode. The m:: bits take precedence over the g:: bits. |
735 | */ | 729 | */ |
736 | iattr.ia_valid = ATTR_MODE; | 730 | iattr.ia_valid = ATTR_MODE; |
737 | iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode; | 731 | iattr.ia_mode = XFS_I(vp)->i_d.di_mode; |
738 | iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); | 732 | iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); |
739 | ap = acl->acl_entry; | 733 | ap = acl->acl_entry; |
740 | for (i = 0; i < acl->acl_cnt; ++i) { | 734 | for (i = 0; i < acl->acl_cnt; ++i) { |
@@ -764,7 +758,7 @@ xfs_acl_setmode( | |||
764 | if (gap && nomask) | 758 | if (gap && nomask) |
765 | iattr.ia_mode |= gap->ae_perm << 3; | 759 | iattr.ia_mode |= gap->ae_perm << 3; |
766 | 760 | ||
767 | return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred); | 761 | return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); |
768 | } | 762 | } |
769 | 763 | ||
770 | /* | 764 | /* |
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 323ee94cf831..a4e293b93efa 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -59,14 +59,14 @@ extern struct kmem_zone *xfs_acl_zone; | |||
59 | (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) | 59 | (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) |
60 | #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) | 60 | #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) |
61 | 61 | ||
62 | extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *); | 62 | extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); |
63 | extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); | 63 | extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); |
64 | extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *); | 64 | extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); |
65 | extern int xfs_acl_vhasacl_access(bhv_vnode_t *); | 65 | extern int xfs_acl_vhasacl_access(struct inode *); |
66 | extern int xfs_acl_vhasacl_default(bhv_vnode_t *); | 66 | extern int xfs_acl_vhasacl_default(struct inode *); |
67 | extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int); | 67 | extern int xfs_acl_vset(struct inode *, void *, size_t, int); |
68 | extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); | 68 | extern int xfs_acl_vget(struct inode *, void *, size_t, int); |
69 | extern int xfs_acl_vremove(bhv_vnode_t *, int); | 69 | extern int xfs_acl_vremove(struct inode *, int); |
70 | 70 | ||
71 | #define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) | 71 | #define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) |
72 | 72 | ||
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index f9472a2076d4..0b3b5efe848c 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h | |||
@@ -92,16 +92,6 @@ | |||
92 | ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ | 92 | ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ |
93 | } | 93 | } |
94 | 94 | ||
95 | /* define generic INT_ macros */ | ||
96 | |||
97 | #define INT_GET(reference,arch) \ | ||
98 | (((arch) == ARCH_NOCONVERT) \ | ||
99 | ? \ | ||
100 | (reference) \ | ||
101 | : \ | ||
102 | INT_SWAP((reference),(reference)) \ | ||
103 | ) | ||
104 | |||
105 | /* does not return a value */ | 95 | /* does not return a value */ |
106 | #define INT_SET(reference,arch,valueref) \ | 96 | #define INT_SET(reference,arch,valueref) \ |
107 | (__builtin_constant_p(valueref) ? \ | 97 | (__builtin_constant_p(valueref) ? \ |
@@ -112,64 +102,6 @@ | |||
112 | ) \ | 102 | ) \ |
113 | ) | 103 | ) |
114 | 104 | ||
115 | /* does not return a value */ | ||
116 | #define INT_MOD_EXPR(reference,arch,code) \ | ||
117 | (((arch) == ARCH_NOCONVERT) \ | ||
118 | ? \ | ||
119 | (void)((reference) code) \ | ||
120 | : \ | ||
121 | (void)( \ | ||
122 | (reference) = INT_GET((reference),arch) , \ | ||
123 | ((reference) code), \ | ||
124 | INT_SET(reference, arch, reference) \ | ||
125 | ) \ | ||
126 | ) | ||
127 | |||
128 | /* does not return a value */ | ||
129 | #define INT_MOD(reference,arch,delta) \ | ||
130 | (void)( \ | ||
131 | INT_MOD_EXPR(reference,arch,+=(delta)) \ | ||
132 | ) | ||
133 | |||
134 | /* | ||
135 | * INT_COPY - copy a value between two locations with the | ||
136 | * _same architecture_ but _potentially different sizes_ | ||
137 | * | ||
138 | * if the types of the two parameters are equal or they are | ||
139 | * in native architecture, a simple copy is done | ||
140 | * | ||
141 | * otherwise, architecture conversions are done | ||
142 | * | ||
143 | */ | ||
144 | |||
145 | /* does not return a value */ | ||
146 | #define INT_COPY(dst,src,arch) \ | ||
147 | ( \ | ||
148 | ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \ | ||
149 | ? \ | ||
150 | (void)((dst) = (src)) \ | ||
151 | : \ | ||
152 | INT_SET(dst, arch, INT_GET(src, arch)) \ | ||
153 | ) | ||
154 | |||
155 | /* | ||
156 | * INT_XLATE - copy a value in either direction between two locations | ||
157 | * with different architectures | ||
158 | * | ||
159 | * dir < 0 - copy from memory to buffer (native to arch) | ||
160 | * dir > 0 - copy from buffer to memory (arch to native) | ||
161 | */ | ||
162 | |||
163 | /* does not return a value */ | ||
164 | #define INT_XLATE(buf,mem,dir,arch) {\ | ||
165 | ASSERT(dir); \ | ||
166 | if (dir>0) { \ | ||
167 | (mem)=INT_GET(buf, arch); \ | ||
168 | } else { \ | ||
169 | INT_SET(buf, arch, mem); \ | ||
170 | } \ | ||
171 | } | ||
172 | |||
173 | /* | 105 | /* |
174 | * In directories inode numbers are stored as unaligned arrays of unsigned | 106 | * In directories inode numbers are stored as unaligned arrays of unsigned |
175 | * 8bit integers on disk. | 107 | * 8bit integers on disk. |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 78de80e3caa2..f7cdc28aff41 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -194,6 +194,46 @@ xfs_attr_get( | |||
194 | return(error); | 194 | return(error); |
195 | } | 195 | } |
196 | 196 | ||
197 | /* | ||
198 | * Calculate how many blocks we need for the new attribute, | ||
199 | */ | ||
200 | int | ||
201 | xfs_attr_calc_size( | ||
202 | struct xfs_inode *ip, | ||
203 | int namelen, | ||
204 | int valuelen, | ||
205 | int *local) | ||
206 | { | ||
207 | struct xfs_mount *mp = ip->i_mount; | ||
208 | int size; | ||
209 | int nblks; | ||
210 | |||
211 | /* | ||
212 | * Determine space new attribute will use, and if it would be | ||
213 | * "local" or "remote" (note: local != inline). | ||
214 | */ | ||
215 | size = xfs_attr_leaf_newentsize(namelen, valuelen, | ||
216 | mp->m_sb.sb_blocksize, local); | ||
217 | |||
218 | nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); | ||
219 | if (*local) { | ||
220 | if (size > (mp->m_sb.sb_blocksize >> 1)) { | ||
221 | /* Double split possible */ | ||
222 | nblks *= 2; | ||
223 | } | ||
224 | } else { | ||
225 | /* | ||
226 | * Out of line attribute, cannot double split, but | ||
227 | * make room for the attribute value itself. | ||
228 | */ | ||
229 | uint dblocks = XFS_B_TO_FSB(mp, valuelen); | ||
230 | nblks += dblocks; | ||
231 | nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); | ||
232 | } | ||
233 | |||
234 | return nblks; | ||
235 | } | ||
236 | |||
197 | STATIC int | 237 | STATIC int |
198 | xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, | 238 | xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, |
199 | char *value, int valuelen, int flags) | 239 | char *value, int valuelen, int flags) |
@@ -202,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, | |||
202 | xfs_fsblock_t firstblock; | 242 | xfs_fsblock_t firstblock; |
203 | xfs_bmap_free_t flist; | 243 | xfs_bmap_free_t flist; |
204 | int error, err2, committed; | 244 | int error, err2, committed; |
205 | int local, size; | ||
206 | uint nblks; | ||
207 | xfs_mount_t *mp = dp->i_mount; | 245 | xfs_mount_t *mp = dp->i_mount; |
208 | int rsvd = (flags & ATTR_ROOT) != 0; | 246 | int rsvd = (flags & ATTR_ROOT) != 0; |
247 | int local; | ||
209 | 248 | ||
210 | /* | 249 | /* |
211 | * Attach the dquots to the inode. | 250 | * Attach the dquots to the inode. |
@@ -241,30 +280,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, | |||
241 | args.whichfork = XFS_ATTR_FORK; | 280 | args.whichfork = XFS_ATTR_FORK; |
242 | args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | 281 | args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; |
243 | 282 | ||
244 | /* | ||
245 | * Determine space new attribute will use, and if it would be | ||
246 | * "local" or "remote" (note: local != inline). | ||
247 | */ | ||
248 | size = xfs_attr_leaf_newentsize(name->len, valuelen, | ||
249 | mp->m_sb.sb_blocksize, &local); | ||
250 | |||
251 | nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); | ||
252 | if (local) { | ||
253 | if (size > (mp->m_sb.sb_blocksize >> 1)) { | ||
254 | /* Double split possible */ | ||
255 | nblks <<= 1; | ||
256 | } | ||
257 | } else { | ||
258 | uint dblocks = XFS_B_TO_FSB(mp, valuelen); | ||
259 | /* Out of line attribute, cannot double split, but make | ||
260 | * room for the attribute value itself. | ||
261 | */ | ||
262 | nblks += dblocks; | ||
263 | nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); | ||
264 | } | ||
265 | |||
266 | /* Size is now blocks for attribute data */ | 283 | /* Size is now blocks for attribute data */ |
267 | args.total = nblks; | 284 | args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); |
268 | 285 | ||
269 | /* | 286 | /* |
270 | * Start our first transaction of the day. | 287 | * Start our first transaction of the day. |
@@ -286,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, | |||
286 | if (rsvd) | 303 | if (rsvd) |
287 | args.trans->t_flags |= XFS_TRANS_RESERVE; | 304 | args.trans->t_flags |= XFS_TRANS_RESERVE; |
288 | 305 | ||
289 | if ((error = xfs_trans_reserve(args.trans, (uint) nblks, | 306 | if ((error = xfs_trans_reserve(args.trans, args.total, |
290 | XFS_ATTRSET_LOG_RES(mp, nblks), | 307 | XFS_ATTRSET_LOG_RES(mp, args.total), 0, |
291 | 0, XFS_TRANS_PERM_LOG_RES, | 308 | XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { |
292 | XFS_ATTRSET_LOG_COUNT))) { | ||
293 | xfs_trans_cancel(args.trans, 0); | 309 | xfs_trans_cancel(args.trans, 0); |
294 | return(error); | 310 | return(error); |
295 | } | 311 | } |
296 | xfs_ilock(dp, XFS_ILOCK_EXCL); | 312 | xfs_ilock(dp, XFS_ILOCK_EXCL); |
297 | 313 | ||
298 | error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, | 314 | error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, |
299 | rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : | 315 | rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : |
300 | XFS_QMOPT_RES_REGBLKS); | 316 | XFS_QMOPT_RES_REGBLKS); |
301 | if (error) { | 317 | if (error) { |
302 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 318 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
303 | xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); | 319 | xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); |
@@ -384,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, | |||
384 | * Commit the leaf transformation. We'll need another (linked) | 400 | * Commit the leaf transformation. We'll need another (linked) |
385 | * transaction to add the new attribute to the leaf. | 401 | * transaction to add the new attribute to the leaf. |
386 | */ | 402 | */ |
387 | if ((error = xfs_attr_rolltrans(&args.trans, dp))) | 403 | |
404 | error = xfs_trans_roll(&args.trans, dp); | ||
405 | if (error) | ||
388 | goto out; | 406 | goto out; |
389 | 407 | ||
390 | } | 408 | } |
@@ -964,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
964 | * Commit the current trans (including the inode) and start | 982 | * Commit the current trans (including the inode) and start |
965 | * a new one. | 983 | * a new one. |
966 | */ | 984 | */ |
967 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 985 | error = xfs_trans_roll(&args->trans, dp); |
986 | if (error) | ||
968 | return (error); | 987 | return (error); |
969 | 988 | ||
970 | /* | 989 | /* |
@@ -978,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
978 | * Commit the transaction that added the attr name so that | 997 | * Commit the transaction that added the attr name so that |
979 | * later routines can manage their own transactions. | 998 | * later routines can manage their own transactions. |
980 | */ | 999 | */ |
981 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 1000 | error = xfs_trans_roll(&args->trans, dp); |
1001 | if (error) | ||
982 | return (error); | 1002 | return (error); |
983 | 1003 | ||
984 | /* | 1004 | /* |
@@ -1067,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
1067 | /* | 1087 | /* |
1068 | * Commit the remove and start the next trans in series. | 1088 | * Commit the remove and start the next trans in series. |
1069 | */ | 1089 | */ |
1070 | error = xfs_attr_rolltrans(&args->trans, dp); | 1090 | error = xfs_trans_roll(&args->trans, dp); |
1071 | 1091 | ||
1072 | } else if (args->rmtblkno > 0) { | 1092 | } else if (args->rmtblkno > 0) { |
1073 | /* | 1093 | /* |
@@ -1298,7 +1318,8 @@ restart: | |||
1298 | * Commit the node conversion and start the next | 1318 | * Commit the node conversion and start the next |
1299 | * trans in the chain. | 1319 | * trans in the chain. |
1300 | */ | 1320 | */ |
1301 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 1321 | error = xfs_trans_roll(&args->trans, dp); |
1322 | if (error) | ||
1302 | goto out; | 1323 | goto out; |
1303 | 1324 | ||
1304 | goto restart; | 1325 | goto restart; |
@@ -1349,7 +1370,8 @@ restart: | |||
1349 | * Commit the leaf addition or btree split and start the next | 1370 | * Commit the leaf addition or btree split and start the next |
1350 | * trans in the chain. | 1371 | * trans in the chain. |
1351 | */ | 1372 | */ |
1352 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 1373 | error = xfs_trans_roll(&args->trans, dp); |
1374 | if (error) | ||
1353 | goto out; | 1375 | goto out; |
1354 | 1376 | ||
1355 | /* | 1377 | /* |
@@ -1449,7 +1471,8 @@ restart: | |||
1449 | /* | 1471 | /* |
1450 | * Commit and start the next trans in the chain. | 1472 | * Commit and start the next trans in the chain. |
1451 | */ | 1473 | */ |
1452 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 1474 | error = xfs_trans_roll(&args->trans, dp); |
1475 | if (error) | ||
1453 | goto out; | 1476 | goto out; |
1454 | 1477 | ||
1455 | } else if (args->rmtblkno > 0) { | 1478 | } else if (args->rmtblkno > 0) { |
@@ -1581,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) | |||
1581 | /* | 1604 | /* |
1582 | * Commit the Btree join operation and start a new trans. | 1605 | * Commit the Btree join operation and start a new trans. |
1583 | */ | 1606 | */ |
1584 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 1607 | error = xfs_trans_roll(&args->trans, dp); |
1608 | if (error) | ||
1585 | goto out; | 1609 | goto out; |
1586 | } | 1610 | } |
1587 | 1611 | ||
@@ -2082,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2082 | /* | 2106 | /* |
2083 | * Start the next trans in the chain. | 2107 | * Start the next trans in the chain. |
2084 | */ | 2108 | */ |
2085 | if ((error = xfs_attr_rolltrans(&args->trans, dp))) | 2109 | error = xfs_trans_roll(&args->trans, dp); |
2110 | if (error) | ||
2086 | return (error); | 2111 | return (error); |
2087 | } | 2112 | } |
2088 | 2113 | ||
@@ -2232,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) | |||
2232 | /* | 2257 | /* |
2233 | * Close out trans and start the next one in the chain. | 2258 | * Close out trans and start the next one in the chain. |
2234 | */ | 2259 | */ |
2235 | if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) | 2260 | error = xfs_trans_roll(&args->trans, args->dp); |
2261 | if (error) | ||
2236 | return (error); | 2262 | return (error); |
2237 | } | 2263 | } |
2238 | return(0); | 2264 | return(0); |
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 8b2d31c19e4d..fb3b2a68b9b9 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h | |||
@@ -129,6 +129,7 @@ typedef struct xfs_attr_list_context { | |||
129 | /* | 129 | /* |
130 | * Overall external interface routines. | 130 | * Overall external interface routines. |
131 | */ | 131 | */ |
132 | int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); | ||
132 | int xfs_attr_inactive(struct xfs_inode *dp); | 133 | int xfs_attr_inactive(struct xfs_inode *dp); |
133 | int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); | 134 | int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); |
134 | int xfs_attr_rmtval_get(struct xfs_da_args *args); | 135 | int xfs_attr_rmtval_get(struct xfs_da_args *args); |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 23ef5d7c87e1..79da6b2ea99e 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -2498,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) | |||
2498 | /* | 2498 | /* |
2499 | * Commit the flag value change and start the next trans in series. | 2499 | * Commit the flag value change and start the next trans in series. |
2500 | */ | 2500 | */ |
2501 | error = xfs_attr_rolltrans(&args->trans, args->dp); | 2501 | return xfs_trans_roll(&args->trans, args->dp); |
2502 | |||
2503 | return(error); | ||
2504 | } | 2502 | } |
2505 | 2503 | ||
2506 | /* | 2504 | /* |
@@ -2547,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) | |||
2547 | /* | 2545 | /* |
2548 | * Commit the flag value change and start the next trans in series. | 2546 | * Commit the flag value change and start the next trans in series. |
2549 | */ | 2547 | */ |
2550 | error = xfs_attr_rolltrans(&args->trans, args->dp); | 2548 | return xfs_trans_roll(&args->trans, args->dp); |
2551 | |||
2552 | return(error); | ||
2553 | } | 2549 | } |
2554 | 2550 | ||
2555 | /* | 2551 | /* |
@@ -2665,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) | |||
2665 | /* | 2661 | /* |
2666 | * Commit the flag value change and start the next trans in series. | 2662 | * Commit the flag value change and start the next trans in series. |
2667 | */ | 2663 | */ |
2668 | error = xfs_attr_rolltrans(&args->trans, args->dp); | 2664 | error = xfs_trans_roll(&args->trans, args->dp); |
2669 | 2665 | ||
2670 | return(error); | 2666 | return(error); |
2671 | } | 2667 | } |
@@ -2723,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) | |||
2723 | /* | 2719 | /* |
2724 | * Commit the invalidate and start the next transaction. | 2720 | * Commit the invalidate and start the next transaction. |
2725 | */ | 2721 | */ |
2726 | error = xfs_attr_rolltrans(trans, dp); | 2722 | error = xfs_trans_roll(trans, dp); |
2727 | 2723 | ||
2728 | return (error); | 2724 | return (error); |
2729 | } | 2725 | } |
@@ -2825,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, | |||
2825 | /* | 2821 | /* |
2826 | * Atomically commit the whole invalidate stuff. | 2822 | * Atomically commit the whole invalidate stuff. |
2827 | */ | 2823 | */ |
2828 | if ((error = xfs_attr_rolltrans(trans, dp))) | 2824 | error = xfs_trans_roll(trans, dp); |
2825 | if (error) | ||
2829 | return (error); | 2826 | return (error); |
2830 | } | 2827 | } |
2831 | 2828 | ||
@@ -2964,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, | |||
2964 | /* | 2961 | /* |
2965 | * Roll to next transaction. | 2962 | * Roll to next transaction. |
2966 | */ | 2963 | */ |
2967 | if ((error = xfs_attr_rolltrans(trans, dp))) | 2964 | error = xfs_trans_roll(trans, dp); |
2965 | if (error) | ||
2968 | return (error); | 2966 | return (error); |
2969 | } | 2967 | } |
2970 | 2968 | ||
@@ -2974,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, | |||
2974 | 2972 | ||
2975 | return(0); | 2973 | return(0); |
2976 | } | 2974 | } |
2977 | |||
2978 | |||
2979 | /* | ||
2980 | * Roll from one trans in the sequence of PERMANENT transactions to the next. | ||
2981 | */ | ||
2982 | int | ||
2983 | xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp) | ||
2984 | { | ||
2985 | xfs_trans_t *trans; | ||
2986 | unsigned int logres, count; | ||
2987 | int error; | ||
2988 | |||
2989 | /* | ||
2990 | * Ensure that the inode is always logged. | ||
2991 | */ | ||
2992 | trans = *transp; | ||
2993 | xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); | ||
2994 | |||
2995 | /* | ||
2996 | * Copy the critical parameters from one trans to the next. | ||
2997 | */ | ||
2998 | logres = trans->t_log_res; | ||
2999 | count = trans->t_log_count; | ||
3000 | *transp = xfs_trans_dup(trans); | ||
3001 | |||
3002 | /* | ||
3003 | * Commit the current transaction. | ||
3004 | * If this commit failed, then it'd just unlock those items that | ||
3005 | * are not marked ihold. That also means that a filesystem shutdown | ||
3006 | * is in progress. The caller takes the responsibility to cancel | ||
3007 | * the duplicate transaction that gets returned. | ||
3008 | */ | ||
3009 | if ((error = xfs_trans_commit(trans, 0))) | ||
3010 | return (error); | ||
3011 | |||
3012 | trans = *transp; | ||
3013 | |||
3014 | /* | ||
3015 | * Reserve space in the log for th next transaction. | ||
3016 | * This also pushes items in the "AIL", the list of logged items, | ||
3017 | * out to disk if they are taking up space at the tail of the log | ||
3018 | * that we want to use. This requires that either nothing be locked | ||
3019 | * across this call, or that anything that is locked be logged in | ||
3020 | * the prior and the next transactions. | ||
3021 | */ | ||
3022 | error = xfs_trans_reserve(trans, 0, logres, 0, | ||
3023 | XFS_TRANS_PERM_LOG_RES, count); | ||
3024 | /* | ||
3025 | * Ensure that the inode is in the new transaction and locked. | ||
3026 | */ | ||
3027 | if (!error) { | ||
3028 | xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); | ||
3029 | xfs_trans_ihold(trans, dp); | ||
3030 | } | ||
3031 | return (error); | ||
3032 | |||
3033 | } | ||
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 5ecf437b7825..83e9af417ca2 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h | |||
@@ -274,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, | |||
274 | struct xfs_dabuf *leaf2_bp); | 274 | struct xfs_dabuf *leaf2_bp); |
275 | int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, | 275 | int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, |
276 | int *local); | 276 | int *local); |
277 | int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); | ||
278 | |||
279 | #endif /* __XFS_ATTR_LEAF_H__ */ | 277 | #endif /* __XFS_ATTR_LEAF_H__ */ |
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41b..48228848f5ae 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c | |||
@@ -25,109 +25,6 @@ | |||
25 | * XFS bit manipulation routines, used in non-realtime code. | 25 | * XFS bit manipulation routines, used in non-realtime code. |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #ifndef HAVE_ARCH_HIGHBIT | ||
29 | /* | ||
30 | * Index of high bit number in byte, -1 for none set, 0..7 otherwise. | ||
31 | */ | ||
32 | static const char xfs_highbit[256] = { | ||
33 | -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ | ||
34 | 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ | ||
35 | 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ | ||
36 | 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ | ||
37 | 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ | ||
38 | 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ | ||
39 | 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ | ||
40 | 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ | ||
41 | 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ | ||
42 | 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ | ||
43 | 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ | ||
44 | 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ | ||
45 | 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ | ||
46 | 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ | ||
47 | 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ | ||
48 | 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ | ||
49 | 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ | ||
50 | 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ | ||
51 | 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ | ||
52 | 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ | ||
53 | 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ | ||
54 | 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ | ||
55 | 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ | ||
56 | 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ | ||
57 | 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ | ||
58 | 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ | ||
59 | 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ | ||
60 | 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ | ||
61 | 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ | ||
62 | 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ | ||
63 | 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ | ||
64 | 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ | ||
65 | }; | ||
66 | #endif | ||
67 | |||
68 | /* | ||
69 | * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. | ||
70 | */ | ||
71 | inline int | ||
72 | xfs_highbit32( | ||
73 | __uint32_t v) | ||
74 | { | ||
75 | #ifdef HAVE_ARCH_HIGHBIT | ||
76 | return highbit32(v); | ||
77 | #else | ||
78 | int i; | ||
79 | |||
80 | if (v & 0xffff0000) | ||
81 | if (v & 0xff000000) | ||
82 | i = 24; | ||
83 | else | ||
84 | i = 16; | ||
85 | else if (v & 0x0000ffff) | ||
86 | if (v & 0x0000ff00) | ||
87 | i = 8; | ||
88 | else | ||
89 | i = 0; | ||
90 | else | ||
91 | return -1; | ||
92 | return i + xfs_highbit[(v >> i) & 0xff]; | ||
93 | #endif | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. | ||
98 | */ | ||
99 | int | ||
100 | xfs_lowbit64( | ||
101 | __uint64_t v) | ||
102 | { | ||
103 | __uint32_t w = (__uint32_t)v; | ||
104 | int n = 0; | ||
105 | |||
106 | if (w) { /* lower bits */ | ||
107 | n = ffs(w); | ||
108 | } else { /* upper bits */ | ||
109 | w = (__uint32_t)(v >> 32); | ||
110 | if (w && (n = ffs(w))) | ||
111 | n += 32; | ||
112 | } | ||
113 | return n - 1; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. | ||
118 | */ | ||
119 | int | ||
120 | xfs_highbit64( | ||
121 | __uint64_t v) | ||
122 | { | ||
123 | __uint32_t h = (__uint32_t)(v >> 32); | ||
124 | |||
125 | if (h) | ||
126 | return xfs_highbit32(h) + 32; | ||
127 | return xfs_highbit32((__uint32_t)v); | ||
128 | } | ||
129 | |||
130 | |||
131 | /* | 28 | /* |
132 | * Return whether bitmap is empty. | 29 | * Return whether bitmap is empty. |
133 | * Size is number of words in the bitmap, which is padded to word boundary | 30 | * Size is number of words in the bitmap, which is padded to word boundary |
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782c..8e0e463dae2d 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h | |||
@@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n) | |||
47 | } | 47 | } |
48 | 48 | ||
49 | /* Get high bit set out of 32-bit argument, -1 if none set */ | 49 | /* Get high bit set out of 32-bit argument, -1 if none set */ |
50 | extern int xfs_highbit32(__uint32_t v); | 50 | static inline int xfs_highbit32(__uint32_t v) |
51 | { | ||
52 | return fls(v) - 1; | ||
53 | } | ||
54 | |||
55 | /* Get high bit set out of 64-bit argument, -1 if none set */ | ||
56 | static inline int xfs_highbit64(__uint64_t v) | ||
57 | { | ||
58 | return fls64(v) - 1; | ||
59 | } | ||
60 | |||
61 | /* Get low bit set out of 32-bit argument, -1 if none set */ | ||
62 | static inline int xfs_lowbit32(__uint32_t v) | ||
63 | { | ||
64 | unsigned long t = v; | ||
65 | return (v) ? find_first_bit(&t, 32) : -1; | ||
66 | } | ||
51 | 67 | ||
52 | /* Get low bit set out of 64-bit argument, -1 if none set */ | 68 | /* Get low bit set out of 64-bit argument, -1 if none set */ |
53 | extern int xfs_lowbit64(__uint64_t v); | 69 | static inline int xfs_lowbit64(__uint64_t v) |
70 | { | ||
71 | __uint32_t w = (__uint32_t)v; | ||
72 | int n = 0; | ||
54 | 73 | ||
55 | /* Get high bit set out of 64-bit argument, -1 if none set */ | 74 | if (w) { /* lower bits */ |
56 | extern int xfs_highbit64(__uint64_t); | 75 | n = ffs(w); |
76 | } else { /* upper bits */ | ||
77 | w = (__uint32_t)(v >> 32); | ||
78 | if (w && (n = ffs(w))) | ||
79 | n += 32; | ||
80 | } | ||
81 | return n - 1; | ||
82 | } | ||
57 | 83 | ||
58 | /* Return whether bitmap is empty (1 == empty) */ | 84 | /* Return whether bitmap is empty (1 == empty) */ |
59 | extern int xfs_bitmap_empty(uint *map, uint size); | 85 | extern int xfs_bitmap_empty(uint *map, uint size); |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3c4beb3a4326..a1aab9275d5a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -384,14 +384,14 @@ xfs_bmap_count_tree( | |||
384 | int levelin, | 384 | int levelin, |
385 | int *count); | 385 | int *count); |
386 | 386 | ||
387 | STATIC int | 387 | STATIC void |
388 | xfs_bmap_count_leaves( | 388 | xfs_bmap_count_leaves( |
389 | xfs_ifork_t *ifp, | 389 | xfs_ifork_t *ifp, |
390 | xfs_extnum_t idx, | 390 | xfs_extnum_t idx, |
391 | int numrecs, | 391 | int numrecs, |
392 | int *count); | 392 | int *count); |
393 | 393 | ||
394 | STATIC int | 394 | STATIC void |
395 | xfs_bmap_disk_count_leaves( | 395 | xfs_bmap_disk_count_leaves( |
396 | xfs_extnum_t idx, | 396 | xfs_extnum_t idx, |
397 | xfs_bmbt_block_t *block, | 397 | xfs_bmbt_block_t *block, |
@@ -4000,7 +4000,7 @@ xfs_bmap_add_attrfork( | |||
4000 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | 4000 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; |
4001 | } | 4001 | } |
4002 | ASSERT(ip->i_d.di_anextents == 0); | 4002 | ASSERT(ip->i_d.di_anextents == 0); |
4003 | VN_HOLD(XFS_ITOV(ip)); | 4003 | IHOLD(ip); |
4004 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 4004 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
4005 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 4005 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
4006 | switch (ip->i_d.di_format) { | 4006 | switch (ip->i_d.di_format) { |
@@ -6096,7 +6096,7 @@ xfs_bmap_get_bp( | |||
6096 | tp = cur->bc_tp; | 6096 | tp = cur->bc_tp; |
6097 | licp = &tp->t_items; | 6097 | licp = &tp->t_items; |
6098 | while (!bp && licp != NULL) { | 6098 | while (!bp && licp != NULL) { |
6099 | if (XFS_LIC_ARE_ALL_FREE(licp)) { | 6099 | if (xfs_lic_are_all_free(licp)) { |
6100 | licp = licp->lic_next; | 6100 | licp = licp->lic_next; |
6101 | continue; | 6101 | continue; |
6102 | } | 6102 | } |
@@ -6106,11 +6106,11 @@ xfs_bmap_get_bp( | |||
6106 | xfs_buf_log_item_t *bip; | 6106 | xfs_buf_log_item_t *bip; |
6107 | xfs_buf_t *lbp; | 6107 | xfs_buf_t *lbp; |
6108 | 6108 | ||
6109 | if (XFS_LIC_ISFREE(licp, i)) { | 6109 | if (xfs_lic_isfree(licp, i)) { |
6110 | continue; | 6110 | continue; |
6111 | } | 6111 | } |
6112 | 6112 | ||
6113 | lidp = XFS_LIC_SLOT(licp, i); | 6113 | lidp = xfs_lic_slot(licp, i); |
6114 | lip = lidp->lid_item; | 6114 | lip = lidp->lid_item; |
6115 | if (lip->li_type != XFS_LI_BUF) | 6115 | if (lip->li_type != XFS_LI_BUF) |
6116 | continue; | 6116 | continue; |
@@ -6367,13 +6367,9 @@ xfs_bmap_count_blocks( | |||
6367 | mp = ip->i_mount; | 6367 | mp = ip->i_mount; |
6368 | ifp = XFS_IFORK_PTR(ip, whichfork); | 6368 | ifp = XFS_IFORK_PTR(ip, whichfork); |
6369 | if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { | 6369 | if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { |
6370 | if (unlikely(xfs_bmap_count_leaves(ifp, 0, | 6370 | xfs_bmap_count_leaves(ifp, 0, |
6371 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), | 6371 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), |
6372 | count) < 0)) { | 6372 | count); |
6373 | XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)", | ||
6374 | XFS_ERRLEVEL_LOW, mp); | ||
6375 | return XFS_ERROR(EFSCORRUPTED); | ||
6376 | } | ||
6377 | return 0; | 6373 | return 0; |
6378 | } | 6374 | } |
6379 | 6375 | ||
@@ -6454,13 +6450,7 @@ xfs_bmap_count_tree( | |||
6454 | for (;;) { | 6450 | for (;;) { |
6455 | nextbno = be64_to_cpu(block->bb_rightsib); | 6451 | nextbno = be64_to_cpu(block->bb_rightsib); |
6456 | numrecs = be16_to_cpu(block->bb_numrecs); | 6452 | numrecs = be16_to_cpu(block->bb_numrecs); |
6457 | if (unlikely(xfs_bmap_disk_count_leaves(0, | 6453 | xfs_bmap_disk_count_leaves(0, block, numrecs, count); |
6458 | block, numrecs, count) < 0)) { | ||
6459 | xfs_trans_brelse(tp, bp); | ||
6460 | XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", | ||
6461 | XFS_ERRLEVEL_LOW, mp); | ||
6462 | return XFS_ERROR(EFSCORRUPTED); | ||
6463 | } | ||
6464 | xfs_trans_brelse(tp, bp); | 6454 | xfs_trans_brelse(tp, bp); |
6465 | if (nextbno == NULLFSBLOCK) | 6455 | if (nextbno == NULLFSBLOCK) |
6466 | break; | 6456 | break; |
@@ -6478,7 +6468,7 @@ xfs_bmap_count_tree( | |||
6478 | /* | 6468 | /* |
6479 | * Count leaf blocks given a range of extent records. | 6469 | * Count leaf blocks given a range of extent records. |
6480 | */ | 6470 | */ |
6481 | STATIC int | 6471 | STATIC void |
6482 | xfs_bmap_count_leaves( | 6472 | xfs_bmap_count_leaves( |
6483 | xfs_ifork_t *ifp, | 6473 | xfs_ifork_t *ifp, |
6484 | xfs_extnum_t idx, | 6474 | xfs_extnum_t idx, |
@@ -6491,14 +6481,13 @@ xfs_bmap_count_leaves( | |||
6491 | xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); | 6481 | xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); |
6492 | *count += xfs_bmbt_get_blockcount(frp); | 6482 | *count += xfs_bmbt_get_blockcount(frp); |
6493 | } | 6483 | } |
6494 | return 0; | ||
6495 | } | 6484 | } |
6496 | 6485 | ||
6497 | /* | 6486 | /* |
6498 | * Count leaf blocks given a range of extent records originally | 6487 | * Count leaf blocks given a range of extent records originally |
6499 | * in btree format. | 6488 | * in btree format. |
6500 | */ | 6489 | */ |
6501 | STATIC int | 6490 | STATIC void |
6502 | xfs_bmap_disk_count_leaves( | 6491 | xfs_bmap_disk_count_leaves( |
6503 | xfs_extnum_t idx, | 6492 | xfs_extnum_t idx, |
6504 | xfs_bmbt_block_t *block, | 6493 | xfs_bmbt_block_t *block, |
@@ -6512,5 +6501,4 @@ xfs_bmap_disk_count_leaves( | |||
6512 | frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); | 6501 | frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); |
6513 | *count += xfs_bmbt_disk_get_blockcount(frp); | 6502 | *count += xfs_bmbt_disk_get_blockcount(frp); |
6514 | } | 6503 | } |
6515 | return 0; | ||
6516 | } | 6504 | } |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index aeb87ca69fcc..cc593a84c345 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone; | |||
46 | /* | 46 | /* |
47 | * Btree magic numbers. | 47 | * Btree magic numbers. |
48 | */ | 48 | */ |
49 | const __uint32_t xfs_magics[XFS_BTNUM_MAX] = | 49 | const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { |
50 | { | ||
51 | XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC | 50 | XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC |
52 | }; | 51 | }; |
53 | 52 | ||
54 | /* | 53 | /* |
55 | * Prototypes for internal routines. | ||
56 | */ | ||
57 | |||
58 | /* | ||
59 | * Checking routine: return maxrecs for the block. | ||
60 | */ | ||
61 | STATIC int /* number of records fitting in block */ | ||
62 | xfs_btree_maxrecs( | ||
63 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
64 | xfs_btree_block_t *block);/* generic btree block pointer */ | ||
65 | |||
66 | /* | ||
67 | * Internal routines. | ||
68 | */ | ||
69 | |||
70 | /* | ||
71 | * Retrieve the block pointer from the cursor at the given level. | ||
72 | * This may be a bmap btree root or from a buffer. | ||
73 | */ | ||
74 | STATIC xfs_btree_block_t * /* generic btree block pointer */ | ||
75 | xfs_btree_get_block( | ||
76 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
77 | int level, /* level in btree */ | ||
78 | struct xfs_buf **bpp); /* buffer containing the block */ | ||
79 | |||
80 | /* | ||
81 | * Checking routine: return maxrecs for the block. | 54 | * Checking routine: return maxrecs for the block. |
82 | */ | 55 | */ |
83 | STATIC int /* number of records fitting in block */ | 56 | STATIC int /* number of records fitting in block */ |
@@ -457,35 +430,6 @@ xfs_btree_dup_cursor( | |||
457 | } | 430 | } |
458 | 431 | ||
459 | /* | 432 | /* |
460 | * Change the cursor to point to the first record at the given level. | ||
461 | * Other levels are unaffected. | ||
462 | */ | ||
463 | int /* success=1, failure=0 */ | ||
464 | xfs_btree_firstrec( | ||
465 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
466 | int level) /* level to change */ | ||
467 | { | ||
468 | xfs_btree_block_t *block; /* generic btree block pointer */ | ||
469 | xfs_buf_t *bp; /* buffer containing block */ | ||
470 | |||
471 | /* | ||
472 | * Get the block pointer for this level. | ||
473 | */ | ||
474 | block = xfs_btree_get_block(cur, level, &bp); | ||
475 | xfs_btree_check_block(cur, block, level, bp); | ||
476 | /* | ||
477 | * It's empty, there is no such record. | ||
478 | */ | ||
479 | if (!block->bb_h.bb_numrecs) | ||
480 | return 0; | ||
481 | /* | ||
482 | * Set the ptr value to 1, that's the first record/key. | ||
483 | */ | ||
484 | cur->bc_ptrs[level] = 1; | ||
485 | return 1; | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Retrieve the block pointer from the cursor at the given level. | 433 | * Retrieve the block pointer from the cursor at the given level. |
490 | * This may be a bmap btree root or from a buffer. | 434 | * This may be a bmap btree root or from a buffer. |
491 | */ | 435 | */ |
@@ -626,6 +570,13 @@ xfs_btree_init_cursor( | |||
626 | cur->bc_private.a.agbp = agbp; | 570 | cur->bc_private.a.agbp = agbp; |
627 | cur->bc_private.a.agno = agno; | 571 | cur->bc_private.a.agno = agno; |
628 | break; | 572 | break; |
573 | case XFS_BTNUM_INO: | ||
574 | /* | ||
575 | * Inode allocation btree fields. | ||
576 | */ | ||
577 | cur->bc_private.a.agbp = agbp; | ||
578 | cur->bc_private.a.agno = agno; | ||
579 | break; | ||
629 | case XFS_BTNUM_BMAP: | 580 | case XFS_BTNUM_BMAP: |
630 | /* | 581 | /* |
631 | * Bmap btree fields. | 582 | * Bmap btree fields. |
@@ -638,13 +589,6 @@ xfs_btree_init_cursor( | |||
638 | cur->bc_private.b.flags = 0; | 589 | cur->bc_private.b.flags = 0; |
639 | cur->bc_private.b.whichfork = whichfork; | 590 | cur->bc_private.b.whichfork = whichfork; |
640 | break; | 591 | break; |
641 | case XFS_BTNUM_INO: | ||
642 | /* | ||
643 | * Inode allocation btree fields. | ||
644 | */ | ||
645 | cur->bc_private.i.agbp = agbp; | ||
646 | cur->bc_private.i.agno = agno; | ||
647 | break; | ||
648 | default: | 592 | default: |
649 | ASSERT(0); | 593 | ASSERT(0); |
650 | } | 594 | } |
@@ -671,6 +615,35 @@ xfs_btree_islastblock( | |||
671 | } | 615 | } |
672 | 616 | ||
673 | /* | 617 | /* |
618 | * Change the cursor to point to the first record at the given level. | ||
619 | * Other levels are unaffected. | ||
620 | */ | ||
621 | int /* success=1, failure=0 */ | ||
622 | xfs_btree_firstrec( | ||
623 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
624 | int level) /* level to change */ | ||
625 | { | ||
626 | xfs_btree_block_t *block; /* generic btree block pointer */ | ||
627 | xfs_buf_t *bp; /* buffer containing block */ | ||
628 | |||
629 | /* | ||
630 | * Get the block pointer for this level. | ||
631 | */ | ||
632 | block = xfs_btree_get_block(cur, level, &bp); | ||
633 | xfs_btree_check_block(cur, block, level, bp); | ||
634 | /* | ||
635 | * It's empty, there is no such record. | ||
636 | */ | ||
637 | if (!block->bb_h.bb_numrecs) | ||
638 | return 0; | ||
639 | /* | ||
640 | * Set the ptr value to 1, that's the first record/key. | ||
641 | */ | ||
642 | cur->bc_ptrs[level] = 1; | ||
643 | return 1; | ||
644 | } | ||
645 | |||
646 | /* | ||
674 | * Change the cursor to point to the last record in the current block | 647 | * Change the cursor to point to the last record in the current block |
675 | * at the given level. Other levels are unaffected. | 648 | * at the given level. Other levels are unaffected. |
676 | */ | 649 | */ |
@@ -890,12 +863,12 @@ xfs_btree_readahead_core( | |||
890 | case XFS_BTNUM_INO: | 863 | case XFS_BTNUM_INO: |
891 | i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); | 864 | i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); |
892 | if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { | 865 | if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { |
893 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, | 866 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, |
894 | be32_to_cpu(i->bb_leftsib), 1); | 867 | be32_to_cpu(i->bb_leftsib), 1); |
895 | rval++; | 868 | rval++; |
896 | } | 869 | } |
897 | if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { | 870 | if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { |
898 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, | 871 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, |
899 | be32_to_cpu(i->bb_rightsib), 1); | 872 | be32_to_cpu(i->bb_rightsib), 1); |
900 | rval++; | 873 | rval++; |
901 | } | 874 | } |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7440b78f9cec..1f528a2a3754 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -158,8 +158,8 @@ typedef struct xfs_btree_cur | |||
158 | __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ | 158 | __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ |
159 | xfs_btnum_t bc_btnum; /* identifies which btree type */ | 159 | xfs_btnum_t bc_btnum; /* identifies which btree type */ |
160 | union { | 160 | union { |
161 | struct { /* needed for BNO, CNT */ | 161 | struct { /* needed for BNO, CNT, INO */ |
162 | struct xfs_buf *agbp; /* agf buffer pointer */ | 162 | struct xfs_buf *agbp; /* agf/agi buffer pointer */ |
163 | xfs_agnumber_t agno; /* ag number */ | 163 | xfs_agnumber_t agno; /* ag number */ |
164 | } a; | 164 | } a; |
165 | struct { /* needed for BMAP */ | 165 | struct { /* needed for BMAP */ |
@@ -172,10 +172,6 @@ typedef struct xfs_btree_cur | |||
172 | char flags; /* flags */ | 172 | char flags; /* flags */ |
173 | #define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ | 173 | #define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ |
174 | } b; | 174 | } b; |
175 | struct { /* needed for INO */ | ||
176 | struct xfs_buf *agbp; /* agi buffer pointer */ | ||
177 | xfs_agnumber_t agno; /* ag number */ | ||
178 | } i; | ||
179 | } bc_private; /* per-btree type data */ | 175 | } bc_private; /* per-btree type data */ |
180 | } xfs_btree_cur_t; | 176 | } xfs_btree_cur_t; |
181 | 177 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d86ca2c03a70..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -732,12 +732,13 @@ xfs_buf_item_init( | |||
732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
734 | bip->bli_buf = bp; | 734 | bip->bli_buf = bp; |
735 | xfs_buf_hold(bp); | ||
735 | bip->bli_format.blf_type = XFS_LI_BUF; | 736 | bip->bli_format.blf_type = XFS_LI_BUF; |
736 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 737 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
737 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 738 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
738 | bip->bli_format.blf_map_size = map_size; | 739 | bip->bli_format.blf_map_size = map_size; |
739 | #ifdef XFS_BLI_TRACE | 740 | #ifdef XFS_BLI_TRACE |
740 | bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); | 741 | bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); |
741 | #endif | 742 | #endif |
742 | 743 | ||
743 | #ifdef XFS_TRANS_DEBUG | 744 | #ifdef XFS_TRANS_DEBUG |
@@ -867,6 +868,21 @@ xfs_buf_item_dirty( | |||
867 | return (bip->bli_flags & XFS_BLI_DIRTY); | 868 | return (bip->bli_flags & XFS_BLI_DIRTY); |
868 | } | 869 | } |
869 | 870 | ||
871 | STATIC void | ||
872 | xfs_buf_item_free( | ||
873 | xfs_buf_log_item_t *bip) | ||
874 | { | ||
875 | #ifdef XFS_TRANS_DEBUG | ||
876 | kmem_free(bip->bli_orig); | ||
877 | kmem_free(bip->bli_logged); | ||
878 | #endif /* XFS_TRANS_DEBUG */ | ||
879 | |||
880 | #ifdef XFS_BLI_TRACE | ||
881 | ktrace_free(bip->bli_trace); | ||
882 | #endif | ||
883 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
884 | } | ||
885 | |||
870 | /* | 886 | /* |
871 | * This is called when the buf log item is no longer needed. It should | 887 | * This is called when the buf log item is no longer needed. It should |
872 | * free the buf log item associated with the given buffer and clear | 888 | * free the buf log item associated with the given buffer and clear |
@@ -887,18 +903,8 @@ xfs_buf_item_relse( | |||
887 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 903 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
888 | XFS_BUF_CLR_IODONE_FUNC(bp); | 904 | XFS_BUF_CLR_IODONE_FUNC(bp); |
889 | } | 905 | } |
890 | 906 | xfs_buf_rele(bp); | |
891 | #ifdef XFS_TRANS_DEBUG | 907 | xfs_buf_item_free(bip); |
892 | kmem_free(bip->bli_orig); | ||
893 | bip->bli_orig = NULL; | ||
894 | kmem_free(bip->bli_logged); | ||
895 | bip->bli_logged = NULL; | ||
896 | #endif /* XFS_TRANS_DEBUG */ | ||
897 | |||
898 | #ifdef XFS_BLI_TRACE | ||
899 | ktrace_free(bip->bli_trace); | ||
900 | #endif | ||
901 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
902 | } | 908 | } |
903 | 909 | ||
904 | 910 | ||
@@ -1056,7 +1062,7 @@ xfs_buf_iodone_callbacks( | |||
1056 | anyway. */ | 1062 | anyway. */ |
1057 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | 1063 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); |
1058 | XFS_BUF_DONE(bp); | 1064 | XFS_BUF_DONE(bp); |
1059 | XFS_BUF_V_IODONESEMA(bp); | 1065 | XFS_BUF_FINISH_IOWAIT(bp); |
1060 | } | 1066 | } |
1061 | return; | 1067 | return; |
1062 | } | 1068 | } |
@@ -1120,6 +1126,7 @@ xfs_buf_iodone( | |||
1120 | 1126 | ||
1121 | ASSERT(bip->bli_buf == bp); | 1127 | ASSERT(bip->bli_buf == bp); |
1122 | 1128 | ||
1129 | xfs_buf_rele(bp); | ||
1123 | mp = bip->bli_item.li_mountp; | 1130 | mp = bip->bli_item.li_mountp; |
1124 | 1131 | ||
1125 | /* | 1132 | /* |
@@ -1136,18 +1143,7 @@ xfs_buf_iodone( | |||
1136 | * xfs_trans_delete_ail() drops the AIL lock. | 1143 | * xfs_trans_delete_ail() drops the AIL lock. |
1137 | */ | 1144 | */ |
1138 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); | 1145 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); |
1139 | 1146 | xfs_buf_item_free(bip); | |
1140 | #ifdef XFS_TRANS_DEBUG | ||
1141 | kmem_free(bip->bli_orig); | ||
1142 | bip->bli_orig = NULL; | ||
1143 | kmem_free(bip->bli_logged); | ||
1144 | bip->bli_logged = NULL; | ||
1145 | #endif /* XFS_TRANS_DEBUG */ | ||
1146 | |||
1147 | #ifdef XFS_BLI_TRACE | ||
1148 | ktrace_free(bip->bli_trace); | ||
1149 | #endif | ||
1150 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
1151 | } | 1147 | } |
1152 | 1148 | ||
1153 | #if defined(XFS_BLI_TRACE) | 1149 | #if defined(XFS_BLI_TRACE) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 2211e885ef24..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -128,10 +128,8 @@ xfs_swap_extents( | |||
128 | xfs_swapext_t *sxp) | 128 | xfs_swapext_t *sxp) |
129 | { | 129 | { |
130 | xfs_mount_t *mp; | 130 | xfs_mount_t *mp; |
131 | xfs_inode_t *ips[2]; | ||
132 | xfs_trans_t *tp; | 131 | xfs_trans_t *tp; |
133 | xfs_bstat_t *sbp = &sxp->sx_stat; | 132 | xfs_bstat_t *sbp = &sxp->sx_stat; |
134 | bhv_vnode_t *vp, *tvp; | ||
135 | xfs_ifork_t *tempifp, *ifp, *tifp; | 133 | xfs_ifork_t *tempifp, *ifp, *tifp; |
136 | int ilf_fields, tilf_fields; | 134 | int ilf_fields, tilf_fields; |
137 | static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; | 135 | static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; |
@@ -150,19 +148,15 @@ xfs_swap_extents( | |||
150 | } | 148 | } |
151 | 149 | ||
152 | sbp = &sxp->sx_stat; | 150 | sbp = &sxp->sx_stat; |
153 | vp = XFS_ITOV(ip); | ||
154 | tvp = XFS_ITOV(tip); | ||
155 | |||
156 | /* Lock in i_ino order */ | ||
157 | if (ip->i_ino < tip->i_ino) { | ||
158 | ips[0] = ip; | ||
159 | ips[1] = tip; | ||
160 | } else { | ||
161 | ips[0] = tip; | ||
162 | ips[1] = ip; | ||
163 | } | ||
164 | 151 | ||
165 | xfs_lock_inodes(ips, 2, lock_flags); | 152 | /* |
153 | * we have to do two separate lock calls here to keep lockdep | ||
154 | * happy. If we try to get all the locks in one call, lockdep will | ||
155 | * report false positives when we drop the ILOCK and regain them | ||
156 | * below. | ||
157 | */ | ||
158 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
159 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
166 | locked = 1; | 160 | locked = 1; |
167 | 161 | ||
168 | /* Verify that both files have the same format */ | 162 | /* Verify that both files have the same format */ |
@@ -184,7 +178,7 @@ xfs_swap_extents( | |||
184 | goto error0; | 178 | goto error0; |
185 | } | 179 | } |
186 | 180 | ||
187 | if (VN_CACHED(tvp) != 0) { | 181 | if (VN_CACHED(VFS_I(tip)) != 0) { |
188 | xfs_inval_cached_trace(tip, 0, -1, 0, -1); | 182 | xfs_inval_cached_trace(tip, 0, -1, 0, -1); |
189 | error = xfs_flushinval_pages(tip, 0, -1, | 183 | error = xfs_flushinval_pages(tip, 0, -1, |
190 | FI_REMAPF_LOCKED); | 184 | FI_REMAPF_LOCKED); |
@@ -193,7 +187,7 @@ xfs_swap_extents( | |||
193 | } | 187 | } |
194 | 188 | ||
195 | /* Verify O_DIRECT for ftmp */ | 189 | /* Verify O_DIRECT for ftmp */ |
196 | if (VN_CACHED(tvp) != 0) { | 190 | if (VN_CACHED(VFS_I(tip)) != 0) { |
197 | error = XFS_ERROR(EINVAL); | 191 | error = XFS_ERROR(EINVAL); |
198 | goto error0; | 192 | goto error0; |
199 | } | 193 | } |
@@ -237,7 +231,7 @@ xfs_swap_extents( | |||
237 | * vop_read (or write in the case of autogrow) they block on the iolock | 231 | * vop_read (or write in the case of autogrow) they block on the iolock |
238 | * until we have switched the extents. | 232 | * until we have switched the extents. |
239 | */ | 233 | */ |
240 | if (VN_MAPPED(vp)) { | 234 | if (VN_MAPPED(VFS_I(ip))) { |
241 | error = XFS_ERROR(EBUSY); | 235 | error = XFS_ERROR(EBUSY); |
242 | goto error0; | 236 | goto error0; |
243 | } | 237 | } |
@@ -265,7 +259,7 @@ xfs_swap_extents( | |||
265 | locked = 0; | 259 | locked = 0; |
266 | goto error0; | 260 | goto error0; |
267 | } | 261 | } |
268 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | 262 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
269 | 263 | ||
270 | /* | 264 | /* |
271 | * Count the number of extended attribute blocks | 265 | * Count the number of extended attribute blocks |
@@ -350,15 +344,11 @@ xfs_swap_extents( | |||
350 | break; | 344 | break; |
351 | } | 345 | } |
352 | 346 | ||
353 | /* | ||
354 | * Increment vnode ref counts since xfs_trans_commit & | ||
355 | * xfs_trans_cancel will both unlock the inodes and | ||
356 | * decrement the associated ref counts. | ||
357 | */ | ||
358 | VN_HOLD(vp); | ||
359 | VN_HOLD(tvp); | ||
360 | 347 | ||
348 | IHOLD(ip); | ||
361 | xfs_trans_ijoin(tp, ip, lock_flags); | 349 | xfs_trans_ijoin(tp, ip, lock_flags); |
350 | |||
351 | IHOLD(tip); | ||
362 | xfs_trans_ijoin(tp, tip, lock_flags); | 352 | xfs_trans_ijoin(tp, tip, lock_flags); |
363 | 353 | ||
364 | xfs_trans_log_inode(tp, ip, ilf_fields); | 354 | xfs_trans_log_inode(tp, ip, ilf_fields); |
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #ifndef __XFS_DMAPI_H__ | 18 | #ifndef __XFS_DMAPI_H__ |
19 | #define __XFS_DMAPI_H__ | 19 | #define __XFS_DMAPI_H__ |
20 | 20 | ||
21 | #include <linux/version.h> | ||
22 | /* Values used to define the on-disk version of dm_attrname_t. All | 21 | /* Values used to define the on-disk version of dm_attrname_t. All |
23 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". | 22 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". |
24 | * | 23 | * |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index f66756cfb5e8..f227ecd1a294 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -58,9 +58,6 @@ xfs_error_trap(int e) | |||
58 | } | 58 | } |
59 | return e; | 59 | return e; |
60 | } | 60 | } |
61 | #endif | ||
62 | |||
63 | #if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) | ||
64 | 61 | ||
65 | int xfs_etest[XFS_NUM_INJECT_ERROR]; | 62 | int xfs_etest[XFS_NUM_INJECT_ERROR]; |
66 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; | 63 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; |
@@ -154,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
154 | 151 | ||
155 | return 0; | 152 | return 0; |
156 | } | 153 | } |
157 | #endif /* DEBUG || INDUCE_IO_ERROR */ | 154 | #endif /* DEBUG */ |
158 | 155 | ||
159 | static void | 156 | static void |
160 | xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) | 157 | xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) |
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index d8559d132efa..11543f10b0c6 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -125,22 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, | |||
125 | #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) | 125 | #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) |
126 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT | 126 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT |
127 | 127 | ||
128 | #if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) | 128 | #ifdef DEBUG |
129 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | 129 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); |
130 | 130 | ||
131 | #define XFS_NUM_INJECT_ERROR 10 | 131 | #define XFS_NUM_INJECT_ERROR 10 |
132 | |||
133 | #ifdef __ANSI_CPP__ | ||
134 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | ||
135 | ((expr) || \ | ||
136 | xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \ | ||
137 | (rf))) | ||
138 | #else | ||
139 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | 132 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ |
140 | ((expr) || \ | 133 | ((expr) || \ |
141 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 134 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
142 | (rf))) | 135 | (rf))) |
143 | #endif /* __ANSI_CPP__ */ | ||
144 | 136 | ||
145 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 137 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); |
146 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 138 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); |
@@ -148,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
148 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) | 140 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) |
149 | #define xfs_errortag_add(tag, mp) (ENOSYS) | 141 | #define xfs_errortag_add(tag, mp) (ENOSYS) |
150 | #define xfs_errortag_clearall(mp, loud) (ENOSYS) | 142 | #define xfs_errortag_clearall(mp, loud) (ENOSYS) |
151 | #endif /* (DEBUG || INDUCE_IO_ERROR) */ | 143 | #endif /* DEBUG */ |
152 | 144 | ||
153 | /* | 145 | /* |
154 | * XFS panic tags -- allow a call to xfs_cmn_err() be turned into | 146 | * XFS panic tags -- allow a call to xfs_cmn_err() be turned into |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index c38fd14fca29..f3bb75da384e 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -400,7 +400,7 @@ xfs_filestream_init(void) | |||
400 | if (!item_zone) | 400 | if (!item_zone) |
401 | return -ENOMEM; | 401 | return -ENOMEM; |
402 | #ifdef XFS_FILESTREAMS_TRACE | 402 | #ifdef XFS_FILESTREAMS_TRACE |
403 | xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); | 403 | xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS); |
404 | #endif | 404 | #endif |
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index e5310c90e50f..83502f3edef0 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -181,7 +181,7 @@ xfs_inobt_delrec( | |||
181 | * then we can get rid of this level. | 181 | * then we can get rid of this level. |
182 | */ | 182 | */ |
183 | if (numrecs == 1 && level > 0) { | 183 | if (numrecs == 1 && level > 0) { |
184 | agbp = cur->bc_private.i.agbp; | 184 | agbp = cur->bc_private.a.agbp; |
185 | agi = XFS_BUF_TO_AGI(agbp); | 185 | agi = XFS_BUF_TO_AGI(agbp); |
186 | /* | 186 | /* |
187 | * pp is still set to the first pointer in the block. | 187 | * pp is still set to the first pointer in the block. |
@@ -194,7 +194,7 @@ xfs_inobt_delrec( | |||
194 | * Free the block. | 194 | * Free the block. |
195 | */ | 195 | */ |
196 | if ((error = xfs_free_extent(cur->bc_tp, | 196 | if ((error = xfs_free_extent(cur->bc_tp, |
197 | XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) | 197 | XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1))) |
198 | return error; | 198 | return error; |
199 | xfs_trans_binval(cur->bc_tp, bp); | 199 | xfs_trans_binval(cur->bc_tp, bp); |
200 | xfs_ialloc_log_agi(cur->bc_tp, agbp, | 200 | xfs_ialloc_log_agi(cur->bc_tp, agbp, |
@@ -379,7 +379,7 @@ xfs_inobt_delrec( | |||
379 | rrecs = be16_to_cpu(right->bb_numrecs); | 379 | rrecs = be16_to_cpu(right->bb_numrecs); |
380 | rbp = bp; | 380 | rbp = bp; |
381 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, | 381 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, |
382 | cur->bc_private.i.agno, lbno, 0, &lbp, | 382 | cur->bc_private.a.agno, lbno, 0, &lbp, |
383 | XFS_INO_BTREE_REF))) | 383 | XFS_INO_BTREE_REF))) |
384 | return error; | 384 | return error; |
385 | left = XFS_BUF_TO_INOBT_BLOCK(lbp); | 385 | left = XFS_BUF_TO_INOBT_BLOCK(lbp); |
@@ -401,7 +401,7 @@ xfs_inobt_delrec( | |||
401 | lrecs = be16_to_cpu(left->bb_numrecs); | 401 | lrecs = be16_to_cpu(left->bb_numrecs); |
402 | lbp = bp; | 402 | lbp = bp; |
403 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, | 403 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, |
404 | cur->bc_private.i.agno, rbno, 0, &rbp, | 404 | cur->bc_private.a.agno, rbno, 0, &rbp, |
405 | XFS_INO_BTREE_REF))) | 405 | XFS_INO_BTREE_REF))) |
406 | return error; | 406 | return error; |
407 | right = XFS_BUF_TO_INOBT_BLOCK(rbp); | 407 | right = XFS_BUF_TO_INOBT_BLOCK(rbp); |
@@ -484,7 +484,7 @@ xfs_inobt_delrec( | |||
484 | xfs_buf_t *rrbp; | 484 | xfs_buf_t *rrbp; |
485 | 485 | ||
486 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, | 486 | if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, |
487 | cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, | 487 | cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, |
488 | &rrbp, XFS_INO_BTREE_REF))) | 488 | &rrbp, XFS_INO_BTREE_REF))) |
489 | return error; | 489 | return error; |
490 | rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); | 490 | rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); |
@@ -497,7 +497,7 @@ xfs_inobt_delrec( | |||
497 | * Free the deleting block. | 497 | * Free the deleting block. |
498 | */ | 498 | */ |
499 | if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, | 499 | if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, |
500 | cur->bc_private.i.agno, rbno), 1))) | 500 | cur->bc_private.a.agno, rbno), 1))) |
501 | return error; | 501 | return error; |
502 | xfs_trans_binval(cur->bc_tp, rbp); | 502 | xfs_trans_binval(cur->bc_tp, rbp); |
503 | /* | 503 | /* |
@@ -854,7 +854,7 @@ xfs_inobt_lookup( | |||
854 | { | 854 | { |
855 | xfs_agi_t *agi; /* a.g. inode header */ | 855 | xfs_agi_t *agi; /* a.g. inode header */ |
856 | 856 | ||
857 | agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); | 857 | agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); |
858 | agno = be32_to_cpu(agi->agi_seqno); | 858 | agno = be32_to_cpu(agi->agi_seqno); |
859 | agbno = be32_to_cpu(agi->agi_root); | 859 | agbno = be32_to_cpu(agi->agi_root); |
860 | } | 860 | } |
@@ -1089,7 +1089,7 @@ xfs_inobt_lshift( | |||
1089 | * Set up the left neighbor as "left". | 1089 | * Set up the left neighbor as "left". |
1090 | */ | 1090 | */ |
1091 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, | 1091 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, |
1092 | cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), | 1092 | cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), |
1093 | 0, &lbp, XFS_INO_BTREE_REF))) | 1093 | 0, &lbp, XFS_INO_BTREE_REF))) |
1094 | return error; | 1094 | return error; |
1095 | left = XFS_BUF_TO_INOBT_BLOCK(lbp); | 1095 | left = XFS_BUF_TO_INOBT_BLOCK(lbp); |
@@ -1207,10 +1207,10 @@ xfs_inobt_newroot( | |||
1207 | /* | 1207 | /* |
1208 | * Get a block & a buffer. | 1208 | * Get a block & a buffer. |
1209 | */ | 1209 | */ |
1210 | agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); | 1210 | agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); |
1211 | args.tp = cur->bc_tp; | 1211 | args.tp = cur->bc_tp; |
1212 | args.mp = cur->bc_mp; | 1212 | args.mp = cur->bc_mp; |
1213 | args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, | 1213 | args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, |
1214 | be32_to_cpu(agi->agi_root)); | 1214 | be32_to_cpu(agi->agi_root)); |
1215 | args.mod = args.minleft = args.alignment = args.total = args.wasdel = | 1215 | args.mod = args.minleft = args.alignment = args.total = args.wasdel = |
1216 | args.isfl = args.userdata = args.minalignslop = 0; | 1216 | args.isfl = args.userdata = args.minalignslop = 0; |
@@ -1233,7 +1233,7 @@ xfs_inobt_newroot( | |||
1233 | */ | 1233 | */ |
1234 | agi->agi_root = cpu_to_be32(args.agbno); | 1234 | agi->agi_root = cpu_to_be32(args.agbno); |
1235 | be32_add_cpu(&agi->agi_level, 1); | 1235 | be32_add_cpu(&agi->agi_level, 1); |
1236 | xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, | 1236 | xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp, |
1237 | XFS_AGI_ROOT | XFS_AGI_LEVEL); | 1237 | XFS_AGI_ROOT | XFS_AGI_LEVEL); |
1238 | /* | 1238 | /* |
1239 | * At the previous root level there are now two blocks: the old | 1239 | * At the previous root level there are now two blocks: the old |
@@ -1376,7 +1376,7 @@ xfs_inobt_rshift( | |||
1376 | * Set up the right neighbor as "right". | 1376 | * Set up the right neighbor as "right". |
1377 | */ | 1377 | */ |
1378 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, | 1378 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, |
1379 | cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), | 1379 | cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), |
1380 | 0, &rbp, XFS_INO_BTREE_REF))) | 1380 | 0, &rbp, XFS_INO_BTREE_REF))) |
1381 | return error; | 1381 | return error; |
1382 | right = XFS_BUF_TO_INOBT_BLOCK(rbp); | 1382 | right = XFS_BUF_TO_INOBT_BLOCK(rbp); |
@@ -1492,7 +1492,7 @@ xfs_inobt_split( | |||
1492 | * Allocate the new block. | 1492 | * Allocate the new block. |
1493 | * If we can't do it, we're toast. Give up. | 1493 | * If we can't do it, we're toast. Give up. |
1494 | */ | 1494 | */ |
1495 | args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); | 1495 | args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno); |
1496 | args.mod = args.minleft = args.alignment = args.total = args.wasdel = | 1496 | args.mod = args.minleft = args.alignment = args.total = args.wasdel = |
1497 | args.isfl = args.userdata = args.minalignslop = 0; | 1497 | args.isfl = args.userdata = args.minalignslop = 0; |
1498 | args.minlen = args.maxlen = args.prod = 1; | 1498 | args.minlen = args.maxlen = args.prod = 1; |
@@ -1725,7 +1725,7 @@ xfs_inobt_decrement( | |||
1725 | 1725 | ||
1726 | agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); | 1726 | agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); |
1727 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, | 1727 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, |
1728 | cur->bc_private.i.agno, agbno, 0, &bp, | 1728 | cur->bc_private.a.agno, agbno, 0, &bp, |
1729 | XFS_INO_BTREE_REF))) | 1729 | XFS_INO_BTREE_REF))) |
1730 | return error; | 1730 | return error; |
1731 | lev--; | 1731 | lev--; |
@@ -1897,7 +1897,7 @@ xfs_inobt_increment( | |||
1897 | 1897 | ||
1898 | agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); | 1898 | agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); |
1899 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, | 1899 | if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, |
1900 | cur->bc_private.i.agno, agbno, 0, &bp, | 1900 | cur->bc_private.a.agno, agbno, 0, &bp, |
1901 | XFS_INO_BTREE_REF))) | 1901 | XFS_INO_BTREE_REF))) |
1902 | return error; | 1902 | return error; |
1903 | lev--; | 1903 | lev--; |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b07604b94d9f..e229e9e001c2 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -216,7 +216,14 @@ finish_inode: | |||
216 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 216 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
217 | init_waitqueue_head(&ip->i_ipin_wait); | 217 | init_waitqueue_head(&ip->i_ipin_wait); |
218 | atomic_set(&ip->i_pincount, 0); | 218 | atomic_set(&ip->i_pincount, 0); |
219 | initnsema(&ip->i_flock, 1, "xfsfino"); | 219 | |
220 | /* | ||
221 | * Because we want to use a counting completion, complete | ||
222 | * the flush completion once to allow a single access to | ||
223 | * the flush completion without blocking. | ||
224 | */ | ||
225 | init_completion(&ip->i_flush); | ||
226 | complete(&ip->i_flush); | ||
220 | 227 | ||
221 | if (lock_flags) | 228 | if (lock_flags) |
222 | xfs_ilock(ip, lock_flags); | 229 | xfs_ilock(ip, lock_flags); |
@@ -288,10 +295,17 @@ finish_inode: | |||
288 | *ipp = ip; | 295 | *ipp = ip; |
289 | 296 | ||
290 | /* | 297 | /* |
298 | * Link the XFS inode with the Linux inode. | ||
299 | */ | ||
300 | ip->i_vnode = inode; | ||
301 | inode->i_private = ip; | ||
302 | |||
303 | /* | ||
291 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | 304 | * If we have a real type for an on-disk inode, we can set ops(&unlock) |
292 | * now. If it's a new inode being created, xfs_ialloc will handle it. | 305 | * now. If it's a new inode being created, xfs_ialloc will handle it. |
293 | */ | 306 | */ |
294 | xfs_initialize_vnode(mp, inode, ip); | 307 | if (ip->i_d.di_mode != 0) |
308 | xfs_setup_inode(ip); | ||
295 | return 0; | 309 | return 0; |
296 | } | 310 | } |
297 | 311 | ||
@@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip, | |||
411 | * Special iput for brand-new inodes that are still locked | 425 | * Special iput for brand-new inodes that are still locked |
412 | */ | 426 | */ |
413 | void | 427 | void |
414 | xfs_iput_new(xfs_inode_t *ip, | 428 | xfs_iput_new( |
415 | uint lock_flags) | 429 | xfs_inode_t *ip, |
430 | uint lock_flags) | ||
416 | { | 431 | { |
417 | struct inode *inode = ip->i_vnode; | 432 | struct inode *inode = VFS_I(ip); |
418 | 433 | ||
419 | xfs_itrace_entry(ip); | 434 | xfs_itrace_entry(ip); |
420 | 435 | ||
@@ -775,26 +790,3 @@ xfs_isilocked( | |||
775 | } | 790 | } |
776 | #endif | 791 | #endif |
777 | 792 | ||
778 | /* | ||
779 | * The following three routines simply manage the i_flock | ||
780 | * semaphore embedded in the inode. This semaphore synchronizes | ||
781 | * processes attempting to flush the in-core inode back to disk. | ||
782 | */ | ||
783 | void | ||
784 | xfs_iflock(xfs_inode_t *ip) | ||
785 | { | ||
786 | psema(&(ip->i_flock), PINOD|PLTWAIT); | ||
787 | } | ||
788 | |||
789 | int | ||
790 | xfs_iflock_nowait(xfs_inode_t *ip) | ||
791 | { | ||
792 | return (cpsema(&(ip->i_flock))); | ||
793 | } | ||
794 | |||
795 | void | ||
796 | xfs_ifunlock(xfs_inode_t *ip) | ||
797 | { | ||
798 | ASSERT(issemalocked(&(ip->i_flock))); | ||
799 | vsema(&(ip->i_flock)); | ||
800 | } | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bedc66163176..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -580,8 +580,8 @@ xfs_iformat_extents( | |||
580 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); | 580 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); |
581 | for (i = 0; i < nex; i++, dp++) { | 581 | for (i = 0; i < nex; i++, dp++) { |
582 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | 582 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); |
583 | ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); | 583 | ep->l0 = get_unaligned_be64(&dp->l0); |
584 | ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); | 584 | ep->l1 = get_unaligned_be64(&dp->l1); |
585 | } | 585 | } |
586 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); | 586 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); |
587 | if (whichfork != XFS_DATA_FORK || | 587 | if (whichfork != XFS_DATA_FORK || |
@@ -835,22 +835,22 @@ xfs_iread( | |||
835 | * Do this before xfs_iformat in case it adds entries. | 835 | * Do this before xfs_iformat in case it adds entries. |
836 | */ | 836 | */ |
837 | #ifdef XFS_INODE_TRACE | 837 | #ifdef XFS_INODE_TRACE |
838 | ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); | 838 | ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); |
839 | #endif | 839 | #endif |
840 | #ifdef XFS_BMAP_TRACE | 840 | #ifdef XFS_BMAP_TRACE |
841 | ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); | 841 | ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); |
842 | #endif | 842 | #endif |
843 | #ifdef XFS_BMBT_TRACE | 843 | #ifdef XFS_BMBT_TRACE |
844 | ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); | 844 | ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); |
845 | #endif | 845 | #endif |
846 | #ifdef XFS_RW_TRACE | 846 | #ifdef XFS_RW_TRACE |
847 | ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); | 847 | ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); |
848 | #endif | 848 | #endif |
849 | #ifdef XFS_ILOCK_TRACE | 849 | #ifdef XFS_ILOCK_TRACE |
850 | ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); | 850 | ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); |
851 | #endif | 851 | #endif |
852 | #ifdef XFS_DIR2_TRACE | 852 | #ifdef XFS_DIR2_TRACE |
853 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); | 853 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
854 | #endif | 854 | #endif |
855 | 855 | ||
856 | /* | 856 | /* |
@@ -1046,9 +1046,9 @@ xfs_ialloc( | |||
1046 | { | 1046 | { |
1047 | xfs_ino_t ino; | 1047 | xfs_ino_t ino; |
1048 | xfs_inode_t *ip; | 1048 | xfs_inode_t *ip; |
1049 | bhv_vnode_t *vp; | ||
1050 | uint flags; | 1049 | uint flags; |
1051 | int error; | 1050 | int error; |
1051 | timespec_t tv; | ||
1052 | 1052 | ||
1053 | /* | 1053 | /* |
1054 | * Call the space management code to pick | 1054 | * Call the space management code to pick |
@@ -1077,13 +1077,12 @@ xfs_ialloc( | |||
1077 | } | 1077 | } |
1078 | ASSERT(ip != NULL); | 1078 | ASSERT(ip != NULL); |
1079 | 1079 | ||
1080 | vp = XFS_ITOV(ip); | ||
1081 | ip->i_d.di_mode = (__uint16_t)mode; | 1080 | ip->i_d.di_mode = (__uint16_t)mode; |
1082 | ip->i_d.di_onlink = 0; | 1081 | ip->i_d.di_onlink = 0; |
1083 | ip->i_d.di_nlink = nlink; | 1082 | ip->i_d.di_nlink = nlink; |
1084 | ASSERT(ip->i_d.di_nlink == nlink); | 1083 | ASSERT(ip->i_d.di_nlink == nlink); |
1085 | ip->i_d.di_uid = current_fsuid(cr); | 1084 | ip->i_d.di_uid = current_fsuid(); |
1086 | ip->i_d.di_gid = current_fsgid(cr); | 1085 | ip->i_d.di_gid = current_fsgid(); |
1087 | ip->i_d.di_projid = prid; | 1086 | ip->i_d.di_projid = prid; |
1088 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 1087 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
1089 | 1088 | ||
@@ -1130,7 +1129,13 @@ xfs_ialloc( | |||
1130 | ip->i_size = 0; | 1129 | ip->i_size = 0; |
1131 | ip->i_d.di_nextents = 0; | 1130 | ip->i_d.di_nextents = 0; |
1132 | ASSERT(ip->i_d.di_nblocks == 0); | 1131 | ASSERT(ip->i_d.di_nblocks == 0); |
1133 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); | 1132 | |
1133 | nanotime(&tv); | ||
1134 | ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; | ||
1135 | ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; | ||
1136 | ip->i_d.di_atime = ip->i_d.di_mtime; | ||
1137 | ip->i_d.di_ctime = ip->i_d.di_mtime; | ||
1138 | |||
1134 | /* | 1139 | /* |
1135 | * di_gen will have been taken care of in xfs_iread. | 1140 | * di_gen will have been taken care of in xfs_iread. |
1136 | */ | 1141 | */ |
@@ -1220,7 +1225,7 @@ xfs_ialloc( | |||
1220 | xfs_trans_log_inode(tp, ip, flags); | 1225 | xfs_trans_log_inode(tp, ip, flags); |
1221 | 1226 | ||
1222 | /* now that we have an i_mode we can setup inode ops and unlock */ | 1227 | /* now that we have an i_mode we can setup inode ops and unlock */ |
1223 | xfs_initialize_vnode(tp->t_mountp, vp, ip); | 1228 | xfs_setup_inode(ip); |
1224 | 1229 | ||
1225 | *ipp = ip; | 1230 | *ipp = ip; |
1226 | return 0; | 1231 | return 0; |
@@ -1399,7 +1404,6 @@ xfs_itruncate_start( | |||
1399 | xfs_fsize_t last_byte; | 1404 | xfs_fsize_t last_byte; |
1400 | xfs_off_t toss_start; | 1405 | xfs_off_t toss_start; |
1401 | xfs_mount_t *mp; | 1406 | xfs_mount_t *mp; |
1402 | bhv_vnode_t *vp; | ||
1403 | int error = 0; | 1407 | int error = 0; |
1404 | 1408 | ||
1405 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 1409 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
@@ -1408,7 +1412,6 @@ xfs_itruncate_start( | |||
1408 | (flags == XFS_ITRUNC_MAYBE)); | 1412 | (flags == XFS_ITRUNC_MAYBE)); |
1409 | 1413 | ||
1410 | mp = ip->i_mount; | 1414 | mp = ip->i_mount; |
1411 | vp = XFS_ITOV(ip); | ||
1412 | 1415 | ||
1413 | /* wait for the completion of any pending DIOs */ | 1416 | /* wait for the completion of any pending DIOs */ |
1414 | if (new_size < ip->i_size) | 1417 | if (new_size < ip->i_size) |
@@ -1457,7 +1460,7 @@ xfs_itruncate_start( | |||
1457 | 1460 | ||
1458 | #ifdef DEBUG | 1461 | #ifdef DEBUG |
1459 | if (new_size == 0) { | 1462 | if (new_size == 0) { |
1460 | ASSERT(VN_CACHED(vp) == 0); | 1463 | ASSERT(VN_CACHED(VFS_I(ip)) == 0); |
1461 | } | 1464 | } |
1462 | #endif | 1465 | #endif |
1463 | return error; | 1466 | return error; |
@@ -2630,7 +2633,6 @@ xfs_idestroy( | |||
2630 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | 2633 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); |
2631 | mrfree(&ip->i_lock); | 2634 | mrfree(&ip->i_lock); |
2632 | mrfree(&ip->i_iolock); | 2635 | mrfree(&ip->i_iolock); |
2633 | freesema(&ip->i_flock); | ||
2634 | 2636 | ||
2635 | #ifdef XFS_INODE_TRACE | 2637 | #ifdef XFS_INODE_TRACE |
2636 | ktrace_free(ip->i_trace); | 2638 | ktrace_free(ip->i_trace); |
@@ -3048,10 +3050,10 @@ cluster_corrupt_out: | |||
3048 | /* | 3050 | /* |
3049 | * xfs_iflush() will write a modified inode's changes out to the | 3051 | * xfs_iflush() will write a modified inode's changes out to the |
3050 | * inode's on disk home. The caller must have the inode lock held | 3052 | * inode's on disk home. The caller must have the inode lock held |
3051 | * in at least shared mode and the inode flush semaphore must be | 3053 | * in at least shared mode and the inode flush completion must be |
3052 | * held as well. The inode lock will still be held upon return from | 3054 | * active as well. The inode lock will still be held upon return from |
3053 | * the call and the caller is free to unlock it. | 3055 | * the call and the caller is free to unlock it. |
3054 | * The inode flush lock will be unlocked when the inode reaches the disk. | 3056 | * The inode flush will be completed when the inode reaches the disk. |
3055 | * The flags indicate how the inode's buffer should be written out. | 3057 | * The flags indicate how the inode's buffer should be written out. |
3056 | */ | 3058 | */ |
3057 | int | 3059 | int |
@@ -3070,7 +3072,7 @@ xfs_iflush( | |||
3070 | XFS_STATS_INC(xs_iflush_count); | 3072 | XFS_STATS_INC(xs_iflush_count); |
3071 | 3073 | ||
3072 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 3074 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
3073 | ASSERT(issemalocked(&(ip->i_flock))); | 3075 | ASSERT(!completion_done(&ip->i_flush)); |
3074 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 3076 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
3075 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 3077 | ip->i_d.di_nextents > ip->i_df.if_ext_max); |
3076 | 3078 | ||
@@ -3233,7 +3235,7 @@ xfs_iflush_int( | |||
3233 | #endif | 3235 | #endif |
3234 | 3236 | ||
3235 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 3237 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
3236 | ASSERT(issemalocked(&(ip->i_flock))); | 3238 | ASSERT(!completion_done(&ip->i_flush)); |
3237 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 3239 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
3238 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 3240 | ip->i_d.di_nextents > ip->i_df.if_ext_max); |
3239 | 3241 | ||
@@ -3465,7 +3467,6 @@ xfs_iflush_all( | |||
3465 | xfs_mount_t *mp) | 3467 | xfs_mount_t *mp) |
3466 | { | 3468 | { |
3467 | xfs_inode_t *ip; | 3469 | xfs_inode_t *ip; |
3468 | bhv_vnode_t *vp; | ||
3469 | 3470 | ||
3470 | again: | 3471 | again: |
3471 | XFS_MOUNT_ILOCK(mp); | 3472 | XFS_MOUNT_ILOCK(mp); |
@@ -3480,14 +3481,13 @@ xfs_iflush_all( | |||
3480 | continue; | 3481 | continue; |
3481 | } | 3482 | } |
3482 | 3483 | ||
3483 | vp = XFS_ITOV_NULL(ip); | 3484 | if (!VFS_I(ip)) { |
3484 | if (!vp) { | ||
3485 | XFS_MOUNT_IUNLOCK(mp); | 3485 | XFS_MOUNT_IUNLOCK(mp); |
3486 | xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); | 3486 | xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); |
3487 | goto again; | 3487 | goto again; |
3488 | } | 3488 | } |
3489 | 3489 | ||
3490 | ASSERT(vn_count(vp) == 0); | 3490 | ASSERT(vn_count(VFS_I(ip)) == 0); |
3491 | 3491 | ||
3492 | ip = ip->i_mnext; | 3492 | ip = ip->i_mnext; |
3493 | } while (ip != mp->m_inodes); | 3493 | } while (ip != mp->m_inodes); |
@@ -3707,7 +3707,7 @@ xfs_iext_add_indirect_multi( | |||
3707 | * (all extents past */ | 3707 | * (all extents past */ |
3708 | if (nex2) { | 3708 | if (nex2) { |
3709 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); | 3709 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); |
3710 | nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); | 3710 | nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); |
3711 | memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); | 3711 | memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); |
3712 | erp->er_extcount -= nex2; | 3712 | erp->er_extcount -= nex2; |
3713 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); | 3713 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); |
@@ -4007,8 +4007,7 @@ xfs_iext_realloc_direct( | |||
4007 | ifp->if_u1.if_extents = | 4007 | ifp->if_u1.if_extents = |
4008 | kmem_realloc(ifp->if_u1.if_extents, | 4008 | kmem_realloc(ifp->if_u1.if_extents, |
4009 | rnew_size, | 4009 | rnew_size, |
4010 | ifp->if_real_bytes, | 4010 | ifp->if_real_bytes, KM_NOFS); |
4011 | KM_SLEEP); | ||
4012 | } | 4011 | } |
4013 | if (rnew_size > ifp->if_real_bytes) { | 4012 | if (rnew_size > ifp->if_real_bytes) { |
4014 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / | 4013 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / |
@@ -4067,7 +4066,7 @@ xfs_iext_inline_to_direct( | |||
4067 | xfs_ifork_t *ifp, /* inode fork pointer */ | 4066 | xfs_ifork_t *ifp, /* inode fork pointer */ |
4068 | int new_size) /* number of extents in file */ | 4067 | int new_size) /* number of extents in file */ |
4069 | { | 4068 | { |
4070 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); | 4069 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); |
4071 | memset(ifp->if_u1.if_extents, 0, new_size); | 4070 | memset(ifp->if_u1.if_extents, 0, new_size); |
4072 | if (ifp->if_bytes) { | 4071 | if (ifp->if_bytes) { |
4073 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, | 4072 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, |
@@ -4099,7 +4098,7 @@ xfs_iext_realloc_indirect( | |||
4099 | } else { | 4098 | } else { |
4100 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) | 4099 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) |
4101 | kmem_realloc(ifp->if_u1.if_ext_irec, | 4100 | kmem_realloc(ifp->if_u1.if_ext_irec, |
4102 | new_size, size, KM_SLEEP); | 4101 | new_size, size, KM_NOFS); |
4103 | } | 4102 | } |
4104 | } | 4103 | } |
4105 | 4104 | ||
@@ -4119,7 +4118,7 @@ xfs_iext_indirect_to_direct( | |||
4119 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4120 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); |
4121 | 4120 | ||
4122 | xfs_iext_irec_compact_full(ifp); | 4121 | xfs_iext_irec_compact_pages(ifp); |
4123 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
4124 | 4123 | ||
4125 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
@@ -4341,11 +4340,10 @@ xfs_iext_irec_init( | |||
4341 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4340 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4342 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4341 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4343 | 4342 | ||
4344 | erp = (xfs_ext_irec_t *) | 4343 | erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); |
4345 | kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); | ||
4346 | 4344 | ||
4347 | if (nextents == 0) { | 4345 | if (nextents == 0) { |
4348 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); | 4346 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); |
4349 | } else if (!ifp->if_real_bytes) { | 4347 | } else if (!ifp->if_real_bytes) { |
4350 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); | 4348 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); |
4351 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { | 4349 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { |
@@ -4393,7 +4391,7 @@ xfs_iext_irec_new( | |||
4393 | 4391 | ||
4394 | /* Initialize new extent record */ | 4392 | /* Initialize new extent record */ |
4395 | erp = ifp->if_u1.if_ext_irec; | 4393 | erp = ifp->if_u1.if_ext_irec; |
4396 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); | 4394 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); |
4397 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | 4395 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; |
4398 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); | 4396 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); |
4399 | erp[erp_idx].er_extcount = 0; | 4397 | erp[erp_idx].er_extcount = 0; |
@@ -4451,8 +4449,7 @@ xfs_iext_irec_remove( | |||
4451 | * compaction policy is as follows: | 4449 | * compaction policy is as follows: |
4452 | * | 4450 | * |
4453 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) |
4454 | * Full Compaction: Extents occupy less than 10% of allocated space | 4452 | * Partial Compaction: Extents occupy less than 50% of allocated space |
4455 | * Partial Compaction: Extents occupy > 10% and < 50% of allocated space | ||
4456 | * No Compaction: Extents occupy at least 50% of allocated space | 4453 | * No Compaction: Extents occupy at least 50% of allocated space |
4457 | */ | 4454 | */ |
4458 | void | 4455 | void |
@@ -4473,8 +4470,6 @@ xfs_iext_irec_compact( | |||
4473 | xfs_iext_direct_to_inline(ifp, nextents); | 4470 | xfs_iext_direct_to_inline(ifp, nextents); |
4474 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4471 | } else if (nextents <= XFS_LINEAR_EXTS) { |
4475 | xfs_iext_indirect_to_direct(ifp); | 4472 | xfs_iext_indirect_to_direct(ifp); |
4476 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { | ||
4477 | xfs_iext_irec_compact_full(ifp); | ||
4478 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4473 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
4479 | xfs_iext_irec_compact_pages(ifp); | 4474 | xfs_iext_irec_compact_pages(ifp); |
4480 | } | 4475 | } |
@@ -4498,7 +4493,7 @@ xfs_iext_irec_compact_pages( | |||
4498 | erp_next = erp + 1; | 4493 | erp_next = erp + 1; |
4499 | if (erp_next->er_extcount <= | 4494 | if (erp_next->er_extcount <= |
4500 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4495 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
4501 | memmove(&erp->er_extbuf[erp->er_extcount], | 4496 | memcpy(&erp->er_extbuf[erp->er_extcount], |
4502 | erp_next->er_extbuf, erp_next->er_extcount * | 4497 | erp_next->er_extbuf, erp_next->er_extcount * |
4503 | sizeof(xfs_bmbt_rec_t)); | 4498 | sizeof(xfs_bmbt_rec_t)); |
4504 | erp->er_extcount += erp_next->er_extcount; | 4499 | erp->er_extcount += erp_next->er_extcount; |
@@ -4518,91 +4513,6 @@ xfs_iext_irec_compact_pages( | |||
4518 | } | 4513 | } |
4519 | 4514 | ||
4520 | /* | 4515 | /* |
4521 | * Fully compact the extent records managed by the indirection array. | ||
4522 | */ | ||
4523 | void | ||
4524 | xfs_iext_irec_compact_full( | ||
4525 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
4526 | { | ||
4527 | xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ | ||
4528 | xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ | ||
4529 | int erp_idx = 0; /* extent irec index */ | ||
4530 | int ext_avail; /* empty entries in ex list */ | ||
4531 | int ext_diff; /* number of exts to add */ | ||
4532 | int nlists; /* number of irec's (ex lists) */ | ||
4533 | |||
4534 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
4535 | |||
4536 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4537 | erp = ifp->if_u1.if_ext_irec; | ||
4538 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4539 | erp_next = erp + 1; | ||
4540 | ep_next = erp_next->er_extbuf; | ||
4541 | |||
4542 | while (erp_idx < nlists - 1) { | ||
4543 | /* | ||
4544 | * Check how many extent records are available in this irec. | ||
4545 | * If there is none skip the whole exercise. | ||
4546 | */ | ||
4547 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
4548 | if (ext_avail) { | ||
4549 | |||
4550 | /* | ||
4551 | * Copy over as many as possible extent records into | ||
4552 | * the previous page. | ||
4553 | */ | ||
4554 | ext_diff = MIN(ext_avail, erp_next->er_extcount); | ||
4555 | memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
4556 | erp->er_extcount += ext_diff; | ||
4557 | erp_next->er_extcount -= ext_diff; | ||
4558 | |||
4559 | /* | ||
4560 | * If the next irec is empty now we can simply | ||
4561 | * remove it. | ||
4562 | */ | ||
4563 | if (erp_next->er_extcount == 0) { | ||
4564 | /* | ||
4565 | * Free page before removing extent record | ||
4566 | * so er_extoffs don't get modified in | ||
4567 | * xfs_iext_irec_remove. | ||
4568 | */ | ||
4569 | kmem_free(erp_next->er_extbuf); | ||
4570 | erp_next->er_extbuf = NULL; | ||
4571 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
4572 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4573 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4574 | |||
4575 | /* | ||
4576 | * If the next irec is not empty move up the content | ||
4577 | * that has not been copied to the previous page to | ||
4578 | * the beginning of this one. | ||
4579 | */ | ||
4580 | } else { | ||
4581 | memmove(erp_next->er_extbuf, &ep_next[ext_diff], | ||
4582 | erp_next->er_extcount * | ||
4583 | sizeof(xfs_bmbt_rec_t)); | ||
4584 | ep_next = erp_next->er_extbuf; | ||
4585 | memset(&ep_next[erp_next->er_extcount], 0, | ||
4586 | (XFS_LINEAR_EXTS - | ||
4587 | erp_next->er_extcount) * | ||
4588 | sizeof(xfs_bmbt_rec_t)); | ||
4589 | } | ||
4590 | } | ||
4591 | |||
4592 | if (erp->er_extcount == XFS_LINEAR_EXTS) { | ||
4593 | erp_idx++; | ||
4594 | if (erp_idx < nlists) | ||
4595 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4596 | else | ||
4597 | break; | ||
4598 | } | ||
4599 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4600 | erp_next = erp + 1; | ||
4601 | ep_next = erp_next->er_extbuf; | ||
4602 | } | ||
4603 | } | ||
4604 | |||
4605 | /* | ||
4606 | * This is called to update the er_extoff field in the indirection | 4516 | * This is called to update the er_extoff field in the indirection |
4607 | * array when extents have been added or removed from one of the | 4517 | * array when extents have been added or removed from one of the |
4608 | * extent lists. erp_idx contains the irec index to begin updating | 4518 | * extent lists. erp_idx contains the irec index to begin updating |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 17a04b6321ed..1420c49674d7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -87,8 +87,7 @@ typedef struct xfs_ifork { | |||
87 | * Flags for xfs_ichgtime(). | 87 | * Flags for xfs_ichgtime(). |
88 | */ | 88 | */ |
89 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ | 89 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ |
90 | #define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ | 90 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ |
91 | #define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */ | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * Per-fork incore inode flags. | 93 | * Per-fork incore inode flags. |
@@ -204,7 +203,7 @@ typedef struct xfs_inode { | |||
204 | struct xfs_inode *i_mprev; /* ptr to prev inode */ | 203 | struct xfs_inode *i_mprev; /* ptr to prev inode */ |
205 | struct xfs_mount *i_mount; /* fs mount struct ptr */ | 204 | struct xfs_mount *i_mount; /* fs mount struct ptr */ |
206 | struct list_head i_reclaim; /* reclaim list */ | 205 | struct list_head i_reclaim; /* reclaim list */ |
207 | bhv_vnode_t *i_vnode; /* vnode backpointer */ | 206 | struct inode *i_vnode; /* vnode backpointer */ |
208 | struct xfs_dquot *i_udquot; /* user dquot */ | 207 | struct xfs_dquot *i_udquot; /* user dquot */ |
209 | struct xfs_dquot *i_gdquot; /* group dquot */ | 208 | struct xfs_dquot *i_gdquot; /* group dquot */ |
210 | 209 | ||
@@ -223,7 +222,7 @@ typedef struct xfs_inode { | |||
223 | struct xfs_inode_log_item *i_itemp; /* logging information */ | 222 | struct xfs_inode_log_item *i_itemp; /* logging information */ |
224 | mrlock_t i_lock; /* inode lock */ | 223 | mrlock_t i_lock; /* inode lock */ |
225 | mrlock_t i_iolock; /* inode IO lock */ | 224 | mrlock_t i_iolock; /* inode IO lock */ |
226 | sema_t i_flock; /* inode flush lock */ | 225 | struct completion i_flush; /* inode flush completion q */ |
227 | atomic_t i_pincount; /* inode pin count */ | 226 | atomic_t i_pincount; /* inode pin count */ |
228 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ | 227 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ |
229 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 228 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
@@ -263,6 +262,18 @@ typedef struct xfs_inode { | |||
263 | #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ | 262 | #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ |
264 | (ip)->i_size : (ip)->i_d.di_size; | 263 | (ip)->i_size : (ip)->i_d.di_size; |
265 | 264 | ||
265 | /* Convert from vfs inode to xfs inode */ | ||
266 | static inline struct xfs_inode *XFS_I(struct inode *inode) | ||
267 | { | ||
268 | return (struct xfs_inode *)inode->i_private; | ||
269 | } | ||
270 | |||
271 | /* convert from xfs inode to vfs inode */ | ||
272 | static inline struct inode *VFS_I(struct xfs_inode *ip) | ||
273 | { | ||
274 | return (struct inode *)ip->i_vnode; | ||
275 | } | ||
276 | |||
266 | /* | 277 | /* |
267 | * i_flags helper functions | 278 | * i_flags helper functions |
268 | */ | 279 | */ |
@@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
439 | #define XFS_ITRUNC_DEFINITE 0x1 | 450 | #define XFS_ITRUNC_DEFINITE 0x1 |
440 | #define XFS_ITRUNC_MAYBE 0x2 | 451 | #define XFS_ITRUNC_MAYBE 0x2 |
441 | 452 | ||
442 | #define XFS_ITOV(ip) ((ip)->i_vnode) | ||
443 | #define XFS_ITOV_NULL(ip) ((ip)->i_vnode) | ||
444 | |||
445 | /* | 453 | /* |
446 | * For multiple groups support: if S_ISGID bit is set in the parent | 454 | * For multiple groups support: if S_ISGID bit is set in the parent |
447 | * directory, group of new file is set to that of the parent, and | 455 | * directory, group of new file is set to that of the parent, and |
@@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); | |||
473 | void xfs_iunlock(xfs_inode_t *, uint); | 481 | void xfs_iunlock(xfs_inode_t *, uint); |
474 | void xfs_ilock_demote(xfs_inode_t *, uint); | 482 | void xfs_ilock_demote(xfs_inode_t *, uint); |
475 | int xfs_isilocked(xfs_inode_t *, uint); | 483 | int xfs_isilocked(xfs_inode_t *, uint); |
476 | void xfs_iflock(xfs_inode_t *); | ||
477 | int xfs_iflock_nowait(xfs_inode_t *); | ||
478 | uint xfs_ilock_map_shared(xfs_inode_t *); | 484 | uint xfs_ilock_map_shared(xfs_inode_t *); |
479 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); | 485 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); |
480 | void xfs_ifunlock(xfs_inode_t *); | ||
481 | void xfs_ireclaim(xfs_inode_t *); | 486 | void xfs_ireclaim(xfs_inode_t *); |
482 | int xfs_finish_reclaim(xfs_inode_t *, int, int); | 487 | int xfs_finish_reclaim(xfs_inode_t *, int, int); |
483 | int xfs_finish_reclaim_all(struct xfs_mount *, int); | 488 | int xfs_finish_reclaim_all(struct xfs_mount *, int); |
@@ -522,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *); | |||
522 | void xfs_ichgtime(xfs_inode_t *, int); | 527 | void xfs_ichgtime(xfs_inode_t *, int); |
523 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); | 528 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); |
524 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 529 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
530 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | ||
525 | 531 | ||
526 | void xfs_synchronize_atime(xfs_inode_t *); | 532 | void xfs_synchronize_atime(xfs_inode_t *); |
527 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); | 533 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); |
@@ -570,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone; | |||
570 | extern struct kmem_zone *xfs_inode_zone; | 576 | extern struct kmem_zone *xfs_inode_zone; |
571 | extern struct kmem_zone *xfs_ili_zone; | 577 | extern struct kmem_zone *xfs_ili_zone; |
572 | 578 | ||
579 | /* | ||
580 | * Manage the i_flush queue embedded in the inode. This completion | ||
581 | * queue synchronizes processes attempting to flush the in-core | ||
582 | * inode back to disk. | ||
583 | */ | ||
584 | static inline void xfs_iflock(xfs_inode_t *ip) | ||
585 | { | ||
586 | wait_for_completion(&ip->i_flush); | ||
587 | } | ||
588 | |||
589 | static inline int xfs_iflock_nowait(xfs_inode_t *ip) | ||
590 | { | ||
591 | return try_wait_for_completion(&ip->i_flush); | ||
592 | } | ||
593 | |||
594 | static inline void xfs_ifunlock(xfs_inode_t *ip) | ||
595 | { | ||
596 | complete(&ip->i_flush); | ||
597 | } | ||
598 | |||
573 | #endif /* __KERNEL__ */ | 599 | #endif /* __KERNEL__ */ |
574 | 600 | ||
575 | #endif /* __XFS_INODE_H__ */ | 601 | #endif /* __XFS_INODE_H__ */ |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eee08a32c26..97c7452e2620 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -779,11 +779,10 @@ xfs_inode_item_pushbuf( | |||
779 | ASSERT(iip->ili_push_owner == current_pid()); | 779 | ASSERT(iip->ili_push_owner == current_pid()); |
780 | 780 | ||
781 | /* | 781 | /* |
782 | * If flushlock isn't locked anymore, chances are that the | 782 | * If a flush is not in progress anymore, chances are that the |
783 | * inode flush completed and the inode was taken off the AIL. | 783 | * inode was taken off the AIL. So, just get out. |
784 | * So, just get out. | ||
785 | */ | 784 | */ |
786 | if (!issemalocked(&(ip->i_flock)) || | 785 | if (completion_done(&ip->i_flush) || |
787 | ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { | 786 | ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { |
788 | iip->ili_pushbuf_flag = 0; | 787 | iip->ili_pushbuf_flag = 0; |
789 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 788 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
@@ -805,7 +804,7 @@ xfs_inode_item_pushbuf( | |||
805 | * If not, we can flush it async. | 804 | * If not, we can flush it async. |
806 | */ | 805 | */ |
807 | dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && | 806 | dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && |
808 | issemalocked(&(ip->i_flock))); | 807 | !completion_done(&ip->i_flush)); |
809 | iip->ili_pushbuf_flag = 0; | 808 | iip->ili_pushbuf_flag = 0; |
810 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 809 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
811 | xfs_buftrace("INODE ITEM PUSH", bp); | 810 | xfs_buftrace("INODE ITEM PUSH", bp); |
@@ -858,7 +857,7 @@ xfs_inode_item_push( | |||
858 | ip = iip->ili_inode; | 857 | ip = iip->ili_inode; |
859 | 858 | ||
860 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 859 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
861 | ASSERT(issemalocked(&(ip->i_flock))); | 860 | ASSERT(!completion_done(&ip->i_flush)); |
862 | /* | 861 | /* |
863 | * Since we were able to lock the inode's flush lock and | 862 | * Since we were able to lock the inode's flush lock and |
864 | * we found it on the AIL, the inode must be dirty. This | 863 | * we found it on the AIL, the inode must be dirty. This |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9a3ef9dcaeb9..cf6754a3c5b3 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -59,7 +59,6 @@ xfs_bulkstat_one_iget( | |||
59 | { | 59 | { |
60 | xfs_icdinode_t *dic; /* dinode core info pointer */ | 60 | xfs_icdinode_t *dic; /* dinode core info pointer */ |
61 | xfs_inode_t *ip; /* incore inode pointer */ | 61 | xfs_inode_t *ip; /* incore inode pointer */ |
62 | bhv_vnode_t *vp; | ||
63 | int error; | 62 | int error; |
64 | 63 | ||
65 | error = xfs_iget(mp, NULL, ino, | 64 | error = xfs_iget(mp, NULL, ino, |
@@ -72,7 +71,6 @@ xfs_bulkstat_one_iget( | |||
72 | ASSERT(ip != NULL); | 71 | ASSERT(ip != NULL); |
73 | ASSERT(ip->i_blkno != (xfs_daddr_t)0); | 72 | ASSERT(ip->i_blkno != (xfs_daddr_t)0); |
74 | 73 | ||
75 | vp = XFS_ITOV(ip); | ||
76 | dic = &ip->i_d; | 74 | dic = &ip->i_d; |
77 | 75 | ||
78 | /* xfs_iget returns the following without needing | 76 | /* xfs_iget returns the following without needing |
@@ -85,7 +83,7 @@ xfs_bulkstat_one_iget( | |||
85 | buf->bs_uid = dic->di_uid; | 83 | buf->bs_uid = dic->di_uid; |
86 | buf->bs_gid = dic->di_gid; | 84 | buf->bs_gid = dic->di_gid; |
87 | buf->bs_size = dic->di_size; | 85 | buf->bs_size = dic->di_size; |
88 | vn_atime_to_bstime(vp, &buf->bs_atime); | 86 | vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); |
89 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; | 87 | buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; |
90 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; | 88 | buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; |
91 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; | 89 | buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 91b00a5686cd..0b02c6443551 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | |||
124 | STATIC int xlog_iclogs_empty(xlog_t *log); | 124 | STATIC int xlog_iclogs_empty(xlog_t *log); |
125 | 125 | ||
126 | #if defined(XFS_LOG_TRACE) | 126 | #if defined(XFS_LOG_TRACE) |
127 | |||
128 | #define XLOG_TRACE_LOGGRANT_SIZE 2048 | ||
129 | #define XLOG_TRACE_ICLOG_SIZE 256 | ||
130 | |||
131 | void | ||
132 | xlog_trace_loggrant_alloc(xlog_t *log) | ||
133 | { | ||
134 | log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | xlog_trace_loggrant_dealloc(xlog_t *log) | ||
139 | { | ||
140 | ktrace_free(log->l_grant_trace); | ||
141 | } | ||
142 | |||
127 | void | 143 | void |
128 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 144 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
129 | { | 145 | { |
130 | unsigned long cnts; | 146 | unsigned long cnts; |
131 | 147 | ||
132 | if (!log->l_grant_trace) { | ||
133 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
134 | if (!log->l_grant_trace) | ||
135 | return; | ||
136 | } | ||
137 | /* ticket counts are 1 byte each */ | 148 | /* ticket counts are 1 byte each */ |
138 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | 149 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; |
139 | 150 | ||
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
157 | } | 168 | } |
158 | 169 | ||
159 | void | 170 | void |
171 | xlog_trace_iclog_alloc(xlog_in_core_t *iclog) | ||
172 | { | ||
173 | iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); | ||
174 | } | ||
175 | |||
176 | void | ||
177 | xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) | ||
178 | { | ||
179 | ktrace_free(iclog->ic_trace); | ||
180 | } | ||
181 | |||
182 | void | ||
160 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | 183 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) |
161 | { | 184 | { |
162 | if (!iclog->ic_trace) | ||
163 | iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); | ||
164 | ktrace_enter(iclog->ic_trace, | 185 | ktrace_enter(iclog->ic_trace, |
165 | (void *)((unsigned long)state), | 186 | (void *)((unsigned long)state), |
166 | (void *)((unsigned long)current_pid()), | 187 | (void *)((unsigned long)current_pid()), |
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | |||
170 | (void *)NULL, (void *)NULL); | 191 | (void *)NULL, (void *)NULL); |
171 | } | 192 | } |
172 | #else | 193 | #else |
194 | |||
195 | #define xlog_trace_loggrant_alloc(log) | ||
196 | #define xlog_trace_loggrant_dealloc(log) | ||
173 | #define xlog_trace_loggrant(log,tic,string) | 197 | #define xlog_trace_loggrant(log,tic,string) |
198 | |||
199 | #define xlog_trace_iclog_alloc(iclog) | ||
200 | #define xlog_trace_iclog_dealloc(iclog) | ||
174 | #define xlog_trace_iclog(iclog,state) | 201 | #define xlog_trace_iclog(iclog,state) |
202 | |||
175 | #endif /* XFS_LOG_TRACE */ | 203 | #endif /* XFS_LOG_TRACE */ |
176 | 204 | ||
177 | 205 | ||
@@ -336,15 +364,12 @@ xfs_log_done(xfs_mount_t *mp, | |||
336 | } else { | 364 | } else { |
337 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); | 365 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); |
338 | xlog_regrant_reserve_log_space(log, ticket); | 366 | xlog_regrant_reserve_log_space(log, ticket); |
339 | } | 367 | /* If this ticket was a permanent reservation and we aren't |
340 | 368 | * trying to release it, reset the inited flags; so next time | |
341 | /* If this ticket was a permanent reservation and we aren't | 369 | * we write, a start record will be written out. |
342 | * trying to release it, reset the inited flags; so next time | 370 | */ |
343 | * we write, a start record will be written out. | ||
344 | */ | ||
345 | if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) && | ||
346 | (flags & XFS_LOG_REL_PERM_RESERV) == 0) | ||
347 | ticket->t_flags |= XLOG_TIC_INITED; | 371 | ticket->t_flags |= XLOG_TIC_INITED; |
372 | } | ||
348 | 373 | ||
349 | return lsn; | 374 | return lsn; |
350 | } /* xfs_log_done */ | 375 | } /* xfs_log_done */ |
@@ -357,11 +382,11 @@ xfs_log_done(xfs_mount_t *mp, | |||
357 | * Asynchronous forces are implemented by setting the WANT_SYNC | 382 | * Asynchronous forces are implemented by setting the WANT_SYNC |
358 | * bit in the appropriate in-core log and then returning. | 383 | * bit in the appropriate in-core log and then returning. |
359 | * | 384 | * |
360 | * Synchronous forces are implemented with a semaphore. All callers | 385 | * Synchronous forces are implemented with a signal variable. All callers |
361 | * to force a given lsn to disk will wait on a semaphore attached to the | 386 | * to force a given lsn to disk will wait on the sv attached to the |
362 | * specific in-core log. When given in-core log finally completes its | 387 | * specific in-core log. When given in-core log finally completes its |
363 | * write to disk, that thread will wake up all threads waiting on the | 388 | * write to disk, that thread will wake up all threads waiting on the |
364 | * semaphore. | 389 | * sv. |
365 | */ | 390 | */ |
366 | int | 391 | int |
367 | _xfs_log_force( | 392 | _xfs_log_force( |
@@ -588,12 +613,12 @@ error: | |||
588 | * mp - ubiquitous xfs mount point structure | 613 | * mp - ubiquitous xfs mount point structure |
589 | */ | 614 | */ |
590 | int | 615 | int |
591 | xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) | 616 | xfs_log_mount_finish(xfs_mount_t *mp) |
592 | { | 617 | { |
593 | int error; | 618 | int error; |
594 | 619 | ||
595 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) | 620 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) |
596 | error = xlog_recover_finish(mp->m_log, mfsi_flags); | 621 | error = xlog_recover_finish(mp->m_log); |
597 | else { | 622 | else { |
598 | error = 0; | 623 | error = 0; |
599 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); | 624 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
@@ -707,7 +732,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
707 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 732 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
708 | iclog->ic_state == XLOG_STATE_DIRTY)) { | 733 | iclog->ic_state == XLOG_STATE_DIRTY)) { |
709 | if (!XLOG_FORCED_SHUTDOWN(log)) { | 734 | if (!XLOG_FORCED_SHUTDOWN(log)) { |
710 | sv_wait(&iclog->ic_forcesema, PMEM, | 735 | sv_wait(&iclog->ic_force_wait, PMEM, |
711 | &log->l_icloglock, s); | 736 | &log->l_icloglock, s); |
712 | } else { | 737 | } else { |
713 | spin_unlock(&log->l_icloglock); | 738 | spin_unlock(&log->l_icloglock); |
@@ -748,7 +773,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
748 | || iclog->ic_state == XLOG_STATE_DIRTY | 773 | || iclog->ic_state == XLOG_STATE_DIRTY |
749 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { | 774 | || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
750 | 775 | ||
751 | sv_wait(&iclog->ic_forcesema, PMEM, | 776 | sv_wait(&iclog->ic_force_wait, PMEM, |
752 | &log->l_icloglock, s); | 777 | &log->l_icloglock, s); |
753 | } else { | 778 | } else { |
754 | spin_unlock(&log->l_icloglock); | 779 | spin_unlock(&log->l_icloglock); |
@@ -838,7 +863,7 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
838 | break; | 863 | break; |
839 | tail_lsn = 0; | 864 | tail_lsn = 0; |
840 | free_bytes -= tic->t_unit_res; | 865 | free_bytes -= tic->t_unit_res; |
841 | sv_signal(&tic->t_sema); | 866 | sv_signal(&tic->t_wait); |
842 | tic = tic->t_next; | 867 | tic = tic->t_next; |
843 | } while (tic != log->l_write_headq); | 868 | } while (tic != log->l_write_headq); |
844 | } | 869 | } |
@@ -859,7 +884,7 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
859 | break; | 884 | break; |
860 | tail_lsn = 0; | 885 | tail_lsn = 0; |
861 | free_bytes -= need_bytes; | 886 | free_bytes -= need_bytes; |
862 | sv_signal(&tic->t_sema); | 887 | sv_signal(&tic->t_wait); |
863 | tic = tic->t_next; | 888 | tic = tic->t_next; |
864 | } while (tic != log->l_reserve_headq); | 889 | } while (tic != log->l_reserve_headq); |
865 | } | 890 | } |
@@ -1008,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp) | |||
1008 | l = iclog->ic_log; | 1033 | l = iclog->ic_log; |
1009 | 1034 | ||
1010 | /* | 1035 | /* |
1011 | * If the ordered flag has been removed by a lower | 1036 | * If the _XFS_BARRIER_FAILED flag was set by a lower |
1012 | * layer, it means the underlyin device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
1013 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
1014 | */ | 1039 | */ |
1015 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { | 1040 | if (bp->b_flags & _XFS_BARRIER_FAILED) { |
1041 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
1016 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1042 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1017 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1043 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
1018 | "xlog_iodone: Barriers are no longer supported" | 1044 | "xlog_iodone: Barriers are no longer supported" |
@@ -1234,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1234 | spin_lock_init(&log->l_grant_lock); | 1260 | spin_lock_init(&log->l_grant_lock); |
1235 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | 1261 | sv_init(&log->l_flush_wait, 0, "flush_wait"); |
1236 | 1262 | ||
1263 | xlog_trace_loggrant_alloc(log); | ||
1237 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1264 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1238 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1265 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
1239 | 1266 | ||
@@ -1285,8 +1312,10 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1285 | 1312 | ||
1286 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); | 1313 | ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); |
1287 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); | 1314 | ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); |
1288 | sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); | 1315 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); |
1289 | sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); | 1316 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); |
1317 | |||
1318 | xlog_trace_iclog_alloc(iclog); | ||
1290 | 1319 | ||
1291 | iclogp = &iclog->ic_next; | 1320 | iclogp = &iclog->ic_next; |
1292 | } | 1321 | } |
@@ -1565,14 +1594,10 @@ xlog_dealloc_log(xlog_t *log) | |||
1565 | 1594 | ||
1566 | iclog = log->l_iclog; | 1595 | iclog = log->l_iclog; |
1567 | for (i=0; i<log->l_iclog_bufs; i++) { | 1596 | for (i=0; i<log->l_iclog_bufs; i++) { |
1568 | sv_destroy(&iclog->ic_forcesema); | 1597 | sv_destroy(&iclog->ic_force_wait); |
1569 | sv_destroy(&iclog->ic_writesema); | 1598 | sv_destroy(&iclog->ic_write_wait); |
1570 | xfs_buf_free(iclog->ic_bp); | 1599 | xfs_buf_free(iclog->ic_bp); |
1571 | #ifdef XFS_LOG_TRACE | 1600 | xlog_trace_iclog_dealloc(iclog); |
1572 | if (iclog->ic_trace != NULL) { | ||
1573 | ktrace_free(iclog->ic_trace); | ||
1574 | } | ||
1575 | #endif | ||
1576 | next_iclog = iclog->ic_next; | 1601 | next_iclog = iclog->ic_next; |
1577 | kmem_free(iclog); | 1602 | kmem_free(iclog); |
1578 | iclog = next_iclog; | 1603 | iclog = next_iclog; |
@@ -1581,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1581 | spinlock_destroy(&log->l_grant_lock); | 1606 | spinlock_destroy(&log->l_grant_lock); |
1582 | 1607 | ||
1583 | xfs_buf_free(log->l_xbuf); | 1608 | xfs_buf_free(log->l_xbuf); |
1584 | #ifdef XFS_LOG_TRACE | 1609 | xlog_trace_loggrant_dealloc(log); |
1585 | if (log->l_trace != NULL) { | ||
1586 | ktrace_free(log->l_trace); | ||
1587 | } | ||
1588 | if (log->l_grant_trace != NULL) { | ||
1589 | ktrace_free(log->l_grant_trace); | ||
1590 | } | ||
1591 | #endif | ||
1592 | log->l_mp->m_log = NULL; | 1610 | log->l_mp->m_log = NULL; |
1593 | kmem_free(log); | 1611 | kmem_free(log); |
1594 | } /* xlog_dealloc_log */ | 1612 | } /* xlog_dealloc_log */ |
@@ -1976,7 +1994,7 @@ xlog_write(xfs_mount_t * mp, | |||
1976 | /* Clean iclogs starting from the head. This ordering must be | 1994 | /* Clean iclogs starting from the head. This ordering must be |
1977 | * maintained, so an iclog doesn't become ACTIVE beyond one that | 1995 | * maintained, so an iclog doesn't become ACTIVE beyond one that |
1978 | * is SYNCING. This is also required to maintain the notion that we use | 1996 | * is SYNCING. This is also required to maintain the notion that we use |
1979 | * a counting semaphore to hold off would be writers to the log when every | 1997 | * an ordered wait queue to hold off would be writers to the log when every |
1980 | * iclog is trying to sync to disk. | 1998 | * iclog is trying to sync to disk. |
1981 | * | 1999 | * |
1982 | * State Change: DIRTY -> ACTIVE | 2000 | * State Change: DIRTY -> ACTIVE |
@@ -2240,7 +2258,7 @@ xlog_state_do_callback( | |||
2240 | xlog_state_clean_log(log); | 2258 | xlog_state_clean_log(log); |
2241 | 2259 | ||
2242 | /* wake up threads waiting in xfs_log_force() */ | 2260 | /* wake up threads waiting in xfs_log_force() */ |
2243 | sv_broadcast(&iclog->ic_forcesema); | 2261 | sv_broadcast(&iclog->ic_force_wait); |
2244 | 2262 | ||
2245 | iclog = iclog->ic_next; | 2263 | iclog = iclog->ic_next; |
2246 | } while (first_iclog != iclog); | 2264 | } while (first_iclog != iclog); |
@@ -2302,8 +2320,7 @@ xlog_state_do_callback( | |||
2302 | * the second completion goes through. | 2320 | * the second completion goes through. |
2303 | * | 2321 | * |
2304 | * Callbacks could take time, so they are done outside the scope of the | 2322 | * Callbacks could take time, so they are done outside the scope of the |
2305 | * global state machine log lock. Assume that the calls to cvsema won't | 2323 | * global state machine log lock. |
2306 | * take a long time. At least we know it won't sleep. | ||
2307 | */ | 2324 | */ |
2308 | STATIC void | 2325 | STATIC void |
2309 | xlog_state_done_syncing( | 2326 | xlog_state_done_syncing( |
@@ -2339,7 +2356,7 @@ xlog_state_done_syncing( | |||
2339 | * iclog buffer, we wake them all, one will get to do the | 2356 | * iclog buffer, we wake them all, one will get to do the |
2340 | * I/O, the others get to wait for the result. | 2357 | * I/O, the others get to wait for the result. |
2341 | */ | 2358 | */ |
2342 | sv_broadcast(&iclog->ic_writesema); | 2359 | sv_broadcast(&iclog->ic_write_wait); |
2343 | spin_unlock(&log->l_icloglock); | 2360 | spin_unlock(&log->l_icloglock); |
2344 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ | 2361 | xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
2345 | } /* xlog_state_done_syncing */ | 2362 | } /* xlog_state_done_syncing */ |
@@ -2347,11 +2364,9 @@ xlog_state_done_syncing( | |||
2347 | 2364 | ||
2348 | /* | 2365 | /* |
2349 | * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must | 2366 | * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must |
2350 | * sleep. The flush semaphore is set to the number of in-core buffers and | 2367 | * sleep. We wait on the flush queue on the head iclog as that should be |
2351 | * decremented around disk syncing. Therefore, if all buffers are syncing, | 2368 | * the first iclog to complete flushing. Hence if all iclogs are syncing, |
2352 | * this semaphore will cause new writes to sleep until a sync completes. | 2369 | * we will wait here and all new writes will sleep until a sync completes. |
2353 | * Otherwise, this code just does p() followed by v(). This approximates | ||
2354 | * a sleep/wakeup except we can't race. | ||
2355 | * | 2370 | * |
2356 | * The in-core logs are used in a circular fashion. They are not used | 2371 | * The in-core logs are used in a circular fashion. They are not used |
2357 | * out-of-order even when an iclog past the head is free. | 2372 | * out-of-order even when an iclog past the head is free. |
@@ -2508,7 +2523,7 @@ xlog_grant_log_space(xlog_t *log, | |||
2508 | goto error_return; | 2523 | goto error_return; |
2509 | 2524 | ||
2510 | XFS_STATS_INC(xs_sleep_logspace); | 2525 | XFS_STATS_INC(xs_sleep_logspace); |
2511 | sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2526 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); |
2512 | /* | 2527 | /* |
2513 | * If we got an error, and the filesystem is shutting down, | 2528 | * If we got an error, and the filesystem is shutting down, |
2514 | * we'll catch it down below. So just continue... | 2529 | * we'll catch it down below. So just continue... |
@@ -2534,7 +2549,7 @@ redo: | |||
2534 | xlog_trace_loggrant(log, tic, | 2549 | xlog_trace_loggrant(log, tic, |
2535 | "xlog_grant_log_space: sleep 2"); | 2550 | "xlog_grant_log_space: sleep 2"); |
2536 | XFS_STATS_INC(xs_sleep_logspace); | 2551 | XFS_STATS_INC(xs_sleep_logspace); |
2537 | sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2552 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); |
2538 | 2553 | ||
2539 | if (XLOG_FORCED_SHUTDOWN(log)) { | 2554 | if (XLOG_FORCED_SHUTDOWN(log)) { |
2540 | spin_lock(&log->l_grant_lock); | 2555 | spin_lock(&log->l_grant_lock); |
@@ -2633,7 +2648,7 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2633 | if (free_bytes < ntic->t_unit_res) | 2648 | if (free_bytes < ntic->t_unit_res) |
2634 | break; | 2649 | break; |
2635 | free_bytes -= ntic->t_unit_res; | 2650 | free_bytes -= ntic->t_unit_res; |
2636 | sv_signal(&ntic->t_sema); | 2651 | sv_signal(&ntic->t_wait); |
2637 | ntic = ntic->t_next; | 2652 | ntic = ntic->t_next; |
2638 | } while (ntic != log->l_write_headq); | 2653 | } while (ntic != log->l_write_headq); |
2639 | 2654 | ||
@@ -2644,7 +2659,7 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2644 | xlog_trace_loggrant(log, tic, | 2659 | xlog_trace_loggrant(log, tic, |
2645 | "xlog_regrant_write_log_space: sleep 1"); | 2660 | "xlog_regrant_write_log_space: sleep 1"); |
2646 | XFS_STATS_INC(xs_sleep_logspace); | 2661 | XFS_STATS_INC(xs_sleep_logspace); |
2647 | sv_wait(&tic->t_sema, PINOD|PLTWAIT, | 2662 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, |
2648 | &log->l_grant_lock, s); | 2663 | &log->l_grant_lock, s); |
2649 | 2664 | ||
2650 | /* If we're shutting down, this tic is already | 2665 | /* If we're shutting down, this tic is already |
@@ -2673,7 +2688,7 @@ redo: | |||
2673 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2688 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) |
2674 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2689 | xlog_ins_ticketq(&log->l_write_headq, tic); |
2675 | XFS_STATS_INC(xs_sleep_logspace); | 2690 | XFS_STATS_INC(xs_sleep_logspace); |
2676 | sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2691 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); |
2677 | 2692 | ||
2678 | /* If we're shutting down, this tic is already off the queue */ | 2693 | /* If we're shutting down, this tic is already off the queue */ |
2679 | if (XLOG_FORCED_SHUTDOWN(log)) { | 2694 | if (XLOG_FORCED_SHUTDOWN(log)) { |
@@ -2916,7 +2931,7 @@ xlog_state_switch_iclogs(xlog_t *log, | |||
2916 | * 2. the current iclog is drity, and the previous iclog is in the | 2931 | * 2. the current iclog is drity, and the previous iclog is in the |
2917 | * active or dirty state. | 2932 | * active or dirty state. |
2918 | * | 2933 | * |
2919 | * We may sleep (call psema) if: | 2934 | * We may sleep if: |
2920 | * | 2935 | * |
2921 | * 1. the current iclog is not in the active nor dirty state. | 2936 | * 1. the current iclog is not in the active nor dirty state. |
2922 | * 2. the current iclog dirty, and the previous iclog is not in the | 2937 | * 2. the current iclog dirty, and the previous iclog is not in the |
@@ -3013,7 +3028,7 @@ maybe_sleep: | |||
3013 | return XFS_ERROR(EIO); | 3028 | return XFS_ERROR(EIO); |
3014 | } | 3029 | } |
3015 | XFS_STATS_INC(xs_log_force_sleep); | 3030 | XFS_STATS_INC(xs_log_force_sleep); |
3016 | sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); | 3031 | sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); |
3017 | /* | 3032 | /* |
3018 | * No need to grab the log lock here since we're | 3033 | * No need to grab the log lock here since we're |
3019 | * only deciding whether or not to return EIO | 3034 | * only deciding whether or not to return EIO |
@@ -3096,7 +3111,7 @@ try_again: | |||
3096 | XLOG_STATE_SYNCING))) { | 3111 | XLOG_STATE_SYNCING))) { |
3097 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); | 3112 | ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); |
3098 | XFS_STATS_INC(xs_log_force_sleep); | 3113 | XFS_STATS_INC(xs_log_force_sleep); |
3099 | sv_wait(&iclog->ic_prev->ic_writesema, PSWP, | 3114 | sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, |
3100 | &log->l_icloglock, s); | 3115 | &log->l_icloglock, s); |
3101 | *log_flushed = 1; | 3116 | *log_flushed = 1; |
3102 | already_slept = 1; | 3117 | already_slept = 1; |
@@ -3116,7 +3131,7 @@ try_again: | |||
3116 | !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { | 3131 | !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { |
3117 | 3132 | ||
3118 | /* | 3133 | /* |
3119 | * Don't wait on the forcesema if we know that we've | 3134 | * Don't wait on completion if we know that we've |
3120 | * gotten a log write error. | 3135 | * gotten a log write error. |
3121 | */ | 3136 | */ |
3122 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 3137 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
@@ -3124,7 +3139,7 @@ try_again: | |||
3124 | return XFS_ERROR(EIO); | 3139 | return XFS_ERROR(EIO); |
3125 | } | 3140 | } |
3126 | XFS_STATS_INC(xs_log_force_sleep); | 3141 | XFS_STATS_INC(xs_log_force_sleep); |
3127 | sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); | 3142 | sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); |
3128 | /* | 3143 | /* |
3129 | * No need to grab the log lock here since we're | 3144 | * No need to grab the log lock here since we're |
3130 | * only deciding whether or not to return EIO | 3145 | * only deciding whether or not to return EIO |
@@ -3180,7 +3195,7 @@ STATIC void | |||
3180 | xlog_ticket_put(xlog_t *log, | 3195 | xlog_ticket_put(xlog_t *log, |
3181 | xlog_ticket_t *ticket) | 3196 | xlog_ticket_t *ticket) |
3182 | { | 3197 | { |
3183 | sv_destroy(&ticket->t_sema); | 3198 | sv_destroy(&ticket->t_wait); |
3184 | kmem_zone_free(xfs_log_ticket_zone, ticket); | 3199 | kmem_zone_free(xfs_log_ticket_zone, ticket); |
3185 | } /* xlog_ticket_put */ | 3200 | } /* xlog_ticket_put */ |
3186 | 3201 | ||
@@ -3270,7 +3285,7 @@ xlog_ticket_get(xlog_t *log, | |||
3270 | tic->t_trans_type = 0; | 3285 | tic->t_trans_type = 0; |
3271 | if (xflags & XFS_LOG_PERM_RESERV) | 3286 | if (xflags & XFS_LOG_PERM_RESERV) |
3272 | tic->t_flags |= XLOG_TIC_PERM_RESERV; | 3287 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
3273 | sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); | 3288 | sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); |
3274 | 3289 | ||
3275 | xlog_tic_reset_res(tic); | 3290 | xlog_tic_reset_res(tic); |
3276 | 3291 | ||
@@ -3557,14 +3572,14 @@ xfs_log_force_umount( | |||
3557 | */ | 3572 | */ |
3558 | if ((tic = log->l_reserve_headq)) { | 3573 | if ((tic = log->l_reserve_headq)) { |
3559 | do { | 3574 | do { |
3560 | sv_signal(&tic->t_sema); | 3575 | sv_signal(&tic->t_wait); |
3561 | tic = tic->t_next; | 3576 | tic = tic->t_next; |
3562 | } while (tic != log->l_reserve_headq); | 3577 | } while (tic != log->l_reserve_headq); |
3563 | } | 3578 | } |
3564 | 3579 | ||
3565 | if ((tic = log->l_write_headq)) { | 3580 | if ((tic = log->l_write_headq)) { |
3566 | do { | 3581 | do { |
3567 | sv_signal(&tic->t_sema); | 3582 | sv_signal(&tic->t_wait); |
3568 | tic = tic->t_next; | 3583 | tic = tic->t_next; |
3569 | } while (tic != log->l_write_headq); | 3584 | } while (tic != log->l_write_headq); |
3570 | } | 3585 | } |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d1d678ecb63e..d47b91f10822 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp, | |||
149 | struct xfs_buftarg *log_target, | 149 | struct xfs_buftarg *log_target, |
150 | xfs_daddr_t start_block, | 150 | xfs_daddr_t start_block, |
151 | int num_bblocks); | 151 | int num_bblocks); |
152 | int xfs_log_mount_finish(struct xfs_mount *mp, int); | 152 | int xfs_log_mount_finish(struct xfs_mount *mp); |
153 | void xfs_log_move_tail(struct xfs_mount *mp, | 153 | void xfs_log_move_tail(struct xfs_mount *mp, |
154 | xfs_lsn_t tail_lsn); | 154 | xfs_lsn_t tail_lsn); |
155 | int xfs_log_notify(struct xfs_mount *mp, | 155 | int xfs_log_notify(struct xfs_mount *mp, |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 6245913196b4..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -241,7 +241,7 @@ typedef struct xlog_res { | |||
241 | } xlog_res_t; | 241 | } xlog_res_t; |
242 | 242 | ||
243 | typedef struct xlog_ticket { | 243 | typedef struct xlog_ticket { |
244 | sv_t t_sema; /* sleep on this semaphore : 20 */ | 244 | sv_t t_wait; /* ticket wait queue : 20 */ |
245 | struct xlog_ticket *t_next; /* :4|8 */ | 245 | struct xlog_ticket *t_next; /* :4|8 */ |
246 | struct xlog_ticket *t_prev; /* :4|8 */ | 246 | struct xlog_ticket *t_prev; /* :4|8 */ |
247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header { | |||
314 | * xlog_rec_header_t into the reserved space. | 314 | * xlog_rec_header_t into the reserved space. |
315 | * - ic_data follows, so a write to disk can start at the beginning of | 315 | * - ic_data follows, so a write to disk can start at the beginning of |
316 | * the iclog. | 316 | * the iclog. |
317 | * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. | 317 | * - ic_force_wait is used to implement synchronous forcing of the iclog to disk. |
318 | * - ic_next is the pointer to the next iclog in the ring. | 318 | * - ic_next is the pointer to the next iclog in the ring. |
319 | * - ic_bp is a pointer to the buffer used to write this incore log to disk. | 319 | * - ic_bp is a pointer to the buffer used to write this incore log to disk. |
320 | * - ic_log is a pointer back to the global log structure. | 320 | * - ic_log is a pointer back to the global log structure. |
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header { | |||
339 | * and move everything else out to subsequent cachelines. | 339 | * and move everything else out to subsequent cachelines. |
340 | */ | 340 | */ |
341 | typedef struct xlog_iclog_fields { | 341 | typedef struct xlog_iclog_fields { |
342 | sv_t ic_forcesema; | 342 | sv_t ic_force_wait; |
343 | sv_t ic_writesema; | 343 | sv_t ic_write_wait; |
344 | struct xlog_in_core *ic_next; | 344 | struct xlog_in_core *ic_next; |
345 | struct xlog_in_core *ic_prev; | 345 | struct xlog_in_core *ic_prev; |
346 | struct xfs_buf *ic_bp; | 346 | struct xfs_buf *ic_bp; |
@@ -377,8 +377,8 @@ typedef struct xlog_in_core { | |||
377 | /* | 377 | /* |
378 | * Defines to save our code from this glop. | 378 | * Defines to save our code from this glop. |
379 | */ | 379 | */ |
380 | #define ic_forcesema hic_fields.ic_forcesema | 380 | #define ic_force_wait hic_fields.ic_force_wait |
381 | #define ic_writesema hic_fields.ic_writesema | 381 | #define ic_write_wait hic_fields.ic_write_wait |
382 | #define ic_next hic_fields.ic_next | 382 | #define ic_next hic_fields.ic_next |
383 | #define ic_prev hic_fields.ic_prev | 383 | #define ic_prev hic_fields.ic_prev |
384 | #define ic_bp hic_fields.ic_bp | 384 | #define ic_bp hic_fields.ic_bp |
@@ -448,7 +448,6 @@ typedef struct log { | |||
448 | int l_grant_write_bytes; | 448 | int l_grant_write_bytes; |
449 | 449 | ||
450 | #ifdef XFS_LOG_TRACE | 450 | #ifdef XFS_LOG_TRACE |
451 | struct ktrace *l_trace; | ||
452 | struct ktrace *l_grant_trace; | 451 | struct ktrace *l_grant_trace; |
453 | #endif | 452 | #endif |
454 | 453 | ||
@@ -468,7 +467,7 @@ extern int xlog_find_tail(xlog_t *log, | |||
468 | xfs_daddr_t *head_blk, | 467 | xfs_daddr_t *head_blk, |
469 | xfs_daddr_t *tail_blk); | 468 | xfs_daddr_t *tail_blk); |
470 | extern int xlog_recover(xlog_t *log); | 469 | extern int xlog_recover(xlog_t *log); |
471 | extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); | 470 | extern int xlog_recover_finish(xlog_t *log); |
472 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 471 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
473 | extern void xlog_recover_process_iunlinks(xlog_t *log); | 472 | extern void xlog_recover_process_iunlinks(xlog_t *log); |
474 | 473 | ||
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9eb722ec744e..82d46ce69d5f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -3940,8 +3940,7 @@ xlog_recover( | |||
3940 | */ | 3940 | */ |
3941 | int | 3941 | int |
3942 | xlog_recover_finish( | 3942 | xlog_recover_finish( |
3943 | xlog_t *log, | 3943 | xlog_t *log) |
3944 | int mfsi_flags) | ||
3945 | { | 3944 | { |
3946 | /* | 3945 | /* |
3947 | * Now we're ready to do the transactions needed for the | 3946 | * Now we're ready to do the transactions needed for the |
@@ -3969,9 +3968,7 @@ xlog_recover_finish( | |||
3969 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, | 3968 | xfs_log_force(log->l_mp, (xfs_lsn_t)0, |
3970 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); | 3969 | (XFS_LOG_FORCE | XFS_LOG_SYNC)); |
3971 | 3970 | ||
3972 | if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { | 3971 | xlog_recover_process_iunlinks(log); |
3973 | xlog_recover_process_iunlinks(log); | ||
3974 | } | ||
3975 | 3972 | ||
3976 | xlog_recover_check_summary(log); | 3973 | xlog_recover_check_summary(log); |
3977 | 3974 | ||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6c5d1325e7f6..a4503f5e9497 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -128,7 +128,7 @@ static const struct { | |||
128 | * initialized. | 128 | * initialized. |
129 | */ | 129 | */ |
130 | STATIC void | 130 | STATIC void |
131 | xfs_mount_free( | 131 | xfs_free_perag( |
132 | xfs_mount_t *mp) | 132 | xfs_mount_t *mp) |
133 | { | 133 | { |
134 | if (mp->m_perag) { | 134 | if (mp->m_perag) { |
@@ -139,20 +139,6 @@ xfs_mount_free( | |||
139 | kmem_free(mp->m_perag[agno].pagb_list); | 139 | kmem_free(mp->m_perag[agno].pagb_list); |
140 | kmem_free(mp->m_perag); | 140 | kmem_free(mp->m_perag); |
141 | } | 141 | } |
142 | |||
143 | spinlock_destroy(&mp->m_ail_lock); | ||
144 | spinlock_destroy(&mp->m_sb_lock); | ||
145 | mutex_destroy(&mp->m_ilock); | ||
146 | mutex_destroy(&mp->m_growlock); | ||
147 | if (mp->m_quotainfo) | ||
148 | XFS_QM_DONE(mp); | ||
149 | |||
150 | if (mp->m_fsname != NULL) | ||
151 | kmem_free(mp->m_fsname); | ||
152 | if (mp->m_rtname != NULL) | ||
153 | kmem_free(mp->m_rtname); | ||
154 | if (mp->m_logname != NULL) | ||
155 | kmem_free(mp->m_logname); | ||
156 | } | 142 | } |
157 | 143 | ||
158 | /* | 144 | /* |
@@ -704,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) | |||
704 | * Update alignment values based on mount options and sb values | 690 | * Update alignment values based on mount options and sb values |
705 | */ | 691 | */ |
706 | STATIC int | 692 | STATIC int |
707 | xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) | 693 | xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags) |
708 | { | 694 | { |
709 | xfs_sb_t *sbp = &(mp->m_sb); | 695 | xfs_sb_t *sbp = &(mp->m_sb); |
710 | 696 | ||
711 | if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { | 697 | if (mp->m_dalign) { |
712 | /* | 698 | /* |
713 | * If stripe unit and stripe width are not multiples | 699 | * If stripe unit and stripe width are not multiples |
714 | * of the fs blocksize turn off alignment. | 700 | * of the fs blocksize turn off alignment. |
@@ -864,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp) | |||
864 | * Check that the data (and log if separate) are an ok size. | 850 | * Check that the data (and log if separate) are an ok size. |
865 | */ | 851 | */ |
866 | STATIC int | 852 | STATIC int |
867 | xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) | 853 | xfs_check_sizes(xfs_mount_t *mp) |
868 | { | 854 | { |
869 | xfs_buf_t *bp; | 855 | xfs_buf_t *bp; |
870 | xfs_daddr_t d; | 856 | xfs_daddr_t d; |
@@ -887,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) | |||
887 | return error; | 873 | return error; |
888 | } | 874 | } |
889 | 875 | ||
890 | if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && | 876 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
891 | mp->m_logdev_targp != mp->m_ddev_targp) { | ||
892 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | 877 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
893 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { | 878 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { |
894 | cmn_err(CE_WARN, "XFS: size check 3 failed"); | 879 | cmn_err(CE_WARN, "XFS: size check 3 failed"); |
@@ -923,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) | |||
923 | */ | 908 | */ |
924 | int | 909 | int |
925 | xfs_mountfs( | 910 | xfs_mountfs( |
926 | xfs_mount_t *mp, | 911 | xfs_mount_t *mp) |
927 | int mfsi_flags) | ||
928 | { | 912 | { |
929 | xfs_sb_t *sbp = &(mp->m_sb); | 913 | xfs_sb_t *sbp = &(mp->m_sb); |
930 | xfs_inode_t *rip; | 914 | xfs_inode_t *rip; |
931 | __uint64_t resblks; | 915 | __uint64_t resblks; |
932 | __int64_t update_flags = 0LL; | 916 | __int64_t update_flags = 0LL; |
933 | uint quotamount, quotaflags; | 917 | uint quotamount, quotaflags; |
934 | int agno; | ||
935 | int uuid_mounted = 0; | 918 | int uuid_mounted = 0; |
936 | int error = 0; | 919 | int error = 0; |
937 | 920 | ||
@@ -985,7 +968,7 @@ xfs_mountfs( | |||
985 | * allocator alignment is within an ag, therefore ag has | 968 | * allocator alignment is within an ag, therefore ag has |
986 | * to be aligned at stripe boundary. | 969 | * to be aligned at stripe boundary. |
987 | */ | 970 | */ |
988 | error = xfs_update_alignment(mp, mfsi_flags, &update_flags); | 971 | error = xfs_update_alignment(mp, &update_flags); |
989 | if (error) | 972 | if (error) |
990 | goto error1; | 973 | goto error1; |
991 | 974 | ||
@@ -1004,8 +987,7 @@ xfs_mountfs( | |||
1004 | * since a single partition filesystem is identical to a single | 987 | * since a single partition filesystem is identical to a single |
1005 | * partition volume/filesystem. | 988 | * partition volume/filesystem. |
1006 | */ | 989 | */ |
1007 | if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && | 990 | if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) { |
1008 | (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { | ||
1009 | if (xfs_uuid_mount(mp)) { | 991 | if (xfs_uuid_mount(mp)) { |
1010 | error = XFS_ERROR(EINVAL); | 992 | error = XFS_ERROR(EINVAL); |
1011 | goto error1; | 993 | goto error1; |
@@ -1033,7 +1015,7 @@ xfs_mountfs( | |||
1033 | /* | 1015 | /* |
1034 | * Check that the data (and log if separate) are an ok size. | 1016 | * Check that the data (and log if separate) are an ok size. |
1035 | */ | 1017 | */ |
1036 | error = xfs_check_sizes(mp, mfsi_flags); | 1018 | error = xfs_check_sizes(mp); |
1037 | if (error) | 1019 | if (error) |
1038 | goto error1; | 1020 | goto error1; |
1039 | 1021 | ||
@@ -1047,13 +1029,6 @@ xfs_mountfs( | |||
1047 | } | 1029 | } |
1048 | 1030 | ||
1049 | /* | 1031 | /* |
1050 | * For client case we are done now | ||
1051 | */ | ||
1052 | if (mfsi_flags & XFS_MFSI_CLIENT) { | ||
1053 | return 0; | ||
1054 | } | ||
1055 | |||
1056 | /* | ||
1057 | * Copies the low order bits of the timestamp and the randomly | 1032 | * Copies the low order bits of the timestamp and the randomly |
1058 | * set "sequence" number out of a UUID. | 1033 | * set "sequence" number out of a UUID. |
1059 | */ | 1034 | */ |
@@ -1077,8 +1052,10 @@ xfs_mountfs( | |||
1077 | * Allocate and initialize the per-ag data. | 1052 | * Allocate and initialize the per-ag data. |
1078 | */ | 1053 | */ |
1079 | init_rwsem(&mp->m_peraglock); | 1054 | init_rwsem(&mp->m_peraglock); |
1080 | mp->m_perag = | 1055 | mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), |
1081 | kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); | 1056 | KM_MAYFAIL); |
1057 | if (!mp->m_perag) | ||
1058 | goto error1; | ||
1082 | 1059 | ||
1083 | mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); | 1060 | mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); |
1084 | 1061 | ||
@@ -1190,7 +1167,7 @@ xfs_mountfs( | |||
1190 | * delayed until after the root and real-time bitmap inodes | 1167 | * delayed until after the root and real-time bitmap inodes |
1191 | * were consistently read in. | 1168 | * were consistently read in. |
1192 | */ | 1169 | */ |
1193 | error = xfs_log_mount_finish(mp, mfsi_flags); | 1170 | error = xfs_log_mount_finish(mp); |
1194 | if (error) { | 1171 | if (error) { |
1195 | cmn_err(CE_WARN, "XFS: log mount finish failed"); | 1172 | cmn_err(CE_WARN, "XFS: log mount finish failed"); |
1196 | goto error4; | 1173 | goto error4; |
@@ -1199,7 +1176,7 @@ xfs_mountfs( | |||
1199 | /* | 1176 | /* |
1200 | * Complete the quota initialisation, post-log-replay component. | 1177 | * Complete the quota initialisation, post-log-replay component. |
1201 | */ | 1178 | */ |
1202 | error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); | 1179 | error = XFS_QM_MOUNT(mp, quotamount, quotaflags); |
1203 | if (error) | 1180 | if (error) |
1204 | goto error4; | 1181 | goto error4; |
1205 | 1182 | ||
@@ -1233,12 +1210,7 @@ xfs_mountfs( | |||
1233 | error3: | 1210 | error3: |
1234 | xfs_log_unmount_dealloc(mp); | 1211 | xfs_log_unmount_dealloc(mp); |
1235 | error2: | 1212 | error2: |
1236 | for (agno = 0; agno < sbp->sb_agcount; agno++) | 1213 | xfs_free_perag(mp); |
1237 | if (mp->m_perag[agno].pagb_list) | ||
1238 | kmem_free(mp->m_perag[agno].pagb_list); | ||
1239 | kmem_free(mp->m_perag); | ||
1240 | mp->m_perag = NULL; | ||
1241 | /* FALLTHROUGH */ | ||
1242 | error1: | 1214 | error1: |
1243 | if (uuid_mounted) | 1215 | if (uuid_mounted) |
1244 | uuid_table_remove(&mp->m_sb.sb_uuid); | 1216 | uuid_table_remove(&mp->m_sb.sb_uuid); |
@@ -1246,16 +1218,17 @@ xfs_mountfs( | |||
1246 | } | 1218 | } |
1247 | 1219 | ||
1248 | /* | 1220 | /* |
1249 | * xfs_unmountfs | ||
1250 | * | ||
1251 | * This flushes out the inodes,dquots and the superblock, unmounts the | 1221 | * This flushes out the inodes,dquots and the superblock, unmounts the |
1252 | * log and makes sure that incore structures are freed. | 1222 | * log and makes sure that incore structures are freed. |
1253 | */ | 1223 | */ |
1254 | int | 1224 | void |
1255 | xfs_unmountfs(xfs_mount_t *mp) | 1225 | xfs_unmountfs( |
1226 | struct xfs_mount *mp) | ||
1256 | { | 1227 | { |
1257 | __uint64_t resblks; | 1228 | __uint64_t resblks; |
1258 | int error = 0; | 1229 | int error; |
1230 | |||
1231 | IRELE(mp->m_rootip); | ||
1259 | 1232 | ||
1260 | /* | 1233 | /* |
1261 | * We can potentially deadlock here if we have an inode cluster | 1234 | * We can potentially deadlock here if we have an inode cluster |
@@ -1312,8 +1285,6 @@ xfs_unmountfs(xfs_mount_t *mp) | |||
1312 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1285 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
1313 | xfs_log_unmount(mp); /* Done! No more fs ops. */ | 1286 | xfs_log_unmount(mp); /* Done! No more fs ops. */ |
1314 | 1287 | ||
1315 | xfs_freesb(mp); | ||
1316 | |||
1317 | /* | 1288 | /* |
1318 | * All inodes from this mount point should be freed. | 1289 | * All inodes from this mount point should be freed. |
1319 | */ | 1290 | */ |
@@ -1322,11 +1293,12 @@ xfs_unmountfs(xfs_mount_t *mp) | |||
1322 | if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) | 1293 | if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) |
1323 | uuid_table_remove(&mp->m_sb.sb_uuid); | 1294 | uuid_table_remove(&mp->m_sb.sb_uuid); |
1324 | 1295 | ||
1325 | #if defined(DEBUG) || defined(INDUCE_IO_ERROR) | 1296 | #if defined(DEBUG) |
1326 | xfs_errortag_clearall(mp, 0); | 1297 | xfs_errortag_clearall(mp, 0); |
1327 | #endif | 1298 | #endif |
1328 | xfs_mount_free(mp); | 1299 | xfs_free_perag(mp); |
1329 | return 0; | 1300 | if (mp->m_quotainfo) |
1301 | XFS_QM_DONE(mp); | ||
1330 | } | 1302 | } |
1331 | 1303 | ||
1332 | STATIC void | 1304 | STATIC void |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5269bd6e3df0..f3c1024b1241 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -114,7 +114,7 @@ struct xfs_dqtrxops; | |||
114 | struct xfs_quotainfo; | 114 | struct xfs_quotainfo; |
115 | 115 | ||
116 | typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); | 116 | typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); |
117 | typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); | 117 | typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); |
118 | typedef int (*xfs_qmunmount_t)(struct xfs_mount *); | 118 | typedef int (*xfs_qmunmount_t)(struct xfs_mount *); |
119 | typedef void (*xfs_qmdone_t)(struct xfs_mount *); | 119 | typedef void (*xfs_qmdone_t)(struct xfs_mount *); |
120 | typedef void (*xfs_dqrele_t)(struct xfs_dquot *); | 120 | typedef void (*xfs_dqrele_t)(struct xfs_dquot *); |
@@ -158,8 +158,8 @@ typedef struct xfs_qmops { | |||
158 | 158 | ||
159 | #define XFS_QM_INIT(mp, mnt, fl) \ | 159 | #define XFS_QM_INIT(mp, mnt, fl) \ |
160 | (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) | 160 | (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) |
161 | #define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ | 161 | #define XFS_QM_MOUNT(mp, mnt, fl) \ |
162 | (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) | 162 | (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) |
163 | #define XFS_QM_UNMOUNT(mp) \ | 163 | #define XFS_QM_UNMOUNT(mp) \ |
164 | (*(mp)->m_qm_ops->xfs_qmunmount)(mp) | 164 | (*(mp)->m_qm_ops->xfs_qmunmount)(mp) |
165 | #define XFS_QM_DONE(mp) \ | 165 | #define XFS_QM_DONE(mp) \ |
@@ -442,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, | |||
442 | /* | 442 | /* |
443 | * Flags for xfs_mountfs | 443 | * Flags for xfs_mountfs |
444 | */ | 444 | */ |
445 | #define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */ | ||
446 | #define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ | ||
447 | /* XFS_MFSI_RRINODES */ | ||
448 | #define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ | ||
449 | /* log recovery */ | ||
450 | #define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */ | ||
451 | /* XFS_MFSI_CONVERT_SUNIT */ | ||
452 | #define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ | 445 | #define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ |
453 | 446 | ||
454 | #define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) | 447 | #define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) |
@@ -517,10 +510,10 @@ typedef struct xfs_mod_sb { | |||
517 | 510 | ||
518 | extern void xfs_mod_sb(xfs_trans_t *, __int64_t); | 511 | extern void xfs_mod_sb(xfs_trans_t *, __int64_t); |
519 | extern int xfs_log_sbcount(xfs_mount_t *, uint); | 512 | extern int xfs_log_sbcount(xfs_mount_t *, uint); |
520 | extern int xfs_mountfs(xfs_mount_t *mp, int); | 513 | extern int xfs_mountfs(xfs_mount_t *mp); |
521 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); | 514 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); |
522 | 515 | ||
523 | extern int xfs_unmountfs(xfs_mount_t *); | 516 | extern void xfs_unmountfs(xfs_mount_t *); |
524 | extern int xfs_unmountfs_writesb(xfs_mount_t *); | 517 | extern int xfs_unmountfs_writesb(xfs_mount_t *); |
525 | extern int xfs_unmount_flush(xfs_mount_t *, int); | 518 | extern int xfs_unmount_flush(xfs_mount_t *, int); |
526 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 519 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index bf87a5913504..e2f68de16159 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, | |||
74 | */ | 74 | */ |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. | ||
78 | */ | ||
79 | STATIC int | ||
80 | xfs_lowbit32( | ||
81 | __uint32_t v) | ||
82 | { | ||
83 | if (v) | ||
84 | return ffs(v) - 1; | ||
85 | return -1; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Allocate space to the bitmap or summary file, and zero it, for growfs. | 77 | * Allocate space to the bitmap or summary file, and zero it, for growfs. |
90 | */ | 78 | */ |
91 | STATIC int /* error */ | 79 | STATIC int /* error */ |
@@ -450,6 +438,7 @@ xfs_rtallocate_extent_near( | |||
450 | } | 438 | } |
451 | bbno = XFS_BITTOBLOCK(mp, bno); | 439 | bbno = XFS_BITTOBLOCK(mp, bno); |
452 | i = 0; | 440 | i = 0; |
441 | ASSERT(minlen != 0); | ||
453 | log2len = xfs_highbit32(minlen); | 442 | log2len = xfs_highbit32(minlen); |
454 | /* | 443 | /* |
455 | * Loop over all bitmap blocks (bbno + i is current block). | 444 | * Loop over all bitmap blocks (bbno + i is current block). |
@@ -618,6 +607,8 @@ xfs_rtallocate_extent_size( | |||
618 | xfs_suminfo_t sum; /* summary information for extents */ | 607 | xfs_suminfo_t sum; /* summary information for extents */ |
619 | 608 | ||
620 | ASSERT(minlen % prod == 0 && maxlen % prod == 0); | 609 | ASSERT(minlen % prod == 0 && maxlen % prod == 0); |
610 | ASSERT(maxlen != 0); | ||
611 | |||
621 | /* | 612 | /* |
622 | * Loop over all the levels starting with maxlen. | 613 | * Loop over all the levels starting with maxlen. |
623 | * At each level, look at all the bitmap blocks, to see if there | 614 | * At each level, look at all the bitmap blocks, to see if there |
@@ -675,6 +666,9 @@ xfs_rtallocate_extent_size( | |||
675 | *rtblock = NULLRTBLOCK; | 666 | *rtblock = NULLRTBLOCK; |
676 | return 0; | 667 | return 0; |
677 | } | 668 | } |
669 | ASSERT(minlen != 0); | ||
670 | ASSERT(maxlen != 0); | ||
671 | |||
678 | /* | 672 | /* |
679 | * Loop over sizes, from maxlen down to minlen. | 673 | * Loop over sizes, from maxlen down to minlen. |
680 | * This time, when we do the allocations, allow smaller ones | 674 | * This time, when we do the allocations, allow smaller ones |
@@ -1961,6 +1955,7 @@ xfs_growfs_rt( | |||
1961 | nsbp->sb_blocksize * nsbp->sb_rextsize); | 1955 | nsbp->sb_blocksize * nsbp->sb_rextsize); |
1962 | nsbp->sb_rextents = nsbp->sb_rblocks; | 1956 | nsbp->sb_rextents = nsbp->sb_rblocks; |
1963 | do_div(nsbp->sb_rextents, nsbp->sb_rextsize); | 1957 | do_div(nsbp->sb_rextents, nsbp->sb_rextsize); |
1958 | ASSERT(nsbp->sb_rextents != 0); | ||
1964 | nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); | 1959 | nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); |
1965 | nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; | 1960 | nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; |
1966 | nrsumsize = | 1961 | nrsumsize = |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index b0f31c09a76d..3a82576dde9a 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -314,7 +314,7 @@ xfs_bioerror_relse( | |||
314 | * ASYNC buffers. | 314 | * ASYNC buffers. |
315 | */ | 315 | */ |
316 | XFS_BUF_ERROR(bp, EIO); | 316 | XFS_BUF_ERROR(bp, EIO); |
317 | XFS_BUF_V_IODONESEMA(bp); | 317 | XFS_BUF_FINISH_IOWAIT(bp); |
318 | } else { | 318 | } else { |
319 | xfs_buf_relse(bp); | 319 | xfs_buf_relse(bp); |
320 | } | 320 | } |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e4ebddd3c500..4e1c22a23be5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
44 | #include "xfs_trans_priv.h" | 44 | #include "xfs_trans_priv.h" |
45 | #include "xfs_trans_space.h" | 45 | #include "xfs_trans_space.h" |
46 | #include "xfs_inode_item.h" | ||
46 | 47 | ||
47 | 48 | ||
48 | STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); | 49 | STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); |
@@ -253,7 +254,7 @@ _xfs_trans_alloc( | |||
253 | tp->t_mountp = mp; | 254 | tp->t_mountp = mp; |
254 | tp->t_items_free = XFS_LIC_NUM_SLOTS; | 255 | tp->t_items_free = XFS_LIC_NUM_SLOTS; |
255 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; | 256 | tp->t_busy_free = XFS_LBC_NUM_SLOTS; |
256 | XFS_LIC_INIT(&(tp->t_items)); | 257 | xfs_lic_init(&(tp->t_items)); |
257 | XFS_LBC_INIT(&(tp->t_busy)); | 258 | XFS_LBC_INIT(&(tp->t_busy)); |
258 | return tp; | 259 | return tp; |
259 | } | 260 | } |
@@ -282,7 +283,7 @@ xfs_trans_dup( | |||
282 | ntp->t_mountp = tp->t_mountp; | 283 | ntp->t_mountp = tp->t_mountp; |
283 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; | 284 | ntp->t_items_free = XFS_LIC_NUM_SLOTS; |
284 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; | 285 | ntp->t_busy_free = XFS_LBC_NUM_SLOTS; |
285 | XFS_LIC_INIT(&(ntp->t_items)); | 286 | xfs_lic_init(&(ntp->t_items)); |
286 | XFS_LBC_INIT(&(ntp->t_busy)); | 287 | XFS_LBC_INIT(&(ntp->t_busy)); |
287 | 288 | ||
288 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 289 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
@@ -1169,7 +1170,7 @@ xfs_trans_cancel( | |||
1169 | while (licp != NULL) { | 1170 | while (licp != NULL) { |
1170 | lidp = licp->lic_descs; | 1171 | lidp = licp->lic_descs; |
1171 | for (i = 0; i < licp->lic_unused; i++, lidp++) { | 1172 | for (i = 0; i < licp->lic_unused; i++, lidp++) { |
1172 | if (XFS_LIC_ISFREE(licp, i)) { | 1173 | if (xfs_lic_isfree(licp, i)) { |
1173 | continue; | 1174 | continue; |
1174 | } | 1175 | } |
1175 | 1176 | ||
@@ -1216,6 +1217,68 @@ xfs_trans_free( | |||
1216 | kmem_zone_free(xfs_trans_zone, tp); | 1217 | kmem_zone_free(xfs_trans_zone, tp); |
1217 | } | 1218 | } |
1218 | 1219 | ||
1220 | /* | ||
1221 | * Roll from one trans in the sequence of PERMANENT transactions to | ||
1222 | * the next: permanent transactions are only flushed out when | ||
1223 | * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon | ||
1224 | * as possible to let chunks of it go to the log. So we commit the | ||
1225 | * chunk we've been working on and get a new transaction to continue. | ||
1226 | */ | ||
1227 | int | ||
1228 | xfs_trans_roll( | ||
1229 | struct xfs_trans **tpp, | ||
1230 | struct xfs_inode *dp) | ||
1231 | { | ||
1232 | struct xfs_trans *trans; | ||
1233 | unsigned int logres, count; | ||
1234 | int error; | ||
1235 | |||
1236 | /* | ||
1237 | * Ensure that the inode is always logged. | ||
1238 | */ | ||
1239 | trans = *tpp; | ||
1240 | xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); | ||
1241 | |||
1242 | /* | ||
1243 | * Copy the critical parameters from one trans to the next. | ||
1244 | */ | ||
1245 | logres = trans->t_log_res; | ||
1246 | count = trans->t_log_count; | ||
1247 | *tpp = xfs_trans_dup(trans); | ||
1248 | |||
1249 | /* | ||
1250 | * Commit the current transaction. | ||
1251 | * If this commit failed, then it'd just unlock those items that | ||
1252 | * are not marked ihold. That also means that a filesystem shutdown | ||
1253 | * is in progress. The caller takes the responsibility to cancel | ||
1254 | * the duplicate transaction that gets returned. | ||
1255 | */ | ||
1256 | error = xfs_trans_commit(trans, 0); | ||
1257 | if (error) | ||
1258 | return (error); | ||
1259 | |||
1260 | trans = *tpp; | ||
1261 | |||
1262 | /* | ||
1263 | * Reserve space in the log for th next transaction. | ||
1264 | * This also pushes items in the "AIL", the list of logged items, | ||
1265 | * out to disk if they are taking up space at the tail of the log | ||
1266 | * that we want to use. This requires that either nothing be locked | ||
1267 | * across this call, or that anything that is locked be logged in | ||
1268 | * the prior and the next transactions. | ||
1269 | */ | ||
1270 | error = xfs_trans_reserve(trans, 0, logres, 0, | ||
1271 | XFS_TRANS_PERM_LOG_RES, count); | ||
1272 | /* | ||
1273 | * Ensure that the inode is in the new transaction and locked. | ||
1274 | */ | ||
1275 | if (error) | ||
1276 | return error; | ||
1277 | |||
1278 | xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); | ||
1279 | xfs_trans_ihold(trans, dp); | ||
1280 | return 0; | ||
1281 | } | ||
1219 | 1282 | ||
1220 | /* | 1283 | /* |
1221 | * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). | 1284 | * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). |
@@ -1253,7 +1316,7 @@ xfs_trans_committed( | |||
1253 | * Special case the chunk embedded in the transaction. | 1316 | * Special case the chunk embedded in the transaction. |
1254 | */ | 1317 | */ |
1255 | licp = &(tp->t_items); | 1318 | licp = &(tp->t_items); |
1256 | if (!(XFS_LIC_ARE_ALL_FREE(licp))) { | 1319 | if (!(xfs_lic_are_all_free(licp))) { |
1257 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); | 1320 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); |
1258 | } | 1321 | } |
1259 | 1322 | ||
@@ -1262,7 +1325,7 @@ xfs_trans_committed( | |||
1262 | */ | 1325 | */ |
1263 | licp = licp->lic_next; | 1326 | licp = licp->lic_next; |
1264 | while (licp != NULL) { | 1327 | while (licp != NULL) { |
1265 | ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); | 1328 | ASSERT(!xfs_lic_are_all_free(licp)); |
1266 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); | 1329 | xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); |
1267 | next_licp = licp->lic_next; | 1330 | next_licp = licp->lic_next; |
1268 | kmem_free(licp); | 1331 | kmem_free(licp); |
@@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed( | |||
1325 | 1388 | ||
1326 | lidp = licp->lic_descs; | 1389 | lidp = licp->lic_descs; |
1327 | for (i = 0; i < licp->lic_unused; i++, lidp++) { | 1390 | for (i = 0; i < licp->lic_unused; i++, lidp++) { |
1328 | if (XFS_LIC_ISFREE(licp, i)) { | 1391 | if (xfs_lic_isfree(licp, i)) { |
1329 | continue; | 1392 | continue; |
1330 | } | 1393 | } |
1331 | 1394 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0804207c7391..74c80bd2b0ec 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk { | |||
210 | * lic_unused to the right value (0 matches all free). The | 210 | * lic_unused to the right value (0 matches all free). The |
211 | * lic_descs.lid_index values are set up as each desc is allocated. | 211 | * lic_descs.lid_index values are set up as each desc is allocated. |
212 | */ | 212 | */ |
213 | #define XFS_LIC_INIT(cp) xfs_lic_init(cp) | ||
214 | static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) | 213 | static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) |
215 | { | 214 | { |
216 | cp->lic_free = XFS_LIC_FREEMASK; | 215 | cp->lic_free = XFS_LIC_FREEMASK; |
217 | } | 216 | } |
218 | 217 | ||
219 | #define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot) | ||
220 | static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) | 218 | static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) |
221 | { | 219 | { |
222 | cp->lic_descs[slot].lid_index = (unsigned char)(slot); | 220 | cp->lic_descs[slot].lid_index = (unsigned char)(slot); |
223 | } | 221 | } |
224 | 222 | ||
225 | #define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp) | ||
226 | static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) | 223 | static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) |
227 | { | 224 | { |
228 | return cp->lic_free & XFS_LIC_FREEMASK; | 225 | return cp->lic_free & XFS_LIC_FREEMASK; |
229 | } | 226 | } |
230 | 227 | ||
231 | #define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp) | ||
232 | static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) | 228 | static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) |
233 | { | 229 | { |
234 | cp->lic_free = XFS_LIC_FREEMASK; | 230 | cp->lic_free = XFS_LIC_FREEMASK; |
235 | } | 231 | } |
236 | 232 | ||
237 | #define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp) | ||
238 | static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) | 233 | static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) |
239 | { | 234 | { |
240 | return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); | 235 | return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); |
241 | } | 236 | } |
242 | 237 | ||
243 | #define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot) | ||
244 | static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) | 238 | static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) |
245 | { | 239 | { |
246 | return (cp->lic_free & (1 << slot)); | 240 | return (cp->lic_free & (1 << slot)); |
247 | } | 241 | } |
248 | 242 | ||
249 | #define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot) | ||
250 | static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) | 243 | static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) |
251 | { | 244 | { |
252 | cp->lic_free &= ~(1 << slot); | 245 | cp->lic_free &= ~(1 << slot); |
253 | } | 246 | } |
254 | 247 | ||
255 | #define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot) | ||
256 | static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) | 248 | static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) |
257 | { | 249 | { |
258 | cp->lic_free |= 1 << slot; | 250 | cp->lic_free |= 1 << slot; |
259 | } | 251 | } |
260 | 252 | ||
261 | #define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot) | ||
262 | static inline xfs_log_item_desc_t * | 253 | static inline xfs_log_item_desc_t * |
263 | xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) | 254 | xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) |
264 | { | 255 | { |
265 | return &(cp->lic_descs[slot]); | 256 | return &(cp->lic_descs[slot]); |
266 | } | 257 | } |
267 | 258 | ||
268 | #define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp) | ||
269 | static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) | 259 | static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) |
270 | { | 260 | { |
271 | return (uint)dp->lid_index; | 261 | return (uint)dp->lid_index; |
@@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) | |||
278 | * All of this yields the address of the chunk, which is | 268 | * All of this yields the address of the chunk, which is |
279 | * cast to a chunk pointer. | 269 | * cast to a chunk pointer. |
280 | */ | 270 | */ |
281 | #define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp) | ||
282 | static inline xfs_log_item_chunk_t * | 271 | static inline xfs_log_item_chunk_t * |
283 | xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) | 272 | xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) |
284 | { | 273 | { |
@@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *, | |||
986 | int *); | 975 | int *); |
987 | #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) | 976 | #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) |
988 | void xfs_trans_cancel(xfs_trans_t *, int); | 977 | void xfs_trans_cancel(xfs_trans_t *, int); |
978 | int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); | ||
989 | int xfs_trans_ail_init(struct xfs_mount *); | 979 | int xfs_trans_ail_init(struct xfs_mount *); |
990 | void xfs_trans_ail_destroy(struct xfs_mount *); | 980 | void xfs_trans_ail_destroy(struct xfs_mount *); |
991 | void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); | 981 | void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index cb0c5839154b..4e855b5ced66 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match( | |||
1021 | bp = NULL; | 1021 | bp = NULL; |
1022 | len = BBTOB(len); | 1022 | len = BBTOB(len); |
1023 | licp = &tp->t_items; | 1023 | licp = &tp->t_items; |
1024 | if (!XFS_LIC_ARE_ALL_FREE(licp)) { | 1024 | if (!xfs_lic_are_all_free(licp)) { |
1025 | for (i = 0; i < licp->lic_unused; i++) { | 1025 | for (i = 0; i < licp->lic_unused; i++) { |
1026 | /* | 1026 | /* |
1027 | * Skip unoccupied slots. | 1027 | * Skip unoccupied slots. |
1028 | */ | 1028 | */ |
1029 | if (XFS_LIC_ISFREE(licp, i)) { | 1029 | if (xfs_lic_isfree(licp, i)) { |
1030 | continue; | 1030 | continue; |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | lidp = XFS_LIC_SLOT(licp, i); | 1033 | lidp = xfs_lic_slot(licp, i); |
1034 | blip = (xfs_buf_log_item_t *)lidp->lid_item; | 1034 | blip = (xfs_buf_log_item_t *)lidp->lid_item; |
1035 | if (blip->bli_item.li_type != XFS_LI_BUF) { | 1035 | if (blip->bli_item.li_type != XFS_LI_BUF) { |
1036 | continue; | 1036 | continue; |
@@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all( | |||
1074 | bp = NULL; | 1074 | bp = NULL; |
1075 | len = BBTOB(len); | 1075 | len = BBTOB(len); |
1076 | for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { | 1076 | for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { |
1077 | if (XFS_LIC_ARE_ALL_FREE(licp)) { | 1077 | if (xfs_lic_are_all_free(licp)) { |
1078 | ASSERT(licp == &tp->t_items); | 1078 | ASSERT(licp == &tp->t_items); |
1079 | ASSERT(licp->lic_next == NULL); | 1079 | ASSERT(licp->lic_next == NULL); |
1080 | return NULL; | 1080 | return NULL; |
@@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all( | |||
1083 | /* | 1083 | /* |
1084 | * Skip unoccupied slots. | 1084 | * Skip unoccupied slots. |
1085 | */ | 1085 | */ |
1086 | if (XFS_LIC_ISFREE(licp, i)) { | 1086 | if (xfs_lic_isfree(licp, i)) { |
1087 | continue; | 1087 | continue; |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | lidp = XFS_LIC_SLOT(licp, i); | 1090 | lidp = xfs_lic_slot(licp, i); |
1091 | blip = (xfs_buf_log_item_t *)lidp->lid_item; | 1091 | blip = (xfs_buf_log_item_t *)lidp->lid_item; |
1092 | if (blip->bli_item.li_type != XFS_LI_BUF) { | 1092 | if (blip->bli_item.li_type != XFS_LI_BUF) { |
1093 | continue; | 1093 | continue; |
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index db5c83595526..3c666e8317f8 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c | |||
@@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) | |||
53 | * Initialize the chunk, and then | 53 | * Initialize the chunk, and then |
54 | * claim the first slot in the newly allocated chunk. | 54 | * claim the first slot in the newly allocated chunk. |
55 | */ | 55 | */ |
56 | XFS_LIC_INIT(licp); | 56 | xfs_lic_init(licp); |
57 | XFS_LIC_CLAIM(licp, 0); | 57 | xfs_lic_claim(licp, 0); |
58 | licp->lic_unused = 1; | 58 | licp->lic_unused = 1; |
59 | XFS_LIC_INIT_SLOT(licp, 0); | 59 | xfs_lic_init_slot(licp, 0); |
60 | lidp = XFS_LIC_SLOT(licp, 0); | 60 | lidp = xfs_lic_slot(licp, 0); |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Link in the new chunk and update the free count. | 63 | * Link in the new chunk and update the free count. |
@@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) | |||
88 | */ | 88 | */ |
89 | licp = &tp->t_items; | 89 | licp = &tp->t_items; |
90 | while (licp != NULL) { | 90 | while (licp != NULL) { |
91 | if (XFS_LIC_VACANCY(licp)) { | 91 | if (xfs_lic_vacancy(licp)) { |
92 | if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { | 92 | if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { |
93 | i = licp->lic_unused; | 93 | i = licp->lic_unused; |
94 | ASSERT(XFS_LIC_ISFREE(licp, i)); | 94 | ASSERT(xfs_lic_isfree(licp, i)); |
95 | break; | 95 | break; |
96 | } | 96 | } |
97 | for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { | 97 | for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { |
98 | if (XFS_LIC_ISFREE(licp, i)) | 98 | if (xfs_lic_isfree(licp, i)) |
99 | break; | 99 | break; |
100 | } | 100 | } |
101 | ASSERT(i <= XFS_LIC_MAX_SLOT); | 101 | ASSERT(i <= XFS_LIC_MAX_SLOT); |
@@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) | |||
108 | * If we find a free descriptor, claim it, | 108 | * If we find a free descriptor, claim it, |
109 | * initialize it, and return it. | 109 | * initialize it, and return it. |
110 | */ | 110 | */ |
111 | XFS_LIC_CLAIM(licp, i); | 111 | xfs_lic_claim(licp, i); |
112 | if (licp->lic_unused <= i) { | 112 | if (licp->lic_unused <= i) { |
113 | licp->lic_unused = i + 1; | 113 | licp->lic_unused = i + 1; |
114 | XFS_LIC_INIT_SLOT(licp, i); | 114 | xfs_lic_init_slot(licp, i); |
115 | } | 115 | } |
116 | lidp = XFS_LIC_SLOT(licp, i); | 116 | lidp = xfs_lic_slot(licp, i); |
117 | tp->t_items_free--; | 117 | tp->t_items_free--; |
118 | lidp->lid_item = lip; | 118 | lidp->lid_item = lip; |
119 | lidp->lid_flags = 0; | 119 | lidp->lid_flags = 0; |
@@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
136 | xfs_log_item_chunk_t *licp; | 136 | xfs_log_item_chunk_t *licp; |
137 | xfs_log_item_chunk_t **licpp; | 137 | xfs_log_item_chunk_t **licpp; |
138 | 138 | ||
139 | slot = XFS_LIC_DESC_TO_SLOT(lidp); | 139 | slot = xfs_lic_desc_to_slot(lidp); |
140 | licp = XFS_LIC_DESC_TO_CHUNK(lidp); | 140 | licp = xfs_lic_desc_to_chunk(lidp); |
141 | XFS_LIC_RELSE(licp, slot); | 141 | xfs_lic_relse(licp, slot); |
142 | lidp->lid_item->li_desc = NULL; | 142 | lidp->lid_item->li_desc = NULL; |
143 | tp->t_items_free++; | 143 | tp->t_items_free++; |
144 | 144 | ||
@@ -154,7 +154,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
154 | * Also decrement the transaction structure's count of free items | 154 | * Also decrement the transaction structure's count of free items |
155 | * by the number in a chunk since we are freeing an empty chunk. | 155 | * by the number in a chunk since we are freeing an empty chunk. |
156 | */ | 156 | */ |
157 | if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { | 157 | if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) { |
158 | licpp = &(tp->t_items.lic_next); | 158 | licpp = &(tp->t_items.lic_next); |
159 | while (*licpp != licp) { | 159 | while (*licpp != licp) { |
160 | ASSERT(*licpp != NULL); | 160 | ASSERT(*licpp != NULL); |
@@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp) | |||
207 | /* | 207 | /* |
208 | * If it's not in the first chunk, skip to the second. | 208 | * If it's not in the first chunk, skip to the second. |
209 | */ | 209 | */ |
210 | if (XFS_LIC_ARE_ALL_FREE(licp)) { | 210 | if (xfs_lic_are_all_free(licp)) { |
211 | licp = licp->lic_next; | 211 | licp = licp->lic_next; |
212 | } | 212 | } |
213 | 213 | ||
214 | /* | 214 | /* |
215 | * Return the first non-free descriptor in the chunk. | 215 | * Return the first non-free descriptor in the chunk. |
216 | */ | 216 | */ |
217 | ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); | 217 | ASSERT(!xfs_lic_are_all_free(licp)); |
218 | for (i = 0; i < licp->lic_unused; i++) { | 218 | for (i = 0; i < licp->lic_unused; i++) { |
219 | if (XFS_LIC_ISFREE(licp, i)) { | 219 | if (xfs_lic_isfree(licp, i)) { |
220 | continue; | 220 | continue; |
221 | } | 221 | } |
222 | 222 | ||
223 | return XFS_LIC_SLOT(licp, i); | 223 | return xfs_lic_slot(licp, i); |
224 | } | 224 | } |
225 | cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); | 225 | cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); |
226 | return NULL; | 226 | return NULL; |
@@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
242 | xfs_log_item_chunk_t *licp; | 242 | xfs_log_item_chunk_t *licp; |
243 | int i; | 243 | int i; |
244 | 244 | ||
245 | licp = XFS_LIC_DESC_TO_CHUNK(lidp); | 245 | licp = xfs_lic_desc_to_chunk(lidp); |
246 | 246 | ||
247 | /* | 247 | /* |
248 | * First search the rest of the chunk. The for loop keeps us | 248 | * First search the rest of the chunk. The for loop keeps us |
249 | * from referencing things beyond the end of the chunk. | 249 | * from referencing things beyond the end of the chunk. |
250 | */ | 250 | */ |
251 | for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { | 251 | for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) { |
252 | if (XFS_LIC_ISFREE(licp, i)) { | 252 | if (xfs_lic_isfree(licp, i)) { |
253 | continue; | 253 | continue; |
254 | } | 254 | } |
255 | 255 | ||
256 | return XFS_LIC_SLOT(licp, i); | 256 | return xfs_lic_slot(licp, i); |
257 | } | 257 | } |
258 | 258 | ||
259 | /* | 259 | /* |
@@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) | |||
266 | } | 266 | } |
267 | 267 | ||
268 | licp = licp->lic_next; | 268 | licp = licp->lic_next; |
269 | ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); | 269 | ASSERT(!xfs_lic_are_all_free(licp)); |
270 | for (i = 0; i < licp->lic_unused; i++) { | 270 | for (i = 0; i < licp->lic_unused; i++) { |
271 | if (XFS_LIC_ISFREE(licp, i)) { | 271 | if (xfs_lic_isfree(licp, i)) { |
272 | continue; | 272 | continue; |
273 | } | 273 | } |
274 | 274 | ||
275 | return XFS_LIC_SLOT(licp, i); | 275 | return xfs_lic_slot(licp, i); |
276 | } | 276 | } |
277 | ASSERT(0); | 277 | ASSERT(0); |
278 | /* NOTREACHED */ | 278 | /* NOTREACHED */ |
@@ -300,9 +300,9 @@ xfs_trans_free_items( | |||
300 | /* | 300 | /* |
301 | * Special case the embedded chunk so we don't free it below. | 301 | * Special case the embedded chunk so we don't free it below. |
302 | */ | 302 | */ |
303 | if (!XFS_LIC_ARE_ALL_FREE(licp)) { | 303 | if (!xfs_lic_are_all_free(licp)) { |
304 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 304 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); |
305 | XFS_LIC_ALL_FREE(licp); | 305 | xfs_lic_all_free(licp); |
306 | licp->lic_unused = 0; | 306 | licp->lic_unused = 0; |
307 | } | 307 | } |
308 | licp = licp->lic_next; | 308 | licp = licp->lic_next; |
@@ -311,7 +311,7 @@ xfs_trans_free_items( | |||
311 | * Unlock each item in each chunk and free the chunks. | 311 | * Unlock each item in each chunk and free the chunks. |
312 | */ | 312 | */ |
313 | while (licp != NULL) { | 313 | while (licp != NULL) { |
314 | ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); | 314 | ASSERT(!xfs_lic_are_all_free(licp)); |
315 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); | 315 | (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); |
316 | next_licp = licp->lic_next; | 316 | next_licp = licp->lic_next; |
317 | kmem_free(licp); | 317 | kmem_free(licp); |
@@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) | |||
347 | /* | 347 | /* |
348 | * Special case the embedded chunk so we don't free. | 348 | * Special case the embedded chunk so we don't free. |
349 | */ | 349 | */ |
350 | if (!XFS_LIC_ARE_ALL_FREE(licp)) { | 350 | if (!xfs_lic_are_all_free(licp)) { |
351 | freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); | 351 | freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); |
352 | } | 352 | } |
353 | licpp = &(tp->t_items.lic_next); | 353 | licpp = &(tp->t_items.lic_next); |
@@ -358,10 +358,10 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) | |||
358 | * and free empty chunks. | 358 | * and free empty chunks. |
359 | */ | 359 | */ |
360 | while (licp != NULL) { | 360 | while (licp != NULL) { |
361 | ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); | 361 | ASSERT(!xfs_lic_are_all_free(licp)); |
362 | freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); | 362 | freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); |
363 | next_licp = licp->lic_next; | 363 | next_licp = licp->lic_next; |
364 | if (XFS_LIC_ARE_ALL_FREE(licp)) { | 364 | if (xfs_lic_are_all_free(licp)) { |
365 | *licpp = next_licp; | 365 | *licpp = next_licp; |
366 | kmem_free(licp); | 366 | kmem_free(licp); |
367 | freed -= XFS_LIC_NUM_SLOTS; | 367 | freed -= XFS_LIC_NUM_SLOTS; |
@@ -402,7 +402,7 @@ xfs_trans_unlock_chunk( | |||
402 | freed = 0; | 402 | freed = 0; |
403 | lidp = licp->lic_descs; | 403 | lidp = licp->lic_descs; |
404 | for (i = 0; i < licp->lic_unused; i++, lidp++) { | 404 | for (i = 0; i < licp->lic_unused; i++, lidp++) { |
405 | if (XFS_LIC_ISFREE(licp, i)) { | 405 | if (xfs_lic_isfree(licp, i)) { |
406 | continue; | 406 | continue; |
407 | } | 407 | } |
408 | lip = lidp->lid_item; | 408 | lip = lidp->lid_item; |
@@ -421,7 +421,7 @@ xfs_trans_unlock_chunk( | |||
421 | */ | 421 | */ |
422 | if (!(freeing_chunk) && | 422 | if (!(freeing_chunk) && |
423 | (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { | 423 | (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { |
424 | XFS_LIC_RELSE(licp, i); | 424 | xfs_lic_relse(licp, i); |
425 | freed++; | 425 | freed++; |
426 | } | 426 | } |
427 | } | 427 | } |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 98e5f110ba5f..35d4d414bcc2 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -237,7 +237,7 @@ xfs_droplink( | |||
237 | 237 | ||
238 | ASSERT (ip->i_d.di_nlink > 0); | 238 | ASSERT (ip->i_d.di_nlink > 0); |
239 | ip->i_d.di_nlink--; | 239 | ip->i_d.di_nlink--; |
240 | drop_nlink(ip->i_vnode); | 240 | drop_nlink(VFS_I(ip)); |
241 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 241 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
242 | 242 | ||
243 | error = 0; | 243 | error = 0; |
@@ -301,7 +301,7 @@ xfs_bumplink( | |||
301 | 301 | ||
302 | ASSERT(ip->i_d.di_nlink > 0); | 302 | ASSERT(ip->i_d.di_nlink > 0); |
303 | ip->i_d.di_nlink++; | 303 | ip->i_d.di_nlink++; |
304 | inc_nlink(ip->i_vnode); | 304 | inc_nlink(VFS_I(ip)); |
305 | if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && | 305 | if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && |
306 | (ip->i_d.di_nlink > XFS_MAXLINK_1)) { | 306 | (ip->i_d.di_nlink > XFS_MAXLINK_1)) { |
307 | /* | 307 | /* |
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f316cb85d8e2..ef321225d269 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h | |||
@@ -18,9 +18,6 @@ | |||
18 | #ifndef __XFS_UTILS_H__ | 18 | #ifndef __XFS_UTILS_H__ |
19 | #define __XFS_UTILS_H__ | 19 | #define __XFS_UTILS_H__ |
20 | 20 | ||
21 | #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) | ||
22 | #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) | ||
23 | |||
24 | extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); | 21 | extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); |
25 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, | 22 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, |
26 | xfs_dev_t, cred_t *, prid_t, int, | 23 | xfs_dev_t, cred_t *, prid_t, int, |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 4a9a43315a86..439dd3939dda 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -128,7 +128,6 @@ xfs_unmount_flush( | |||
128 | xfs_inode_t *rip = mp->m_rootip; | 128 | xfs_inode_t *rip = mp->m_rootip; |
129 | xfs_inode_t *rbmip; | 129 | xfs_inode_t *rbmip; |
130 | xfs_inode_t *rsumip = NULL; | 130 | xfs_inode_t *rsumip = NULL; |
131 | bhv_vnode_t *rvp = XFS_ITOV(rip); | ||
132 | int error; | 131 | int error; |
133 | 132 | ||
134 | xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | 133 | xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
@@ -146,7 +145,7 @@ xfs_unmount_flush( | |||
146 | if (error == EFSCORRUPTED) | 145 | if (error == EFSCORRUPTED) |
147 | goto fscorrupt_out; | 146 | goto fscorrupt_out; |
148 | 147 | ||
149 | ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); | 148 | ASSERT(vn_count(VFS_I(rbmip)) == 1); |
150 | 149 | ||
151 | rsumip = mp->m_rsumip; | 150 | rsumip = mp->m_rsumip; |
152 | xfs_ilock(rsumip, XFS_ILOCK_EXCL); | 151 | xfs_ilock(rsumip, XFS_ILOCK_EXCL); |
@@ -157,7 +156,7 @@ xfs_unmount_flush( | |||
157 | if (error == EFSCORRUPTED) | 156 | if (error == EFSCORRUPTED) |
158 | goto fscorrupt_out; | 157 | goto fscorrupt_out; |
159 | 158 | ||
160 | ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); | 159 | ASSERT(vn_count(VFS_I(rsumip)) == 1); |
161 | } | 160 | } |
162 | 161 | ||
163 | /* | 162 | /* |
@@ -167,7 +166,7 @@ xfs_unmount_flush( | |||
167 | if (error == EFSCORRUPTED) | 166 | if (error == EFSCORRUPTED) |
168 | goto fscorrupt_out2; | 167 | goto fscorrupt_out2; |
169 | 168 | ||
170 | if (vn_count(rvp) != 1 && !relocation) { | 169 | if (vn_count(VFS_I(rip)) != 1 && !relocation) { |
171 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | 170 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
172 | return XFS_ERROR(EBUSY); | 171 | return XFS_ERROR(EBUSY); |
173 | } | 172 | } |
@@ -284,7 +283,7 @@ xfs_sync_inodes( | |||
284 | int *bypassed) | 283 | int *bypassed) |
285 | { | 284 | { |
286 | xfs_inode_t *ip = NULL; | 285 | xfs_inode_t *ip = NULL; |
287 | bhv_vnode_t *vp = NULL; | 286 | struct inode *vp = NULL; |
288 | int error; | 287 | int error; |
289 | int last_error; | 288 | int last_error; |
290 | uint64_t fflag; | 289 | uint64_t fflag; |
@@ -404,7 +403,7 @@ xfs_sync_inodes( | |||
404 | continue; | 403 | continue; |
405 | } | 404 | } |
406 | 405 | ||
407 | vp = XFS_ITOV_NULL(ip); | 406 | vp = VFS_I(ip); |
408 | 407 | ||
409 | /* | 408 | /* |
410 | * If the vnode is gone then this is being torn down, | 409 | * If the vnode is gone then this is being torn down, |
@@ -479,7 +478,7 @@ xfs_sync_inodes( | |||
479 | IPOINTER_INSERT(ip, mp); | 478 | IPOINTER_INSERT(ip, mp); |
480 | xfs_ilock(ip, lock_flags); | 479 | xfs_ilock(ip, lock_flags); |
481 | 480 | ||
482 | ASSERT(vp == XFS_ITOV(ip)); | 481 | ASSERT(vp == VFS_I(ip)); |
483 | ASSERT(ip->i_mount == mp); | 482 | ASSERT(ip->i_mount == mp); |
484 | 483 | ||
485 | vnode_refed = B_TRUE; | 484 | vnode_refed = B_TRUE; |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 76a1166af822..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -83,7 +83,7 @@ xfs_setattr( | |||
83 | cred_t *credp) | 83 | cred_t *credp) |
84 | { | 84 | { |
85 | xfs_mount_t *mp = ip->i_mount; | 85 | xfs_mount_t *mp = ip->i_mount; |
86 | struct inode *inode = XFS_ITOV(ip); | 86 | struct inode *inode = VFS_I(ip); |
87 | int mask = iattr->ia_valid; | 87 | int mask = iattr->ia_valid; |
88 | xfs_trans_t *tp; | 88 | xfs_trans_t *tp; |
89 | int code; | 89 | int code; |
@@ -182,7 +182,7 @@ xfs_setattr( | |||
182 | xfs_ilock(ip, lock_flags); | 182 | xfs_ilock(ip, lock_flags); |
183 | 183 | ||
184 | /* boolean: are we the file owner? */ | 184 | /* boolean: are we the file owner? */ |
185 | file_owner = (current_fsuid(credp) == ip->i_d.di_uid); | 185 | file_owner = (current_fsuid() == ip->i_d.di_uid); |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * Change various properties of a file. | 188 | * Change various properties of a file. |
@@ -513,7 +513,6 @@ xfs_setattr( | |||
513 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; | 513 | ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; |
514 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; | 514 | ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; |
515 | ip->i_update_core = 1; | 515 | ip->i_update_core = 1; |
516 | timeflags &= ~XFS_ICHGTIME_ACC; | ||
517 | } | 516 | } |
518 | if (mask & ATTR_MTIME) { | 517 | if (mask & ATTR_MTIME) { |
519 | inode->i_mtime = iattr->ia_mtime; | 518 | inode->i_mtime = iattr->ia_mtime; |
@@ -714,7 +713,7 @@ xfs_fsync( | |||
714 | return XFS_ERROR(EIO); | 713 | return XFS_ERROR(EIO); |
715 | 714 | ||
716 | /* capture size updates in I/O completion before writing the inode. */ | 715 | /* capture size updates in I/O completion before writing the inode. */ |
717 | error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); | 716 | error = filemap_fdatawait(VFS_I(ip)->i_mapping); |
718 | if (error) | 717 | if (error) |
719 | return XFS_ERROR(error); | 718 | return XFS_ERROR(error); |
720 | 719 | ||
@@ -1160,7 +1159,6 @@ int | |||
1160 | xfs_release( | 1159 | xfs_release( |
1161 | xfs_inode_t *ip) | 1160 | xfs_inode_t *ip) |
1162 | { | 1161 | { |
1163 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
1164 | xfs_mount_t *mp = ip->i_mount; | 1162 | xfs_mount_t *mp = ip->i_mount; |
1165 | int error; | 1163 | int error; |
1166 | 1164 | ||
@@ -1195,13 +1193,13 @@ xfs_release( | |||
1195 | * be exposed to that problem. | 1193 | * be exposed to that problem. |
1196 | */ | 1194 | */ |
1197 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); | 1195 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); |
1198 | if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) | 1196 | if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) |
1199 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); | 1197 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); |
1200 | } | 1198 | } |
1201 | 1199 | ||
1202 | if (ip->i_d.di_nlink != 0) { | 1200 | if (ip->i_d.di_nlink != 0) { |
1203 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 1201 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
1204 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || | 1202 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
1205 | ip->i_delayed_blks > 0)) && | 1203 | ip->i_delayed_blks > 0)) && |
1206 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | 1204 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
1207 | (!(ip->i_d.di_flags & | 1205 | (!(ip->i_d.di_flags & |
@@ -1227,7 +1225,6 @@ int | |||
1227 | xfs_inactive( | 1225 | xfs_inactive( |
1228 | xfs_inode_t *ip) | 1226 | xfs_inode_t *ip) |
1229 | { | 1227 | { |
1230 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
1231 | xfs_bmap_free_t free_list; | 1228 | xfs_bmap_free_t free_list; |
1232 | xfs_fsblock_t first_block; | 1229 | xfs_fsblock_t first_block; |
1233 | int committed; | 1230 | int committed; |
@@ -1242,7 +1239,7 @@ xfs_inactive( | |||
1242 | * If the inode is already free, then there can be nothing | 1239 | * If the inode is already free, then there can be nothing |
1243 | * to clean up here. | 1240 | * to clean up here. |
1244 | */ | 1241 | */ |
1245 | if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { | 1242 | if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) { |
1246 | ASSERT(ip->i_df.if_real_bytes == 0); | 1243 | ASSERT(ip->i_df.if_real_bytes == 0); |
1247 | ASSERT(ip->i_df.if_broot_bytes == 0); | 1244 | ASSERT(ip->i_df.if_broot_bytes == 0); |
1248 | return VN_INACTIVE_CACHE; | 1245 | return VN_INACTIVE_CACHE; |
@@ -1272,7 +1269,7 @@ xfs_inactive( | |||
1272 | 1269 | ||
1273 | if (ip->i_d.di_nlink != 0) { | 1270 | if (ip->i_d.di_nlink != 0) { |
1274 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 1271 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
1275 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || | 1272 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || |
1276 | ip->i_delayed_blks > 0)) && | 1273 | ip->i_delayed_blks > 0)) && |
1277 | (ip->i_df.if_flags & XFS_IFEXTENTS) && | 1274 | (ip->i_df.if_flags & XFS_IFEXTENTS) && |
1278 | (!(ip->i_d.di_flags & | 1275 | (!(ip->i_d.di_flags & |
@@ -1536,7 +1533,7 @@ xfs_create( | |||
1536 | * Make sure that we have allocated dquot(s) on disk. | 1533 | * Make sure that we have allocated dquot(s) on disk. |
1537 | */ | 1534 | */ |
1538 | error = XFS_QM_DQVOPALLOC(mp, dp, | 1535 | error = XFS_QM_DQVOPALLOC(mp, dp, |
1539 | current_fsuid(credp), current_fsgid(credp), prid, | 1536 | current_fsuid(), current_fsgid(), prid, |
1540 | XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); | 1537 | XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); |
1541 | if (error) | 1538 | if (error) |
1542 | goto std_return; | 1539 | goto std_return; |
@@ -1708,111 +1705,6 @@ std_return: | |||
1708 | } | 1705 | } |
1709 | 1706 | ||
1710 | #ifdef DEBUG | 1707 | #ifdef DEBUG |
1711 | /* | ||
1712 | * Some counters to see if (and how often) we are hitting some deadlock | ||
1713 | * prevention code paths. | ||
1714 | */ | ||
1715 | |||
1716 | int xfs_rm_locks; | ||
1717 | int xfs_rm_lock_delays; | ||
1718 | int xfs_rm_attempts; | ||
1719 | #endif | ||
1720 | |||
1721 | /* | ||
1722 | * The following routine will lock the inodes associated with the | ||
1723 | * directory and the named entry in the directory. The locks are | ||
1724 | * acquired in increasing inode number. | ||
1725 | * | ||
1726 | * If the entry is "..", then only the directory is locked. The | ||
1727 | * vnode ref count will still include that from the .. entry in | ||
1728 | * this case. | ||
1729 | * | ||
1730 | * There is a deadlock we need to worry about. If the locked directory is | ||
1731 | * in the AIL, it might be blocking up the log. The next inode we lock | ||
1732 | * could be already locked by another thread waiting for log space (e.g | ||
1733 | * a permanent log reservation with a long running transaction (see | ||
1734 | * xfs_itruncate_finish)). To solve this, we must check if the directory | ||
1735 | * is in the ail and use lock_nowait. If we can't lock, we need to | ||
1736 | * drop the inode lock on the directory and try again. xfs_iunlock will | ||
1737 | * potentially push the tail if we were holding up the log. | ||
1738 | */ | ||
1739 | STATIC int | ||
1740 | xfs_lock_dir_and_entry( | ||
1741 | xfs_inode_t *dp, | ||
1742 | xfs_inode_t *ip) /* inode of entry 'name' */ | ||
1743 | { | ||
1744 | int attempts; | ||
1745 | xfs_ino_t e_inum; | ||
1746 | xfs_inode_t *ips[2]; | ||
1747 | xfs_log_item_t *lp; | ||
1748 | |||
1749 | #ifdef DEBUG | ||
1750 | xfs_rm_locks++; | ||
1751 | #endif | ||
1752 | attempts = 0; | ||
1753 | |||
1754 | again: | ||
1755 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | ||
1756 | |||
1757 | e_inum = ip->i_ino; | ||
1758 | |||
1759 | xfs_itrace_ref(ip); | ||
1760 | |||
1761 | /* | ||
1762 | * We want to lock in increasing inum. Since we've already | ||
1763 | * acquired the lock on the directory, we may need to release | ||
1764 | * if if the inum of the entry turns out to be less. | ||
1765 | */ | ||
1766 | if (e_inum > dp->i_ino) { | ||
1767 | /* | ||
1768 | * We are already in the right order, so just | ||
1769 | * lock on the inode of the entry. | ||
1770 | * We need to use nowait if dp is in the AIL. | ||
1771 | */ | ||
1772 | |||
1773 | lp = (xfs_log_item_t *)dp->i_itemp; | ||
1774 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { | ||
1775 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | ||
1776 | attempts++; | ||
1777 | #ifdef DEBUG | ||
1778 | xfs_rm_attempts++; | ||
1779 | #endif | ||
1780 | |||
1781 | /* | ||
1782 | * Unlock dp and try again. | ||
1783 | * xfs_iunlock will try to push the tail | ||
1784 | * if the inode is in the AIL. | ||
1785 | */ | ||
1786 | |||
1787 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
1788 | |||
1789 | if ((attempts % 5) == 0) { | ||
1790 | delay(1); /* Don't just spin the CPU */ | ||
1791 | #ifdef DEBUG | ||
1792 | xfs_rm_lock_delays++; | ||
1793 | #endif | ||
1794 | } | ||
1795 | goto again; | ||
1796 | } | ||
1797 | } else { | ||
1798 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1799 | } | ||
1800 | } else if (e_inum < dp->i_ino) { | ||
1801 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
1802 | |||
1803 | ips[0] = ip; | ||
1804 | ips[1] = dp; | ||
1805 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | ||
1806 | } | ||
1807 | /* else e_inum == dp->i_ino */ | ||
1808 | /* This can happen if we're asked to lock /x/.. | ||
1809 | * the entry is "..", which is also the parent directory. | ||
1810 | */ | ||
1811 | |||
1812 | return 0; | ||
1813 | } | ||
1814 | |||
1815 | #ifdef DEBUG | ||
1816 | int xfs_locked_n; | 1708 | int xfs_locked_n; |
1817 | int xfs_small_retries; | 1709 | int xfs_small_retries; |
1818 | int xfs_middle_retries; | 1710 | int xfs_middle_retries; |
@@ -1946,6 +1838,53 @@ again: | |||
1946 | #endif | 1838 | #endif |
1947 | } | 1839 | } |
1948 | 1840 | ||
1841 | /* | ||
1842 | * xfs_lock_two_inodes() can only be used to lock one type of lock | ||
1843 | * at a time - the iolock or the ilock, but not both at once. If | ||
1844 | * we lock both at once, lockdep will report false positives saying | ||
1845 | * we have violated locking orders. | ||
1846 | */ | ||
1847 | void | ||
1848 | xfs_lock_two_inodes( | ||
1849 | xfs_inode_t *ip0, | ||
1850 | xfs_inode_t *ip1, | ||
1851 | uint lock_mode) | ||
1852 | { | ||
1853 | xfs_inode_t *temp; | ||
1854 | int attempts = 0; | ||
1855 | xfs_log_item_t *lp; | ||
1856 | |||
1857 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | ||
1858 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | ||
1859 | ASSERT(ip0->i_ino != ip1->i_ino); | ||
1860 | |||
1861 | if (ip0->i_ino > ip1->i_ino) { | ||
1862 | temp = ip0; | ||
1863 | ip0 = ip1; | ||
1864 | ip1 = temp; | ||
1865 | } | ||
1866 | |||
1867 | again: | ||
1868 | xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); | ||
1869 | |||
1870 | /* | ||
1871 | * If the first lock we have locked is in the AIL, we must TRY to get | ||
1872 | * the second lock. If we can't get it, we must release the first one | ||
1873 | * and try again. | ||
1874 | */ | ||
1875 | lp = (xfs_log_item_t *)ip0->i_itemp; | ||
1876 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { | ||
1877 | if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { | ||
1878 | xfs_iunlock(ip0, lock_mode); | ||
1879 | if ((++attempts % 5) == 0) | ||
1880 | delay(1); /* Don't just spin the CPU */ | ||
1881 | goto again; | ||
1882 | } | ||
1883 | } else { | ||
1884 | xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); | ||
1885 | } | ||
1886 | } | ||
1887 | |||
1949 | int | 1888 | int |
1950 | xfs_remove( | 1889 | xfs_remove( |
1951 | xfs_inode_t *dp, | 1890 | xfs_inode_t *dp, |
@@ -2018,9 +1957,7 @@ xfs_remove( | |||
2018 | goto out_trans_cancel; | 1957 | goto out_trans_cancel; |
2019 | } | 1958 | } |
2020 | 1959 | ||
2021 | error = xfs_lock_dir_and_entry(dp, ip); | 1960 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); |
2022 | if (error) | ||
2023 | goto out_trans_cancel; | ||
2024 | 1961 | ||
2025 | /* | 1962 | /* |
2026 | * At this point, we've gotten both the directory and the entry | 1963 | * At this point, we've gotten both the directory and the entry |
@@ -2047,9 +1984,6 @@ xfs_remove( | |||
2047 | } | 1984 | } |
2048 | } | 1985 | } |
2049 | 1986 | ||
2050 | /* | ||
2051 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. | ||
2052 | */ | ||
2053 | XFS_BMAP_INIT(&free_list, &first_block); | 1987 | XFS_BMAP_INIT(&free_list, &first_block); |
2054 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, | 1988 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, |
2055 | &first_block, &free_list, resblks); | 1989 | &first_block, &free_list, resblks); |
@@ -2155,7 +2089,6 @@ xfs_link( | |||
2155 | { | 2089 | { |
2156 | xfs_mount_t *mp = tdp->i_mount; | 2090 | xfs_mount_t *mp = tdp->i_mount; |
2157 | xfs_trans_t *tp; | 2091 | xfs_trans_t *tp; |
2158 | xfs_inode_t *ips[2]; | ||
2159 | int error; | 2092 | int error; |
2160 | xfs_bmap_free_t free_list; | 2093 | xfs_bmap_free_t free_list; |
2161 | xfs_fsblock_t first_block; | 2094 | xfs_fsblock_t first_block; |
@@ -2203,15 +2136,7 @@ xfs_link( | |||
2203 | goto error_return; | 2136 | goto error_return; |
2204 | } | 2137 | } |
2205 | 2138 | ||
2206 | if (sip->i_ino < tdp->i_ino) { | 2139 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); |
2207 | ips[0] = sip; | ||
2208 | ips[1] = tdp; | ||
2209 | } else { | ||
2210 | ips[0] = tdp; | ||
2211 | ips[1] = sip; | ||
2212 | } | ||
2213 | |||
2214 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | ||
2215 | 2140 | ||
2216 | /* | 2141 | /* |
2217 | * Increment vnode ref counts since xfs_trans_commit & | 2142 | * Increment vnode ref counts since xfs_trans_commit & |
@@ -2352,7 +2277,7 @@ xfs_mkdir( | |||
2352 | * Make sure that we have allocated dquot(s) on disk. | 2277 | * Make sure that we have allocated dquot(s) on disk. |
2353 | */ | 2278 | */ |
2354 | error = XFS_QM_DQVOPALLOC(mp, dp, | 2279 | error = XFS_QM_DQVOPALLOC(mp, dp, |
2355 | current_fsuid(credp), current_fsgid(credp), prid, | 2280 | current_fsuid(), current_fsgid(), prid, |
2356 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 2281 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); |
2357 | if (error) | 2282 | if (error) |
2358 | goto std_return; | 2283 | goto std_return; |
@@ -2578,7 +2503,7 @@ xfs_symlink( | |||
2578 | * Make sure that we have allocated dquot(s) on disk. | 2503 | * Make sure that we have allocated dquot(s) on disk. |
2579 | */ | 2504 | */ |
2580 | error = XFS_QM_DQVOPALLOC(mp, dp, | 2505 | error = XFS_QM_DQVOPALLOC(mp, dp, |
2581 | current_fsuid(credp), current_fsgid(credp), prid, | 2506 | current_fsuid(), current_fsgid(), prid, |
2582 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 2507 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); |
2583 | if (error) | 2508 | if (error) |
2584 | goto std_return; | 2509 | goto std_return; |
@@ -2873,14 +2798,13 @@ int | |||
2873 | xfs_reclaim( | 2798 | xfs_reclaim( |
2874 | xfs_inode_t *ip) | 2799 | xfs_inode_t *ip) |
2875 | { | 2800 | { |
2876 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
2877 | 2801 | ||
2878 | xfs_itrace_entry(ip); | 2802 | xfs_itrace_entry(ip); |
2879 | 2803 | ||
2880 | ASSERT(!VN_MAPPED(vp)); | 2804 | ASSERT(!VN_MAPPED(VFS_I(ip))); |
2881 | 2805 | ||
2882 | /* bad inode, get out here ASAP */ | 2806 | /* bad inode, get out here ASAP */ |
2883 | if (VN_BAD(vp)) { | 2807 | if (VN_BAD(VFS_I(ip))) { |
2884 | xfs_ireclaim(ip); | 2808 | xfs_ireclaim(ip); |
2885 | return 0; | 2809 | return 0; |
2886 | } | 2810 | } |
@@ -2917,7 +2841,7 @@ xfs_reclaim( | |||
2917 | XFS_MOUNT_ILOCK(mp); | 2841 | XFS_MOUNT_ILOCK(mp); |
2918 | spin_lock(&ip->i_flags_lock); | 2842 | spin_lock(&ip->i_flags_lock); |
2919 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 2843 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
2920 | vn_to_inode(vp)->i_private = NULL; | 2844 | VFS_I(ip)->i_private = NULL; |
2921 | ip->i_vnode = NULL; | 2845 | ip->i_vnode = NULL; |
2922 | spin_unlock(&ip->i_flags_lock); | 2846 | spin_unlock(&ip->i_flags_lock); |
2923 | list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); | 2847 | list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); |
@@ -2933,7 +2857,7 @@ xfs_finish_reclaim( | |||
2933 | int sync_mode) | 2857 | int sync_mode) |
2934 | { | 2858 | { |
2935 | xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); | 2859 | xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); |
2936 | bhv_vnode_t *vp = XFS_ITOV_NULL(ip); | 2860 | struct inode *vp = VFS_I(ip); |
2937 | 2861 | ||
2938 | if (vp && VN_BAD(vp)) | 2862 | if (vp && VN_BAD(vp)) |
2939 | goto reclaim; | 2863 | goto reclaim; |
@@ -3236,6 +3160,13 @@ error1: /* Just cancel transaction */ | |||
3236 | /* | 3160 | /* |
3237 | * Zero file bytes between startoff and endoff inclusive. | 3161 | * Zero file bytes between startoff and endoff inclusive. |
3238 | * The iolock is held exclusive and no blocks are buffered. | 3162 | * The iolock is held exclusive and no blocks are buffered. |
3163 | * | ||
3164 | * This function is used by xfs_free_file_space() to zero | ||
3165 | * partial blocks when the range to free is not block aligned. | ||
3166 | * When unreserving space with boundaries that are not block | ||
3167 | * aligned we round up the start and round down the end | ||
3168 | * boundaries and then use this function to zero the parts of | ||
3169 | * the blocks that got dropped during the rounding. | ||
3239 | */ | 3170 | */ |
3240 | STATIC int | 3171 | STATIC int |
3241 | xfs_zero_remaining_bytes( | 3172 | xfs_zero_remaining_bytes( |
@@ -3252,6 +3183,17 @@ xfs_zero_remaining_bytes( | |||
3252 | int nimap; | 3183 | int nimap; |
3253 | int error = 0; | 3184 | int error = 0; |
3254 | 3185 | ||
3186 | /* | ||
3187 | * Avoid doing I/O beyond eof - it's not necessary | ||
3188 | * since nothing can read beyond eof. The space will | ||
3189 | * be zeroed when the file is extended anyway. | ||
3190 | */ | ||
3191 | if (startoff >= ip->i_size) | ||
3192 | return 0; | ||
3193 | |||
3194 | if (endoff > ip->i_size) | ||
3195 | endoff = ip->i_size; | ||
3196 | |||
3255 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 3197 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, |
3256 | XFS_IS_REALTIME_INODE(ip) ? | 3198 | XFS_IS_REALTIME_INODE(ip) ? |
3257 | mp->m_rtdev_targp : mp->m_ddev_targp); | 3199 | mp->m_rtdev_targp : mp->m_ddev_targp); |
@@ -3321,7 +3263,6 @@ xfs_free_file_space( | |||
3321 | xfs_off_t len, | 3263 | xfs_off_t len, |
3322 | int attr_flags) | 3264 | int attr_flags) |
3323 | { | 3265 | { |
3324 | bhv_vnode_t *vp; | ||
3325 | int committed; | 3266 | int committed; |
3326 | int done; | 3267 | int done; |
3327 | xfs_off_t end_dmi_offset; | 3268 | xfs_off_t end_dmi_offset; |
@@ -3341,7 +3282,6 @@ xfs_free_file_space( | |||
3341 | xfs_trans_t *tp; | 3282 | xfs_trans_t *tp; |
3342 | int need_iolock = 1; | 3283 | int need_iolock = 1; |
3343 | 3284 | ||
3344 | vp = XFS_ITOV(ip); | ||
3345 | mp = ip->i_mount; | 3285 | mp = ip->i_mount; |
3346 | 3286 | ||
3347 | xfs_itrace_entry(ip); | 3287 | xfs_itrace_entry(ip); |
@@ -3378,7 +3318,7 @@ xfs_free_file_space( | |||
3378 | rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 3318 | rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); |
3379 | ioffset = offset & ~(rounding - 1); | 3319 | ioffset = offset & ~(rounding - 1); |
3380 | 3320 | ||
3381 | if (VN_CACHED(vp) != 0) { | 3321 | if (VN_CACHED(VFS_I(ip)) != 0) { |
3382 | xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); | 3322 | xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); |
3383 | error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); | 3323 | error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); |
3384 | if (error) | 3324 | if (error) |