diff options
Diffstat (limited to 'fs')
298 files changed, 15600 insertions, 7690 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 047c791427aa..c061c3f18e7c 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -55,7 +55,7 @@ enum { | |||
55 | Opt_err | 55 | Opt_err |
56 | }; | 56 | }; |
57 | 57 | ||
58 | static match_table_t tokens = { | 58 | static const match_table_t tokens = { |
59 | {Opt_debug, "debug=%x"}, | 59 | {Opt_debug, "debug=%x"}, |
60 | {Opt_dfltuid, "dfltuid=%u"}, | 60 | {Opt_dfltuid, "dfltuid=%u"}, |
61 | {Opt_dfltgid, "dfltgid=%u"}, | 61 | {Opt_dfltgid, "dfltgid=%u"}, |
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
119 | 119 | ||
120 | const struct file_operations v9fs_dir_operations = { | 120 | const struct file_operations v9fs_dir_operations = { |
121 | .read = generic_read_dir, | 121 | .read = generic_read_dir, |
122 | .llseek = generic_file_llseek, | ||
122 | .readdir = v9fs_dir_readdir, | 123 | .readdir = v9fs_dir_readdir, |
123 | .open = v9fs_file_open, | 124 | .open = v9fs_file_open, |
124 | .release = v9fs_dir_release, | 125 | .release = v9fs_dir_release, |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
626 | return NULL; | 626 | return NULL; |
627 | 627 | ||
628 | error: | 628 | error: |
629 | if (fid) | 629 | p9_client_clunk(fid); |
630 | p9_client_clunk(fid); | ||
631 | 630 | ||
632 | return ERR_PTR(result); | 631 | return ERR_PTR(result); |
633 | } | 632 | } |
diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..d0a1174fb516 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY | |||
136 | If you are not using a security module that requires using | 136 | If you are not using a security module that requires using |
137 | extended attributes for file security labels, say N. | 137 | extended attributes for file security labels, say N. |
138 | 138 | ||
139 | config EXT4DEV_FS | 139 | config EXT4_FS |
140 | tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" | 140 | tristate "The Extended 4 (ext4) filesystem" |
141 | depends on EXPERIMENTAL | ||
142 | select JBD2 | 141 | select JBD2 |
143 | select CRC16 | 142 | select CRC16 |
144 | help | 143 | help |
145 | Ext4dev is a predecessor filesystem of the next generation | 144 | This is the next generation of the ext3 filesystem. |
146 | extended fs ext4, based on ext3 filesystem code. It will be | ||
147 | renamed ext4 fs later, once ext4dev is mature and stabilized. | ||
148 | 145 | ||
149 | Unlike the change from ext2 filesystem to ext3 filesystem, | 146 | Unlike the change from ext2 filesystem to ext3 filesystem, |
150 | the on-disk format of ext4dev is not the same as ext3 any more: | 147 | the on-disk format of ext4 is not forwards compatible with |
151 | it is based on extent maps and it supports 48-bit physical block | 148 | ext3; it is based on extent maps and it supports 48-bit |
152 | numbers. These combined on-disk format changes will allow | 149 | physical block numbers. The ext4 filesystem also supports delayed |
153 | ext4dev/ext4 to handle more than 16 TB filesystem volumes -- | 150 | allocation, persistent preallocation, high resolution time stamps, |
154 | a hard limit that ext3 cannot overcome without changing the | 151 | and a number of other features to improve performance and speed |
155 | on-disk format. | 152 | up fsck time. For more information, please see the web pages at |
156 | 153 | http://ext4.wiki.kernel.org. | |
157 | Other than extent maps and 48-bit block numbers, ext4dev also is | 154 | |
158 | likely to have other new features such as persistent preallocation, | 155 | The ext4 filesystem will support mounting an ext3 |
159 | high resolution time stamps, and larger file support etc. These | 156 | filesystem; while there will be some performance gains from |
160 | features will be added to ext4dev gradually. | 157 | the delayed allocation and inode table readahead, the best |
158 | performance gains will require enabling ext4 features in the | ||
159 | filesystem, or formating a new filesystem as an ext4 | ||
160 | filesystem initially. | ||
161 | 161 | ||
162 | To compile this file system support as a module, choose M here. The | 162 | To compile this file system support as a module, choose M here. The |
163 | module will be called ext4dev. | 163 | module will be called ext4. |
164 | 164 | ||
165 | If unsure, say N. | 165 | If unsure, say N. |
166 | 166 | ||
167 | config EXT4DEV_FS_XATTR | 167 | config EXT4DEV_COMPAT |
168 | bool "Ext4dev extended attributes" | 168 | bool "Enable ext4dev compatibility" |
169 | depends on EXT4DEV_FS | 169 | depends on EXT4_FS |
170 | help | ||
171 | Starting with 2.6.28, the name of the ext4 filesystem was | ||
172 | renamed from ext4dev to ext4. Unfortunately there are some | ||
173 | legacy userspace programs (such as klibc's fstype) have | ||
174 | "ext4dev" hardcoded. | ||
175 | |||
176 | To enable backwards compatibility so that systems that are | ||
177 | still expecting to mount ext4 filesystems using ext4dev, | ||
178 | chose Y here. This feature will go away by 2.6.31, so | ||
179 | please arrange to get your userspace programs fixed! | ||
180 | |||
181 | config EXT4_FS_XATTR | ||
182 | bool "Ext4 extended attributes" | ||
183 | depends on EXT4_FS | ||
170 | default y | 184 | default y |
171 | help | 185 | help |
172 | Extended attributes are name:value pairs associated with inodes by | 186 | Extended attributes are name:value pairs associated with inodes by |
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR | |||
175 | 189 | ||
176 | If unsure, say N. | 190 | If unsure, say N. |
177 | 191 | ||
178 | You need this for POSIX ACL support on ext4dev/ext4. | 192 | You need this for POSIX ACL support on ext4. |
179 | 193 | ||
180 | config EXT4DEV_FS_POSIX_ACL | 194 | config EXT4_FS_POSIX_ACL |
181 | bool "Ext4dev POSIX Access Control Lists" | 195 | bool "Ext4 POSIX Access Control Lists" |
182 | depends on EXT4DEV_FS_XATTR | 196 | depends on EXT4_FS_XATTR |
183 | select FS_POSIX_ACL | 197 | select FS_POSIX_ACL |
184 | help | 198 | help |
185 | POSIX Access Control Lists (ACLs) support permissions for users and | 199 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL | |||
190 | 204 | ||
191 | If you don't know what Access Control Lists are, say N | 205 | If you don't know what Access Control Lists are, say N |
192 | 206 | ||
193 | config EXT4DEV_FS_SECURITY | 207 | config EXT4_FS_SECURITY |
194 | bool "Ext4dev Security Labels" | 208 | bool "Ext4 Security Labels" |
195 | depends on EXT4DEV_FS_XATTR | 209 | depends on EXT4_FS_XATTR |
196 | help | 210 | help |
197 | Security labels support alternative access control models | 211 | Security labels support alternative access control models |
198 | implemented by security modules like SELinux. This option | 212 | implemented by security modules like SELinux. This option |
199 | enables an extended attribute handler for file security | 213 | enables an extended attribute handler for file security |
200 | labels in the ext4dev/ext4 filesystem. | 214 | labels in the ext4 filesystem. |
201 | 215 | ||
202 | If you are not using a security module that requires using | 216 | If you are not using a security module that requires using |
203 | extended attributes for file security labels, say N. | 217 | extended attributes for file security labels, say N. |
@@ -206,17 +220,16 @@ config JBD | |||
206 | tristate | 220 | tristate |
207 | help | 221 | help |
208 | This is a generic journalling layer for block devices. It is | 222 | This is a generic journalling layer for block devices. It is |
209 | currently used by the ext3 and OCFS2 file systems, but it could | 223 | currently used by the ext3 file system, but it could also be |
210 | also be used to add journal support to other file systems or block | 224 | used to add journal support to other file systems or block |
211 | devices such as RAID or LVM. | 225 | devices such as RAID or LVM. |
212 | 226 | ||
213 | If you are using the ext3 or OCFS2 file systems, you need to | 227 | If you are using the ext3 file system, you need to say Y here. |
214 | say Y here. If you are not using ext3 OCFS2 then you will probably | 228 | If you are not using ext3 then you will probably want to say N. |
215 | want to say N. | ||
216 | 229 | ||
217 | To compile this device as a module, choose M here: the module will be | 230 | To compile this device as a module, choose M here: the module will be |
218 | called jbd. If you are compiling ext3 or OCFS2 into the kernel, | 231 | called jbd. If you are compiling ext3 into the kernel, you |
219 | you cannot compile this code as a module. | 232 | cannot compile this code as a module. |
220 | 233 | ||
221 | config JBD_DEBUG | 234 | config JBD_DEBUG |
222 | bool "JBD (ext3) debugging support" | 235 | bool "JBD (ext3) debugging support" |
@@ -240,22 +253,23 @@ config JBD2 | |||
240 | help | 253 | help |
241 | This is a generic journaling layer for block devices that support | 254 | This is a generic journaling layer for block devices that support |
242 | both 32-bit and 64-bit block numbers. It is currently used by | 255 | both 32-bit and 64-bit block numbers. It is currently used by |
243 | the ext4dev/ext4 filesystem, but it could also be used to add | 256 | the ext4 and OCFS2 filesystems, but it could also be used to add |
244 | journal support to other file systems or block devices such | 257 | journal support to other file systems or block devices such |
245 | as RAID or LVM. | 258 | as RAID or LVM. |
246 | 259 | ||
247 | If you are using ext4dev/ext4, you need to say Y here. If you are not | 260 | If you are using ext4 or OCFS2, you need to say Y here. |
248 | using ext4dev/ext4 then you will probably want to say N. | 261 | If you are not using ext4 or OCFS2 then you will |
262 | probably want to say N. | ||
249 | 263 | ||
250 | To compile this device as a module, choose M here. The module will be | 264 | To compile this device as a module, choose M here. The module will be |
251 | called jbd2. If you are compiling ext4dev/ext4 into the kernel, | 265 | called jbd2. If you are compiling ext4 or OCFS2 into the kernel, |
252 | you cannot compile this code as a module. | 266 | you cannot compile this code as a module. |
253 | 267 | ||
254 | config JBD2_DEBUG | 268 | config JBD2_DEBUG |
255 | bool "JBD2 (ext4dev/ext4) debugging support" | 269 | bool "JBD2 (ext4) debugging support" |
256 | depends on JBD2 && DEBUG_FS | 270 | depends on JBD2 && DEBUG_FS |
257 | help | 271 | help |
258 | If you are using the ext4dev/ext4 journaled file system (or | 272 | If you are using the ext4 journaled file system (or |
259 | potentially any other filesystem/device using JBD2), this option | 273 | potentially any other filesystem/device using JBD2), this option |
260 | allows you to enable debugging output while the system is running, | 274 | allows you to enable debugging output while the system is running, |
261 | in order to help track down any problems you are having. | 275 | in order to help track down any problems you are having. |
@@ -270,9 +284,9 @@ config JBD2_DEBUG | |||
270 | config FS_MBCACHE | 284 | config FS_MBCACHE |
271 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) | 285 | # Meta block cache for Extended Attributes (ext2/ext3/ext4) |
272 | tristate | 286 | tristate |
273 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR | 287 | depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR |
274 | default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y | 288 | default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y |
275 | default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m | 289 | default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m |
276 | 290 | ||
277 | config REISERFS_FS | 291 | config REISERFS_FS |
278 | tristate "Reiserfs support" | 292 | tristate "Reiserfs support" |
@@ -419,6 +433,14 @@ config FS_POSIX_ACL | |||
419 | bool | 433 | bool |
420 | default n | 434 | default n |
421 | 435 | ||
436 | config FILE_LOCKING | ||
437 | bool "Enable POSIX file locking API" if EMBEDDED | ||
438 | default y | ||
439 | help | ||
440 | This option enables standard file locking support, required | ||
441 | for filesystems like NFS and for the flock() system | ||
442 | call. Disabling this option saves about 11k. | ||
443 | |||
422 | source "fs/xfs/Kconfig" | 444 | source "fs/xfs/Kconfig" |
423 | source "fs/gfs2/Kconfig" | 445 | source "fs/gfs2/Kconfig" |
424 | 446 | ||
@@ -426,7 +448,7 @@ config OCFS2_FS | |||
426 | tristate "OCFS2 file system support" | 448 | tristate "OCFS2 file system support" |
427 | depends on NET && SYSFS | 449 | depends on NET && SYSFS |
428 | select CONFIGFS_FS | 450 | select CONFIGFS_FS |
429 | select JBD | 451 | select JBD2 |
430 | select CRC32 | 452 | select CRC32 |
431 | help | 453 | help |
432 | OCFS2 is a general purpose extent based shared disk cluster file | 454 | OCFS2 is a general purpose extent based shared disk cluster file |
@@ -497,6 +519,16 @@ config OCFS2_DEBUG_FS | |||
497 | this option for debugging only as it is likely to decrease | 519 | this option for debugging only as it is likely to decrease |
498 | performance of the filesystem. | 520 | performance of the filesystem. |
499 | 521 | ||
522 | config OCFS2_COMPAT_JBD | ||
523 | bool "Use JBD for compatibility" | ||
524 | depends on OCFS2_FS | ||
525 | default n | ||
526 | select JBD | ||
527 | help | ||
528 | The ocfs2 filesystem now uses JBD2 for its journalling. JBD2 | ||
529 | is backwards compatible with JBD. It is safe to say N here. | ||
530 | However, if you really want to use the original JBD, say Y here. | ||
531 | |||
500 | endif # BLOCK | 532 | endif # BLOCK |
501 | 533 | ||
502 | config DNOTIFY | 534 | config DNOTIFY |
@@ -1765,6 +1797,28 @@ config SUNRPC_XPRT_RDMA | |||
1765 | 1797 | ||
1766 | If unsure, say N. | 1798 | If unsure, say N. |
1767 | 1799 | ||
1800 | config SUNRPC_REGISTER_V4 | ||
1801 | bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" | ||
1802 | depends on SUNRPC && EXPERIMENTAL | ||
1803 | default n | ||
1804 | help | ||
1805 | Sun added support for registering RPC services at an IPv6 | ||
1806 | address by creating two new versions of the rpcbind protocol | ||
1807 | (RFC 1833). | ||
1808 | |||
1809 | This option enables support in the kernel RPC server for | ||
1810 | registering kernel RPC services via version 4 of the rpcbind | ||
1811 | protocol. If you enable this option, you must run a portmapper | ||
1812 | daemon that supports rpcbind protocol version 4. | ||
1813 | |||
1814 | Serving NFS over IPv6 from knfsd (the kernel's NFS server) | ||
1815 | requires that you enable this option and use a portmapper that | ||
1816 | supports rpcbind version 4. | ||
1817 | |||
1818 | If unsure, say N to get traditional behavior (register kernel | ||
1819 | RPC services using only rpcbind version 2). Distributions | ||
1820 | using the legacy Linux portmapper daemon must say N here. | ||
1821 | |||
1768 | config RPCSEC_GSS_KRB5 | 1822 | config RPCSEC_GSS_KRB5 |
1769 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" | 1823 | tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" |
1770 | depends on SUNRPC && EXPERIMENTAL | 1824 | depends on SUNRPC && EXPERIMENTAL |
@@ -1930,6 +1984,16 @@ config CIFS_WEAK_PW_HASH | |||
1930 | 1984 | ||
1931 | If unsure, say N. | 1985 | If unsure, say N. |
1932 | 1986 | ||
1987 | config CIFS_UPCALL | ||
1988 | bool "Kerberos/SPNEGO advanced session setup" | ||
1989 | depends on CIFS && KEYS | ||
1990 | help | ||
1991 | Enables an upcall mechanism for CIFS which accesses | ||
1992 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1993 | Kerberos tickets which are needed to mount to certain secure servers | ||
1994 | (for which more secure Kerberos authentication is required). If | ||
1995 | unsure, say N. | ||
1996 | |||
1933 | config CIFS_XATTR | 1997 | config CIFS_XATTR |
1934 | bool "CIFS extended attributes" | 1998 | bool "CIFS extended attributes" |
1935 | depends on CIFS | 1999 | depends on CIFS |
@@ -1982,17 +2046,6 @@ config CIFS_EXPERIMENTAL | |||
1982 | (which is disabled by default). See the file fs/cifs/README | 2046 | (which is disabled by default). See the file fs/cifs/README |
1983 | for more details. If unsure, say N. | 2047 | for more details. If unsure, say N. |
1984 | 2048 | ||
1985 | config CIFS_UPCALL | ||
1986 | bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" | ||
1987 | depends on CIFS_EXPERIMENTAL | ||
1988 | depends on KEYS | ||
1989 | help | ||
1990 | Enables an upcall mechanism for CIFS which accesses | ||
1991 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1992 | Kerberos tickets which are needed to mount to certain secure servers | ||
1993 | (for which more secure Kerberos authentication is required). If | ||
1994 | unsure, say N. | ||
1995 | |||
1996 | config CIFS_DFS_UPCALL | 2049 | config CIFS_DFS_UPCALL |
1997 | bool "DFS feature support (EXPERIMENTAL)" | 2050 | bool "DFS feature support (EXPERIMENTAL)" |
1998 | depends on CIFS_EXPERIMENTAL | 2051 | depends on CIFS_EXPERIMENTAL |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4a551af6f3fc..801db1341811 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -25,7 +25,7 @@ config BINFMT_ELF | |||
25 | 25 | ||
26 | config COMPAT_BINFMT_ELF | 26 | config COMPAT_BINFMT_ELF |
27 | bool | 27 | bool |
28 | depends on COMPAT && MMU | 28 | depends on COMPAT && BINFMT_ELF |
29 | 29 | ||
30 | config BINFMT_ELF_FDPIC | 30 | config BINFMT_ELF_FDPIC |
31 | bool "Kernel support for FDPIC ELF binaries" | 31 | bool "Kernel support for FDPIC ELF binaries" |
@@ -59,10 +59,12 @@ config BINFMT_SHARED_FLAT | |||
59 | help | 59 | help |
60 | Support FLAT shared libraries | 60 | Support FLAT shared libraries |
61 | 61 | ||
62 | config HAVE_AOUT | ||
63 | def_bool n | ||
64 | |||
62 | config BINFMT_AOUT | 65 | config BINFMT_AOUT |
63 | tristate "Kernel support for a.out and ECOFF binaries" | 66 | tristate "Kernel support for a.out and ECOFF binaries" |
64 | depends on ARCH_SUPPORTS_AOUT && \ | 67 | depends on HAVE_AOUT |
65 | (X86_32 || ALPHA || ARM || M68K) | ||
66 | ---help--- | 68 | ---help--- |
67 | A.out (Assembler.OUTput) is a set of formats for libraries and | 69 | A.out (Assembler.OUTput) is a set of formats for libraries and |
68 | executables used in the earliest versions of UNIX. Linux used | 70 | executables used in the earliest versions of UNIX. Linux used |
diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..2168c902d5ca 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -7,8 +7,8 @@ | |||
7 | 7 | ||
8 | obj-y := open.o read_write.o file_table.o super.o \ | 8 | obj-y := open.o read_write.o file_table.o super.o \ |
9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ |
10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
14 | stack.o | 14 | stack.o |
@@ -27,6 +27,8 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o | |||
27 | obj-$(CONFIG_SIGNALFD) += signalfd.o | 27 | obj-$(CONFIG_SIGNALFD) += signalfd.o |
28 | obj-$(CONFIG_TIMERFD) += timerfd.o | 28 | obj-$(CONFIG_TIMERFD) += timerfd.o |
29 | obj-$(CONFIG_EVENTFD) += eventfd.o | 29 | obj-$(CONFIG_EVENTFD) += eventfd.o |
30 | obj-$(CONFIG_AIO) += aio.o | ||
31 | obj-$(CONFIG_FILE_LOCKING) += locks.o | ||
30 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 32 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
31 | 33 | ||
32 | nfsd-$(CONFIG_NFSD) := nfsctl.o | 34 | nfsd-$(CONFIG_NFSD) := nfsctl.o |
@@ -69,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
69 | # Do not add any filesystems before this line | 71 | # Do not add any filesystems before this line |
70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
72 | obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 |
73 | obj-$(CONFIG_JBD) += jbd/ | 75 | obj-$(CONFIG_JBD) += jbd/ |
74 | obj-$(CONFIG_JBD2) += jbd2/ | 76 | obj-$(CONFIG_JBD2) += jbd2/ |
75 | obj-$(CONFIG_EXT2_FS) += ext2/ | 77 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
@@ -197,6 +197,7 @@ out: | |||
197 | 197 | ||
198 | const struct file_operations adfs_dir_operations = { | 198 | const struct file_operations adfs_dir_operations = { |
199 | .read = generic_read_dir, | 199 | .read = generic_read_dir, |
200 | .llseek = generic_file_llseek, | ||
200 | .readdir = adfs_readdir, | 201 | .readdir = adfs_readdir, |
201 | .fsync = file_fsync, | 202 | .fsync = file_fsync, |
202 | }; | 203 | }; |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 26f3b43726bb..7f83a46f2b7e 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
157 | 157 | ||
158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; | 158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; |
159 | 159 | ||
160 | static match_table_t tokens = { | 160 | static const match_table_t tokens = { |
161 | {Opt_uid, "uid=%u"}, | 161 | {Opt_uid, "uid=%u"}, |
162 | {Opt_gid, "gid=%u"}, | 162 | {Opt_gid, "gid=%u"}, |
163 | {Opt_ownmask, "ownmask=%o"}, | 163 | {Opt_ownmask, "ownmask=%o"}, |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); | |||
19 | 19 | ||
20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
22 | .llseek = generic_file_llseek, | ||
22 | .readdir = affs_readdir, | 23 | .readdir = affs_readdir, |
23 | .fsync = file_fsync, | 24 | .fsync = file_fsync, |
24 | }; | 25 | }; |
diff --git a/fs/affs/super.c b/fs/affs/super.c index 3a89094f93d0..8989c93193ed 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -135,7 +135,7 @@ enum { | |||
135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, | 135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, |
136 | }; | 136 | }; |
137 | 137 | ||
138 | static match_table_t tokens = { | 138 | static const match_table_t tokens = { |
139 | {Opt_bs, "bs=%u"}, | 139 | {Opt_bs, "bs=%u"}, |
140 | {Opt_mode, "mode=%o"}, | 140 | {Opt_mode, "mode=%o"}, |
141 | {Opt_mufs, "mufs"}, | 141 | {Opt_mufs, "mufs"}, |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 525f7c56e068..a3901769a96c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -50,8 +50,8 @@ const struct address_space_operations afs_fs_aops = { | |||
50 | .launder_page = afs_launder_page, | 50 | .launder_page = afs_launder_page, |
51 | .releasepage = afs_releasepage, | 51 | .releasepage = afs_releasepage, |
52 | .invalidatepage = afs_invalidatepage, | 52 | .invalidatepage = afs_invalidatepage, |
53 | .prepare_write = afs_prepare_write, | 53 | .write_begin = afs_write_begin, |
54 | .commit_write = afs_commit_write, | 54 | .write_end = afs_write_end, |
55 | .writepage = afs_writepage, | 55 | .writepage = afs_writepage, |
56 | .writepages = afs_writepages, | 56 | .writepages = afs_writepages, |
57 | }; | 57 | }; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3cb6920ff30b..67f259d99cd6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -728,8 +728,12 @@ extern int afs_volume_release_fileserver(struct afs_vnode *, | |||
728 | */ | 728 | */ |
729 | extern int afs_set_page_dirty(struct page *); | 729 | extern int afs_set_page_dirty(struct page *); |
730 | extern void afs_put_writeback(struct afs_writeback *); | 730 | extern void afs_put_writeback(struct afs_writeback *); |
731 | extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); | 731 | extern int afs_write_begin(struct file *file, struct address_space *mapping, |
732 | extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); | 732 | loff_t pos, unsigned len, unsigned flags, |
733 | struct page **pagep, void **fsdata); | ||
734 | extern int afs_write_end(struct file *file, struct address_space *mapping, | ||
735 | loff_t pos, unsigned len, unsigned copied, | ||
736 | struct page *page, void *fsdata); | ||
733 | extern int afs_writepage(struct page *, struct writeback_control *); | 737 | extern int afs_writepage(struct page *, struct writeback_control *); |
734 | extern int afs_writepages(struct address_space *, struct writeback_control *); | 738 | extern int afs_writepages(struct address_space *, struct writeback_control *); |
735 | extern int afs_write_inode(struct inode *, int); | 739 | extern int afs_write_inode(struct inode *, int); |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 250d8c4d66e4..aee239a048cb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -64,7 +64,7 @@ enum { | |||
64 | afs_opt_vol, | 64 | afs_opt_vol, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | static match_table_t afs_options_list = { | 67 | static const match_table_t afs_options_list = { |
68 | { afs_opt_cell, "cell=%s" }, | 68 | { afs_opt_cell, "cell=%s" }, |
69 | { afs_opt_rwpath, "rwpath" }, | 69 | { afs_opt_rwpath, "rwpath" }, |
70 | { afs_opt_vol, "vol=%s" }, | 70 | { afs_opt_vol, "vol=%s" }, |
diff --git a/fs/afs/write.c b/fs/afs/write.c index 065b4e10681a..d6b85dab35fc 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -84,15 +84,23 @@ void afs_put_writeback(struct afs_writeback *wb) | |||
84 | * partly or wholly fill a page that's under preparation for writing | 84 | * partly or wholly fill a page that's under preparation for writing |
85 | */ | 85 | */ |
86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | 86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, |
87 | unsigned start, unsigned len, struct page *page) | 87 | loff_t pos, unsigned len, struct page *page) |
88 | { | 88 | { |
89 | loff_t i_size; | ||
90 | unsigned eof; | ||
89 | int ret; | 91 | int ret; |
90 | 92 | ||
91 | _enter(",,%u,%u", start, len); | 93 | _enter(",,%llu,%u", (unsigned long long)pos, len); |
92 | 94 | ||
93 | ASSERTCMP(start + len, <=, PAGE_SIZE); | 95 | ASSERTCMP(len, <=, PAGE_CACHE_SIZE); |
94 | 96 | ||
95 | ret = afs_vnode_fetch_data(vnode, key, start, len, page); | 97 | i_size = i_size_read(&vnode->vfs_inode); |
98 | if (pos + len > i_size) | ||
99 | eof = i_size; | ||
100 | else | ||
101 | eof = PAGE_CACHE_SIZE; | ||
102 | |||
103 | ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); | ||
96 | if (ret < 0) { | 104 | if (ret < 0) { |
97 | if (ret == -ENOENT) { | 105 | if (ret == -ENOENT) { |
98 | _debug("got NOENT from server" | 106 | _debug("got NOENT from server" |
@@ -107,109 +115,55 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | |||
107 | } | 115 | } |
108 | 116 | ||
109 | /* | 117 | /* |
110 | * prepare a page for being written to | ||
111 | */ | ||
112 | static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, | ||
113 | struct key *key, unsigned offset, unsigned to) | ||
114 | { | ||
115 | unsigned eof, tail, start, stop, len; | ||
116 | loff_t i_size, pos; | ||
117 | void *p; | ||
118 | int ret; | ||
119 | |||
120 | _enter(""); | ||
121 | |||
122 | if (offset == 0 && to == PAGE_SIZE) | ||
123 | return 0; | ||
124 | |||
125 | p = kmap_atomic(page, KM_USER0); | ||
126 | |||
127 | i_size = i_size_read(&vnode->vfs_inode); | ||
128 | pos = (loff_t) page->index << PAGE_SHIFT; | ||
129 | if (pos >= i_size) { | ||
130 | /* partial write, page beyond EOF */ | ||
131 | _debug("beyond"); | ||
132 | if (offset > 0) | ||
133 | memset(p, 0, offset); | ||
134 | if (to < PAGE_SIZE) | ||
135 | memset(p + to, 0, PAGE_SIZE - to); | ||
136 | kunmap_atomic(p, KM_USER0); | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | if (i_size - pos >= PAGE_SIZE) { | ||
141 | /* partial write, page entirely before EOF */ | ||
142 | _debug("before"); | ||
143 | tail = eof = PAGE_SIZE; | ||
144 | } else { | ||
145 | /* partial write, page overlaps EOF */ | ||
146 | eof = i_size - pos; | ||
147 | _debug("overlap %u", eof); | ||
148 | tail = max(eof, to); | ||
149 | if (tail < PAGE_SIZE) | ||
150 | memset(p + tail, 0, PAGE_SIZE - tail); | ||
151 | if (offset > eof) | ||
152 | memset(p + eof, 0, PAGE_SIZE - eof); | ||
153 | } | ||
154 | |||
155 | kunmap_atomic(p, KM_USER0); | ||
156 | |||
157 | ret = 0; | ||
158 | if (offset > 0 || eof > to) { | ||
159 | /* need to fill one or two bits that aren't going to be written | ||
160 | * (cover both fillers in one read if there are two) */ | ||
161 | start = (offset > 0) ? 0 : to; | ||
162 | stop = (eof > to) ? eof : offset; | ||
163 | len = stop - start; | ||
164 | _debug("wr=%u-%u av=0-%u rd=%u@%u", | ||
165 | offset, to, eof, start, len); | ||
166 | ret = afs_fill_page(vnode, key, start, len, page); | ||
167 | } | ||
168 | |||
169 | _leave(" = %d", ret); | ||
170 | return ret; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * prepare to perform part of a write to a page | 118 | * prepare to perform part of a write to a page |
175 | * - the caller holds the page locked, preventing it from being written out or | ||
176 | * modified by anyone else | ||
177 | */ | 119 | */ |
178 | int afs_prepare_write(struct file *file, struct page *page, | 120 | int afs_write_begin(struct file *file, struct address_space *mapping, |
179 | unsigned offset, unsigned to) | 121 | loff_t pos, unsigned len, unsigned flags, |
122 | struct page **pagep, void **fsdata) | ||
180 | { | 123 | { |
181 | struct afs_writeback *candidate, *wb; | 124 | struct afs_writeback *candidate, *wb; |
182 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | 125 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); |
126 | struct page *page; | ||
183 | struct key *key = file->private_data; | 127 | struct key *key = file->private_data; |
184 | pgoff_t index; | 128 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
129 | unsigned to = from + len; | ||
130 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
185 | int ret; | 131 | int ret; |
186 | 132 | ||
187 | _enter("{%x:%u},{%lx},%u,%u", | 133 | _enter("{%x:%u},{%lx},%u,%u", |
188 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | 134 | vnode->fid.vid, vnode->fid.vnode, index, from, to); |
189 | 135 | ||
190 | candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); | 136 | candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); |
191 | if (!candidate) | 137 | if (!candidate) |
192 | return -ENOMEM; | 138 | return -ENOMEM; |
193 | candidate->vnode = vnode; | 139 | candidate->vnode = vnode; |
194 | candidate->first = candidate->last = page->index; | 140 | candidate->first = candidate->last = index; |
195 | candidate->offset_first = offset; | 141 | candidate->offset_first = from; |
196 | candidate->to_last = to; | 142 | candidate->to_last = to; |
197 | candidate->usage = 1; | 143 | candidate->usage = 1; |
198 | candidate->state = AFS_WBACK_PENDING; | 144 | candidate->state = AFS_WBACK_PENDING; |
199 | init_waitqueue_head(&candidate->waitq); | 145 | init_waitqueue_head(&candidate->waitq); |
200 | 146 | ||
147 | page = __grab_cache_page(mapping, index); | ||
148 | if (!page) { | ||
149 | kfree(candidate); | ||
150 | return -ENOMEM; | ||
151 | } | ||
152 | *pagep = page; | ||
153 | /* page won't leak in error case: it eventually gets cleaned off LRU */ | ||
154 | |||
201 | if (!PageUptodate(page)) { | 155 | if (!PageUptodate(page)) { |
202 | _debug("not up to date"); | 156 | _debug("not up to date"); |
203 | ret = afs_prepare_page(vnode, page, key, offset, to); | 157 | ret = afs_fill_page(vnode, key, pos, len, page); |
204 | if (ret < 0) { | 158 | if (ret < 0) { |
205 | kfree(candidate); | 159 | kfree(candidate); |
206 | _leave(" = %d [prep]", ret); | 160 | _leave(" = %d [prep]", ret); |
207 | return ret; | 161 | return ret; |
208 | } | 162 | } |
163 | SetPageUptodate(page); | ||
209 | } | 164 | } |
210 | 165 | ||
211 | try_again: | 166 | try_again: |
212 | index = page->index; | ||
213 | spin_lock(&vnode->writeback_lock); | 167 | spin_lock(&vnode->writeback_lock); |
214 | 168 | ||
215 | /* see if this page is already pending a writeback under a suitable key | 169 | /* see if this page is already pending a writeback under a suitable key |
@@ -242,8 +196,8 @@ try_again: | |||
242 | subsume_in_current_wb: | 196 | subsume_in_current_wb: |
243 | _debug("subsume"); | 197 | _debug("subsume"); |
244 | ASSERTRANGE(wb->first, <=, index, <=, wb->last); | 198 | ASSERTRANGE(wb->first, <=, index, <=, wb->last); |
245 | if (index == wb->first && offset < wb->offset_first) | 199 | if (index == wb->first && from < wb->offset_first) |
246 | wb->offset_first = offset; | 200 | wb->offset_first = from; |
247 | if (index == wb->last && to > wb->to_last) | 201 | if (index == wb->last && to > wb->to_last) |
248 | wb->to_last = to; | 202 | wb->to_last = to; |
249 | spin_unlock(&vnode->writeback_lock); | 203 | spin_unlock(&vnode->writeback_lock); |
@@ -289,17 +243,17 @@ flush_conflicting_wb: | |||
289 | /* | 243 | /* |
290 | * finalise part of a write to a page | 244 | * finalise part of a write to a page |
291 | */ | 245 | */ |
292 | int afs_commit_write(struct file *file, struct page *page, | 246 | int afs_write_end(struct file *file, struct address_space *mapping, |
293 | unsigned offset, unsigned to) | 247 | loff_t pos, unsigned len, unsigned copied, |
248 | struct page *page, void *fsdata) | ||
294 | { | 249 | { |
295 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | 250 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); |
296 | loff_t i_size, maybe_i_size; | 251 | loff_t i_size, maybe_i_size; |
297 | 252 | ||
298 | _enter("{%x:%u},{%lx},%u,%u", | 253 | _enter("{%x:%u},{%lx}", |
299 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | 254 | vnode->fid.vid, vnode->fid.vnode, page->index); |
300 | 255 | ||
301 | maybe_i_size = (loff_t) page->index << PAGE_SHIFT; | 256 | maybe_i_size = pos + copied; |
302 | maybe_i_size += to; | ||
303 | 257 | ||
304 | i_size = i_size_read(&vnode->vfs_inode); | 258 | i_size = i_size_read(&vnode->vfs_inode); |
305 | if (maybe_i_size > i_size) { | 259 | if (maybe_i_size > i_size) { |
@@ -310,12 +264,13 @@ int afs_commit_write(struct file *file, struct page *page, | |||
310 | spin_unlock(&vnode->writeback_lock); | 264 | spin_unlock(&vnode->writeback_lock); |
311 | } | 265 | } |
312 | 266 | ||
313 | SetPageUptodate(page); | ||
314 | set_page_dirty(page); | 267 | set_page_dirty(page); |
315 | if (PageDirty(page)) | 268 | if (PageDirty(page)) |
316 | _debug("dirtied"); | 269 | _debug("dirtied"); |
270 | unlock_page(page); | ||
271 | page_cache_release(page); | ||
317 | 272 | ||
318 | return 0; | 273 | return copied; |
319 | } | 274 | } |
320 | 275 | ||
321 | /* | 276 | /* |
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index dda510d31f84..b70eea1e8c59 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c | |||
@@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = { | |||
59 | 59 | ||
60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; | 60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; |
61 | 61 | ||
62 | static match_table_t autofs_tokens = { | 62 | static const match_table_t autofs_tokens = { |
63 | {Opt_fd, "fd=%u"}, | 63 | {Opt_fd, "fd=%u"}, |
64 | {Opt_uid, "uid=%u"}, | 64 | {Opt_uid, "uid=%u"}, |
65 | {Opt_gid, "gid=%u"}, | 65 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile index f2c3b79e94d2..a811c1f7d9ab 100644 --- a/fs/autofs4/Makefile +++ b/fs/autofs4/Makefile | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o | 5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o |
6 | 6 | ||
7 | autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o | 7 | autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o |
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 69a2f5c92319..e0f16da00e54 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
@@ -14,6 +14,7 @@ | |||
14 | /* Internal header file for autofs */ | 14 | /* Internal header file for autofs */ |
15 | 15 | ||
16 | #include <linux/auto_fs4.h> | 16 | #include <linux/auto_fs4.h> |
17 | #include <linux/auto_dev-ioctl.h> | ||
17 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
19 | 20 | ||
@@ -21,6 +22,11 @@ | |||
21 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY | 22 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY |
22 | #define AUTOFS_IOC_COUNT 32 | 23 | #define AUTOFS_IOC_COUNT 32 |
23 | 24 | ||
25 | #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) | ||
26 | #define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) | ||
27 | |||
28 | #define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET) | ||
29 | |||
24 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
25 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
26 | #include <linux/time.h> | 32 | #include <linux/time.h> |
@@ -35,11 +41,27 @@ | |||
35 | /* #define DEBUG */ | 41 | /* #define DEBUG */ |
36 | 42 | ||
37 | #ifdef DEBUG | 43 | #ifdef DEBUG |
38 | #define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) | 44 | #define DPRINTK(fmt, args...) \ |
45 | do { \ | ||
46 | printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \ | ||
47 | current->pid, __func__, ##args); \ | ||
48 | } while (0) | ||
39 | #else | 49 | #else |
40 | #define DPRINTK(fmt,args...) do {} while(0) | 50 | #define DPRINTK(fmt, args...) do {} while (0) |
41 | #endif | 51 | #endif |
42 | 52 | ||
53 | #define AUTOFS_WARN(fmt, args...) \ | ||
54 | do { \ | ||
55 | printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ | ||
56 | current->pid, __func__, ##args); \ | ||
57 | } while (0) | ||
58 | |||
59 | #define AUTOFS_ERROR(fmt, args...) \ | ||
60 | do { \ | ||
61 | printk(KERN_ERR "pid %d: %s: " fmt "\n", \ | ||
62 | current->pid, __func__, ##args); \ | ||
63 | } while (0) | ||
64 | |||
43 | /* Unified info structure. This is pointed to by both the dentry and | 65 | /* Unified info structure. This is pointed to by both the dentry and |
44 | inode structures. Each file in the filesystem has an instance of this | 66 | inode structures. Each file in the filesystem has an instance of this |
45 | structure. It holds a reference to the dentry, so dentries are never | 67 | structure. It holds a reference to the dentry, so dentries are never |
@@ -61,6 +83,9 @@ struct autofs_info { | |||
61 | unsigned long last_used; | 83 | unsigned long last_used; |
62 | atomic_t count; | 84 | atomic_t count; |
63 | 85 | ||
86 | uid_t uid; | ||
87 | gid_t gid; | ||
88 | |||
64 | mode_t mode; | 89 | mode_t mode; |
65 | size_t size; | 90 | size_t size; |
66 | 91 | ||
@@ -92,10 +117,6 @@ struct autofs_wait_queue { | |||
92 | 117 | ||
93 | #define AUTOFS_SBI_MAGIC 0x6d4a556d | 118 | #define AUTOFS_SBI_MAGIC 0x6d4a556d |
94 | 119 | ||
95 | #define AUTOFS_TYPE_INDIRECT 0x0001 | ||
96 | #define AUTOFS_TYPE_DIRECT 0x0002 | ||
97 | #define AUTOFS_TYPE_OFFSET 0x0004 | ||
98 | |||
99 | struct autofs_sb_info { | 120 | struct autofs_sb_info { |
100 | u32 magic; | 121 | u32 magic; |
101 | int pipefd; | 122 | int pipefd; |
@@ -169,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *, | |||
169 | struct autofs_packet_expire __user *); | 190 | struct autofs_packet_expire __user *); |
170 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, | 191 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, |
171 | struct autofs_sb_info *, int __user *); | 192 | struct autofs_sb_info *, int __user *); |
193 | struct dentry *autofs4_expire_direct(struct super_block *sb, | ||
194 | struct vfsmount *mnt, | ||
195 | struct autofs_sb_info *sbi, int how); | ||
196 | struct dentry *autofs4_expire_indirect(struct super_block *sb, | ||
197 | struct vfsmount *mnt, | ||
198 | struct autofs_sb_info *sbi, int how); | ||
199 | |||
200 | /* Device node initialization */ | ||
201 | |||
202 | int autofs_dev_ioctl_init(void); | ||
203 | void autofs_dev_ioctl_exit(void); | ||
172 | 204 | ||
173 | /* Operations structures */ | 205 | /* Operations structures */ |
174 | 206 | ||
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c new file mode 100644 index 000000000000..625abf5422e2 --- /dev/null +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -0,0 +1,863 @@ | |||
1 | /* | ||
2 | * Copyright 2008 Red Hat, Inc. All rights reserved. | ||
3 | * Copyright 2008 Ian Kent <raven@themaw.net> | ||
4 | * | ||
5 | * This file is part of the Linux kernel and is made available under | ||
6 | * the terms of the GNU General Public License, version 2, or at your | ||
7 | * option, any later version, incorporated herein by reference. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/vmalloc.h> | ||
12 | #include <linux/miscdevice.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/wait.h> | ||
15 | #include <linux/namei.h> | ||
16 | #include <linux/fcntl.h> | ||
17 | #include <linux/file.h> | ||
18 | #include <linux/fdtable.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/compat.h> | ||
21 | #include <linux/syscalls.h> | ||
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/magic.h> | ||
24 | #include <linux/dcache.h> | ||
25 | #include <linux/uaccess.h> | ||
26 | |||
27 | #include "autofs_i.h" | ||
28 | |||
29 | /* | ||
30 | * This module implements an interface for routing autofs ioctl control | ||
31 | * commands via a miscellaneous device file. | ||
32 | * | ||
33 | * The alternate interface is needed because we need to be able open | ||
34 | * an ioctl file descriptor on an autofs mount that may be covered by | ||
35 | * another mount. This situation arises when starting automount(8) | ||
36 | * or other user space daemon which uses direct mounts or offset | ||
37 | * mounts (used for autofs lazy mount/umount of nested mount trees), | ||
38 | * which have been left busy at at service shutdown. | ||
39 | */ | ||
40 | |||
41 | #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) | ||
42 | |||
43 | typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, | ||
44 | struct autofs_dev_ioctl *); | ||
45 | |||
46 | static int check_name(const char *name) | ||
47 | { | ||
48 | if (!strchr(name, '/')) | ||
49 | return -EINVAL; | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Check a string doesn't overrun the chunk of | ||
55 | * memory we copied from user land. | ||
56 | */ | ||
57 | static int invalid_str(char *str, void *end) | ||
58 | { | ||
59 | while ((void *) str <= end) | ||
60 | if (!*str++) | ||
61 | return 0; | ||
62 | return -EINVAL; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Check that the user compiled against correct version of autofs | ||
67 | * misc device code. | ||
68 | * | ||
69 | * As well as checking the version compatibility this always copies | ||
70 | * the kernel interface version out. | ||
71 | */ | ||
72 | static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) | ||
73 | { | ||
74 | int err = 0; | ||
75 | |||
76 | if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) || | ||
77 | (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) { | ||
78 | AUTOFS_WARN("ioctl control interface version mismatch: " | ||
79 | "kernel(%u.%u), user(%u.%u), cmd(%d)", | ||
80 | AUTOFS_DEV_IOCTL_VERSION_MAJOR, | ||
81 | AUTOFS_DEV_IOCTL_VERSION_MINOR, | ||
82 | param->ver_major, param->ver_minor, cmd); | ||
83 | err = -EINVAL; | ||
84 | } | ||
85 | |||
86 | /* Fill in the kernel version. */ | ||
87 | param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; | ||
88 | param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; | ||
89 | |||
90 | return err; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Copy parameter control struct, including a possible path allocated | ||
95 | * at the end of the struct. | ||
96 | */ | ||
97 | static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) | ||
98 | { | ||
99 | struct autofs_dev_ioctl tmp, *ads; | ||
100 | |||
101 | if (copy_from_user(&tmp, in, sizeof(tmp))) | ||
102 | return ERR_PTR(-EFAULT); | ||
103 | |||
104 | if (tmp.size < sizeof(tmp)) | ||
105 | return ERR_PTR(-EINVAL); | ||
106 | |||
107 | ads = kmalloc(tmp.size, GFP_KERNEL); | ||
108 | if (!ads) | ||
109 | return ERR_PTR(-ENOMEM); | ||
110 | |||
111 | if (copy_from_user(ads, in, tmp.size)) { | ||
112 | kfree(ads); | ||
113 | return ERR_PTR(-EFAULT); | ||
114 | } | ||
115 | |||
116 | return ads; | ||
117 | } | ||
118 | |||
119 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) | ||
120 | { | ||
121 | kfree(param); | ||
122 | return; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Check sanity of parameter control fields and if a path is present | ||
127 | * check that it has a "/" and is terminated. | ||
128 | */ | ||
129 | static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | ||
130 | { | ||
131 | int err = -EINVAL; | ||
132 | |||
133 | if (check_dev_ioctl_version(cmd, param)) { | ||
134 | AUTOFS_WARN("invalid device control module version " | ||
135 | "supplied for cmd(0x%08x)", cmd); | ||
136 | goto out; | ||
137 | } | ||
138 | |||
139 | if (param->size > sizeof(*param)) { | ||
140 | err = check_name(param->path); | ||
141 | if (err) { | ||
142 | AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", | ||
143 | cmd); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | err = invalid_str(param->path, | ||
148 | (void *) ((size_t) param + param->size)); | ||
149 | if (err) { | ||
150 | AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", | ||
151 | cmd); | ||
152 | goto out; | ||
153 | } | ||
154 | } | ||
155 | |||
156 | err = 0; | ||
157 | out: | ||
158 | return err; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Get the autofs super block info struct from the file opened on | ||
163 | * the autofs mount point. | ||
164 | */ | ||
165 | static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) | ||
166 | { | ||
167 | struct autofs_sb_info *sbi = NULL; | ||
168 | struct inode *inode; | ||
169 | |||
170 | if (f) { | ||
171 | inode = f->f_path.dentry->d_inode; | ||
172 | sbi = autofs4_sbi(inode->i_sb); | ||
173 | } | ||
174 | return sbi; | ||
175 | } | ||
176 | |||
177 | /* Return autofs module protocol version */ | ||
178 | static int autofs_dev_ioctl_protover(struct file *fp, | ||
179 | struct autofs_sb_info *sbi, | ||
180 | struct autofs_dev_ioctl *param) | ||
181 | { | ||
182 | param->arg1 = sbi->version; | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | /* Return autofs module protocol sub version */ | ||
187 | static int autofs_dev_ioctl_protosubver(struct file *fp, | ||
188 | struct autofs_sb_info *sbi, | ||
189 | struct autofs_dev_ioctl *param) | ||
190 | { | ||
191 | param->arg1 = sbi->sub_version; | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Walk down the mount stack looking for an autofs mount that | ||
197 | * has the requested device number (aka. new_encode_dev(sb->s_dev). | ||
198 | */ | ||
199 | static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno) | ||
200 | { | ||
201 | struct dentry *dentry; | ||
202 | struct inode *inode; | ||
203 | struct super_block *sb; | ||
204 | dev_t s_dev; | ||
205 | unsigned int err; | ||
206 | |||
207 | err = -ENOENT; | ||
208 | |||
209 | /* Lookup the dentry name at the base of our mount point */ | ||
210 | dentry = d_lookup(nd->path.dentry, &nd->last); | ||
211 | if (!dentry) | ||
212 | goto out; | ||
213 | |||
214 | dput(nd->path.dentry); | ||
215 | nd->path.dentry = dentry; | ||
216 | |||
217 | /* And follow the mount stack looking for our autofs mount */ | ||
218 | while (follow_down(&nd->path.mnt, &nd->path.dentry)) { | ||
219 | inode = nd->path.dentry->d_inode; | ||
220 | if (!inode) | ||
221 | break; | ||
222 | |||
223 | sb = inode->i_sb; | ||
224 | s_dev = new_encode_dev(sb->s_dev); | ||
225 | if (devno == s_dev) { | ||
226 | if (sb->s_magic == AUTOFS_SUPER_MAGIC) { | ||
227 | err = 0; | ||
228 | break; | ||
229 | } | ||
230 | } | ||
231 | } | ||
232 | out: | ||
233 | return err; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * Walk down the mount stack looking for an autofs mount that | ||
238 | * has the requested mount type (ie. indirect, direct or offset). | ||
239 | */ | ||
240 | static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type) | ||
241 | { | ||
242 | struct dentry *dentry; | ||
243 | struct autofs_info *ino; | ||
244 | unsigned int err; | ||
245 | |||
246 | err = -ENOENT; | ||
247 | |||
248 | /* Lookup the dentry name at the base of our mount point */ | ||
249 | dentry = d_lookup(nd->path.dentry, &nd->last); | ||
250 | if (!dentry) | ||
251 | goto out; | ||
252 | |||
253 | dput(nd->path.dentry); | ||
254 | nd->path.dentry = dentry; | ||
255 | |||
256 | /* And follow the mount stack looking for our autofs mount */ | ||
257 | while (follow_down(&nd->path.mnt, &nd->path.dentry)) { | ||
258 | ino = autofs4_dentry_ino(nd->path.dentry); | ||
259 | if (ino && ino->sbi->type & type) { | ||
260 | err = 0; | ||
261 | break; | ||
262 | } | ||
263 | } | ||
264 | out: | ||
265 | return err; | ||
266 | } | ||
267 | |||
268 | static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) | ||
269 | { | ||
270 | struct files_struct *files = current->files; | ||
271 | struct fdtable *fdt; | ||
272 | |||
273 | spin_lock(&files->file_lock); | ||
274 | fdt = files_fdtable(files); | ||
275 | BUG_ON(fdt->fd[fd] != NULL); | ||
276 | rcu_assign_pointer(fdt->fd[fd], file); | ||
277 | FD_SET(fd, fdt->close_on_exec); | ||
278 | spin_unlock(&files->file_lock); | ||
279 | } | ||
280 | |||
281 | |||
282 | /* | ||
283 | * Open a file descriptor on the autofs mount point corresponding | ||
284 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). | ||
285 | */ | ||
286 | static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) | ||
287 | { | ||
288 | struct file *filp; | ||
289 | struct nameidata nd; | ||
290 | int err, fd; | ||
291 | |||
292 | fd = get_unused_fd(); | ||
293 | if (likely(fd >= 0)) { | ||
294 | /* Get nameidata of the parent directory */ | ||
295 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
296 | if (err) | ||
297 | goto out; | ||
298 | |||
299 | /* | ||
300 | * Search down, within the parent, looking for an | ||
301 | * autofs super block that has the device number | ||
302 | * corresponding to the autofs fs we want to open. | ||
303 | */ | ||
304 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
305 | if (err) { | ||
306 | path_put(&nd.path); | ||
307 | goto out; | ||
308 | } | ||
309 | |||
310 | filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); | ||
311 | if (IS_ERR(filp)) { | ||
312 | err = PTR_ERR(filp); | ||
313 | goto out; | ||
314 | } | ||
315 | |||
316 | autofs_dev_ioctl_fd_install(fd, filp); | ||
317 | } | ||
318 | |||
319 | return fd; | ||
320 | |||
321 | out: | ||
322 | put_unused_fd(fd); | ||
323 | return err; | ||
324 | } | ||
325 | |||
326 | /* Open a file descriptor on an autofs mount point */ | ||
327 | static int autofs_dev_ioctl_openmount(struct file *fp, | ||
328 | struct autofs_sb_info *sbi, | ||
329 | struct autofs_dev_ioctl *param) | ||
330 | { | ||
331 | const char *path; | ||
332 | dev_t devid; | ||
333 | int err, fd; | ||
334 | |||
335 | /* param->path has already been checked */ | ||
336 | if (!param->arg1) | ||
337 | return -EINVAL; | ||
338 | |||
339 | param->ioctlfd = -1; | ||
340 | |||
341 | path = param->path; | ||
342 | devid = param->arg1; | ||
343 | |||
344 | err = 0; | ||
345 | fd = autofs_dev_ioctl_open_mountpoint(path, devid); | ||
346 | if (unlikely(fd < 0)) { | ||
347 | err = fd; | ||
348 | goto out; | ||
349 | } | ||
350 | |||
351 | param->ioctlfd = fd; | ||
352 | out: | ||
353 | return err; | ||
354 | } | ||
355 | |||
356 | /* Close file descriptor allocated above (user can also use close(2)). */ | ||
357 | static int autofs_dev_ioctl_closemount(struct file *fp, | ||
358 | struct autofs_sb_info *sbi, | ||
359 | struct autofs_dev_ioctl *param) | ||
360 | { | ||
361 | return sys_close(param->ioctlfd); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Send "ready" status for an existing wait (either a mount or an expire | ||
366 | * request). | ||
367 | */ | ||
368 | static int autofs_dev_ioctl_ready(struct file *fp, | ||
369 | struct autofs_sb_info *sbi, | ||
370 | struct autofs_dev_ioctl *param) | ||
371 | { | ||
372 | autofs_wqt_t token; | ||
373 | |||
374 | token = (autofs_wqt_t) param->arg1; | ||
375 | return autofs4_wait_release(sbi, token, 0); | ||
376 | } | ||
377 | |||
378 | /* | ||
379 | * Send "fail" status for an existing wait (either a mount or an expire | ||
380 | * request). | ||
381 | */ | ||
382 | static int autofs_dev_ioctl_fail(struct file *fp, | ||
383 | struct autofs_sb_info *sbi, | ||
384 | struct autofs_dev_ioctl *param) | ||
385 | { | ||
386 | autofs_wqt_t token; | ||
387 | int status; | ||
388 | |||
389 | token = (autofs_wqt_t) param->arg1; | ||
390 | status = param->arg2 ? param->arg2 : -ENOENT; | ||
391 | return autofs4_wait_release(sbi, token, status); | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Set the pipe fd for kernel communication to the daemon. | ||
396 | * | ||
397 | * Normally this is set at mount using an option but if we | ||
398 | * are reconnecting to a busy mount then we need to use this | ||
399 | * to tell the autofs mount about the new kernel pipe fd. In | ||
400 | * order to protect mounts against incorrectly setting the | ||
401 | * pipefd we also require that the autofs mount be catatonic. | ||
402 | * | ||
403 | * This also sets the process group id used to identify the | ||
404 | * controlling process (eg. the owning automount(8) daemon). | ||
405 | */ | ||
406 | static int autofs_dev_ioctl_setpipefd(struct file *fp, | ||
407 | struct autofs_sb_info *sbi, | ||
408 | struct autofs_dev_ioctl *param) | ||
409 | { | ||
410 | int pipefd; | ||
411 | int err = 0; | ||
412 | |||
413 | if (param->arg1 == -1) | ||
414 | return -EINVAL; | ||
415 | |||
416 | pipefd = param->arg1; | ||
417 | |||
418 | mutex_lock(&sbi->wq_mutex); | ||
419 | if (!sbi->catatonic) { | ||
420 | mutex_unlock(&sbi->wq_mutex); | ||
421 | return -EBUSY; | ||
422 | } else { | ||
423 | struct file *pipe = fget(pipefd); | ||
424 | if (!pipe->f_op || !pipe->f_op->write) { | ||
425 | err = -EPIPE; | ||
426 | fput(pipe); | ||
427 | goto out; | ||
428 | } | ||
429 | sbi->oz_pgrp = task_pgrp_nr(current); | ||
430 | sbi->pipefd = pipefd; | ||
431 | sbi->pipe = pipe; | ||
432 | sbi->catatonic = 0; | ||
433 | } | ||
434 | out: | ||
435 | mutex_unlock(&sbi->wq_mutex); | ||
436 | return err; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Make the autofs mount point catatonic, no longer responsive to | ||
441 | * mount requests. Also closes the kernel pipe file descriptor. | ||
442 | */ | ||
443 | static int autofs_dev_ioctl_catatonic(struct file *fp, | ||
444 | struct autofs_sb_info *sbi, | ||
445 | struct autofs_dev_ioctl *param) | ||
446 | { | ||
447 | autofs4_catatonic_mode(sbi); | ||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | /* Set the autofs mount timeout */ | ||
452 | static int autofs_dev_ioctl_timeout(struct file *fp, | ||
453 | struct autofs_sb_info *sbi, | ||
454 | struct autofs_dev_ioctl *param) | ||
455 | { | ||
456 | unsigned long timeout; | ||
457 | |||
458 | timeout = param->arg1; | ||
459 | param->arg1 = sbi->exp_timeout / HZ; | ||
460 | sbi->exp_timeout = timeout * HZ; | ||
461 | return 0; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * Return the uid and gid of the last request for the mount | ||
466 | * | ||
467 | * When reconstructing an autofs mount tree with active mounts | ||
468 | * we need to re-connect to mounts that may have used the original | ||
469 | * process uid and gid (or string variations of them) for mount | ||
470 | * lookups within the map entry. | ||
471 | */ | ||
472 | static int autofs_dev_ioctl_requester(struct file *fp, | ||
473 | struct autofs_sb_info *sbi, | ||
474 | struct autofs_dev_ioctl *param) | ||
475 | { | ||
476 | struct autofs_info *ino; | ||
477 | struct nameidata nd; | ||
478 | const char *path; | ||
479 | dev_t devid; | ||
480 | int err = -ENOENT; | ||
481 | |||
482 | if (param->size <= sizeof(*param)) { | ||
483 | err = -EINVAL; | ||
484 | goto out; | ||
485 | } | ||
486 | |||
487 | path = param->path; | ||
488 | devid = sbi->sb->s_dev; | ||
489 | |||
490 | param->arg1 = param->arg2 = -1; | ||
491 | |||
492 | /* Get nameidata of the parent directory */ | ||
493 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
494 | if (err) | ||
495 | goto out; | ||
496 | |||
497 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
498 | if (err) | ||
499 | goto out_release; | ||
500 | |||
501 | ino = autofs4_dentry_ino(nd.path.dentry); | ||
502 | if (ino) { | ||
503 | err = 0; | ||
504 | autofs4_expire_wait(nd.path.dentry); | ||
505 | spin_lock(&sbi->fs_lock); | ||
506 | param->arg1 = ino->uid; | ||
507 | param->arg2 = ino->gid; | ||
508 | spin_unlock(&sbi->fs_lock); | ||
509 | } | ||
510 | |||
511 | out_release: | ||
512 | path_put(&nd.path); | ||
513 | out: | ||
514 | return err; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing | ||
519 | * more that can be done. | ||
520 | */ | ||
521 | static int autofs_dev_ioctl_expire(struct file *fp, | ||
522 | struct autofs_sb_info *sbi, | ||
523 | struct autofs_dev_ioctl *param) | ||
524 | { | ||
525 | struct dentry *dentry; | ||
526 | struct vfsmount *mnt; | ||
527 | int err = -EAGAIN; | ||
528 | int how; | ||
529 | |||
530 | how = param->arg1; | ||
531 | mnt = fp->f_path.mnt; | ||
532 | |||
533 | if (sbi->type & AUTOFS_TYPE_TRIGGER) | ||
534 | dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); | ||
535 | else | ||
536 | dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); | ||
537 | |||
538 | if (dentry) { | ||
539 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
540 | |||
541 | /* | ||
542 | * This is synchronous because it makes the daemon a | ||
543 | * little easier | ||
544 | */ | ||
545 | err = autofs4_wait(sbi, dentry, NFY_EXPIRE); | ||
546 | |||
547 | spin_lock(&sbi->fs_lock); | ||
548 | if (ino->flags & AUTOFS_INF_MOUNTPOINT) { | ||
549 | ino->flags &= ~AUTOFS_INF_MOUNTPOINT; | ||
550 | sbi->sb->s_root->d_mounted++; | ||
551 | } | ||
552 | ino->flags &= ~AUTOFS_INF_EXPIRING; | ||
553 | complete_all(&ino->expire_complete); | ||
554 | spin_unlock(&sbi->fs_lock); | ||
555 | dput(dentry); | ||
556 | } | ||
557 | |||
558 | return err; | ||
559 | } | ||
560 | |||
561 | /* Check if autofs mount point is in use */ | ||
562 | static int autofs_dev_ioctl_askumount(struct file *fp, | ||
563 | struct autofs_sb_info *sbi, | ||
564 | struct autofs_dev_ioctl *param) | ||
565 | { | ||
566 | param->arg1 = 0; | ||
567 | if (may_umount(fp->f_path.mnt)) | ||
568 | param->arg1 = 1; | ||
569 | return 0; | ||
570 | } | ||
571 | |||
572 | /* | ||
573 | * Check if the given path is a mountpoint. | ||
574 | * | ||
575 | * If we are supplied with the file descriptor of an autofs | ||
576 | * mount we're looking for a specific mount. In this case | ||
577 | * the path is considered a mountpoint if it is itself a | ||
578 | * mountpoint or contains a mount, such as a multi-mount | ||
579 | * without a root mount. In this case we return 1 if the | ||
580 | * path is a mount point and the super magic of the covering | ||
581 | * mount if there is one or 0 if it isn't a mountpoint. | ||
582 | * | ||
583 | * If we aren't supplied with a file descriptor then we | ||
584 | * lookup the nameidata of the path and check if it is the | ||
585 | * root of a mount. If a type is given we are looking for | ||
586 | * a particular autofs mount and if we don't find a match | ||
587 | * we return fail. If the located nameidata path is the | ||
588 | * root of a mount we return 1 along with the super magic | ||
589 | * of the mount or 0 otherwise. | ||
590 | * | ||
591 | * In both cases the the device number (as returned by | ||
592 | * new_encode_dev()) is also returned. | ||
593 | */ | ||
594 | static int autofs_dev_ioctl_ismountpoint(struct file *fp, | ||
595 | struct autofs_sb_info *sbi, | ||
596 | struct autofs_dev_ioctl *param) | ||
597 | { | ||
598 | struct nameidata nd; | ||
599 | const char *path; | ||
600 | unsigned int type; | ||
601 | int err = -ENOENT; | ||
602 | |||
603 | if (param->size <= sizeof(*param)) { | ||
604 | err = -EINVAL; | ||
605 | goto out; | ||
606 | } | ||
607 | |||
608 | path = param->path; | ||
609 | type = param->arg1; | ||
610 | |||
611 | param->arg1 = 0; | ||
612 | param->arg2 = 0; | ||
613 | |||
614 | if (!fp || param->ioctlfd == -1) { | ||
615 | if (type == AUTOFS_TYPE_ANY) { | ||
616 | struct super_block *sb; | ||
617 | |||
618 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | ||
619 | if (err) | ||
620 | goto out; | ||
621 | |||
622 | sb = nd.path.dentry->d_sb; | ||
623 | param->arg1 = new_encode_dev(sb->s_dev); | ||
624 | } else { | ||
625 | struct autofs_info *ino; | ||
626 | |||
627 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
628 | if (err) | ||
629 | goto out; | ||
630 | |||
631 | err = autofs_dev_ioctl_find_sbi_type(&nd, type); | ||
632 | if (err) | ||
633 | goto out_release; | ||
634 | |||
635 | ino = autofs4_dentry_ino(nd.path.dentry); | ||
636 | param->arg1 = autofs4_get_dev(ino->sbi); | ||
637 | } | ||
638 | |||
639 | err = 0; | ||
640 | if (nd.path.dentry->d_inode && | ||
641 | nd.path.mnt->mnt_root == nd.path.dentry) { | ||
642 | err = 1; | ||
643 | param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic; | ||
644 | } | ||
645 | } else { | ||
646 | dev_t devid = new_encode_dev(sbi->sb->s_dev); | ||
647 | |||
648 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
649 | if (err) | ||
650 | goto out; | ||
651 | |||
652 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
653 | if (err) | ||
654 | goto out_release; | ||
655 | |||
656 | param->arg1 = autofs4_get_dev(sbi); | ||
657 | |||
658 | err = have_submounts(nd.path.dentry); | ||
659 | |||
660 | if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { | ||
661 | if (follow_down(&nd.path.mnt, &nd.path.dentry)) { | ||
662 | struct inode *inode = nd.path.dentry->d_inode; | ||
663 | param->arg2 = inode->i_sb->s_magic; | ||
664 | } | ||
665 | } | ||
666 | } | ||
667 | |||
668 | out_release: | ||
669 | path_put(&nd.path); | ||
670 | out: | ||
671 | return err; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Our range of ioctl numbers isn't 0 based so we need to shift | ||
676 | * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table | ||
677 | * lookup. | ||
678 | */ | ||
679 | #define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST)) | ||
680 | |||
681 | static ioctl_fn lookup_dev_ioctl(unsigned int cmd) | ||
682 | { | ||
683 | static struct { | ||
684 | int cmd; | ||
685 | ioctl_fn fn; | ||
686 | } _ioctls[] = { | ||
687 | {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL}, | ||
688 | {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD), | ||
689 | autofs_dev_ioctl_protover}, | ||
690 | {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD), | ||
691 | autofs_dev_ioctl_protosubver}, | ||
692 | {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD), | ||
693 | autofs_dev_ioctl_openmount}, | ||
694 | {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD), | ||
695 | autofs_dev_ioctl_closemount}, | ||
696 | {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD), | ||
697 | autofs_dev_ioctl_ready}, | ||
698 | {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD), | ||
699 | autofs_dev_ioctl_fail}, | ||
700 | {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD), | ||
701 | autofs_dev_ioctl_setpipefd}, | ||
702 | {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD), | ||
703 | autofs_dev_ioctl_catatonic}, | ||
704 | {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD), | ||
705 | autofs_dev_ioctl_timeout}, | ||
706 | {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD), | ||
707 | autofs_dev_ioctl_requester}, | ||
708 | {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD), | ||
709 | autofs_dev_ioctl_expire}, | ||
710 | {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD), | ||
711 | autofs_dev_ioctl_askumount}, | ||
712 | {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD), | ||
713 | autofs_dev_ioctl_ismountpoint} | ||
714 | }; | ||
715 | unsigned int idx = cmd_idx(cmd); | ||
716 | |||
717 | return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn; | ||
718 | } | ||
719 | |||
720 | /* ioctl dispatcher */ | ||
721 | static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) | ||
722 | { | ||
723 | struct autofs_dev_ioctl *param; | ||
724 | struct file *fp; | ||
725 | struct autofs_sb_info *sbi; | ||
726 | unsigned int cmd_first, cmd; | ||
727 | ioctl_fn fn = NULL; | ||
728 | int err = 0; | ||
729 | |||
730 | /* only root can play with this */ | ||
731 | if (!capable(CAP_SYS_ADMIN)) | ||
732 | return -EPERM; | ||
733 | |||
734 | cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); | ||
735 | cmd = _IOC_NR(command); | ||
736 | |||
737 | if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || | ||
738 | cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) { | ||
739 | return -ENOTTY; | ||
740 | } | ||
741 | |||
742 | /* Copy the parameters into kernel space. */ | ||
743 | param = copy_dev_ioctl(user); | ||
744 | if (IS_ERR(param)) | ||
745 | return PTR_ERR(param); | ||
746 | |||
747 | err = validate_dev_ioctl(command, param); | ||
748 | if (err) | ||
749 | goto out; | ||
750 | |||
751 | /* The validate routine above always sets the version */ | ||
752 | if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD) | ||
753 | goto done; | ||
754 | |||
755 | fn = lookup_dev_ioctl(cmd); | ||
756 | if (!fn) { | ||
757 | AUTOFS_WARN("unknown command 0x%08x", command); | ||
758 | return -ENOTTY; | ||
759 | } | ||
760 | |||
761 | fp = NULL; | ||
762 | sbi = NULL; | ||
763 | |||
764 | /* | ||
765 | * For obvious reasons the openmount can't have a file | ||
766 | * descriptor yet. We don't take a reference to the | ||
767 | * file during close to allow for immediate release. | ||
768 | */ | ||
769 | if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && | ||
770 | cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { | ||
771 | fp = fget(param->ioctlfd); | ||
772 | if (!fp) { | ||
773 | if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) | ||
774 | goto cont; | ||
775 | err = -EBADF; | ||
776 | goto out; | ||
777 | } | ||
778 | |||
779 | if (!fp->f_op) { | ||
780 | err = -ENOTTY; | ||
781 | fput(fp); | ||
782 | goto out; | ||
783 | } | ||
784 | |||
785 | sbi = autofs_dev_ioctl_sbi(fp); | ||
786 | if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { | ||
787 | err = -EINVAL; | ||
788 | fput(fp); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
792 | /* | ||
793 | * Admin needs to be able to set the mount catatonic in | ||
794 | * order to be able to perform the re-open. | ||
795 | */ | ||
796 | if (!autofs4_oz_mode(sbi) && | ||
797 | cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { | ||
798 | err = -EACCES; | ||
799 | fput(fp); | ||
800 | goto out; | ||
801 | } | ||
802 | } | ||
803 | cont: | ||
804 | err = fn(fp, sbi, param); | ||
805 | |||
806 | if (fp) | ||
807 | fput(fp); | ||
808 | done: | ||
809 | if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) | ||
810 | err = -EFAULT; | ||
811 | out: | ||
812 | free_dev_ioctl(param); | ||
813 | return err; | ||
814 | } | ||
815 | |||
816 | static long autofs_dev_ioctl(struct file *file, uint command, ulong u) | ||
817 | { | ||
818 | int err; | ||
819 | err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); | ||
820 | return (long) err; | ||
821 | } | ||
822 | |||
#ifdef CONFIG_COMPAT
/* 32-bit compat entry: translate the user pointer, then dispatch normally. */
static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
{
	ulong ptr = (ulong) compat_ptr(u);

	return autofs_dev_ioctl(file, command, ptr);
}
#else
#define autofs_dev_ioctl_compat NULL
#endif
831 | |||
832 | static const struct file_operations _dev_ioctl_fops = { | ||
833 | .unlocked_ioctl = autofs_dev_ioctl, | ||
834 | .compat_ioctl = autofs_dev_ioctl_compat, | ||
835 | .owner = THIS_MODULE, | ||
836 | }; | ||
837 | |||
838 | static struct miscdevice _autofs_dev_ioctl_misc = { | ||
839 | .minor = MISC_DYNAMIC_MINOR, | ||
840 | .name = AUTOFS_DEVICE_NAME, | ||
841 | .fops = &_dev_ioctl_fops | ||
842 | }; | ||
843 | |||
844 | /* Register/deregister misc character device */ | ||
845 | int autofs_dev_ioctl_init(void) | ||
846 | { | ||
847 | int r; | ||
848 | |||
849 | r = misc_register(&_autofs_dev_ioctl_misc); | ||
850 | if (r) { | ||
851 | AUTOFS_ERROR("misc_register failed for control device"); | ||
852 | return r; | ||
853 | } | ||
854 | |||
855 | return 0; | ||
856 | } | ||
857 | |||
858 | void autofs_dev_ioctl_exit(void) | ||
859 | { | ||
860 | misc_deregister(&_autofs_dev_ioctl_misc); | ||
861 | return; | ||
862 | } | ||
863 | |||
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cdabb796ff01..cde2f8e8935a 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -244,10 +244,10 @@ cont: | |||
244 | } | 244 | } |
245 | 245 | ||
246 | /* Check if we can expire a direct mount (possibly a tree) */ | 246 | /* Check if we can expire a direct mount (possibly a tree) */ |
247 | static struct dentry *autofs4_expire_direct(struct super_block *sb, | 247 | struct dentry *autofs4_expire_direct(struct super_block *sb, |
248 | struct vfsmount *mnt, | 248 | struct vfsmount *mnt, |
249 | struct autofs_sb_info *sbi, | 249 | struct autofs_sb_info *sbi, |
250 | int how) | 250 | int how) |
251 | { | 251 | { |
252 | unsigned long timeout; | 252 | unsigned long timeout; |
253 | struct dentry *root = dget(sb->s_root); | 253 | struct dentry *root = dget(sb->s_root); |
@@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
283 | * - it is unused by any user process | 283 | * - it is unused by any user process |
284 | * - it has been unused for exp_timeout time | 284 | * - it has been unused for exp_timeout time |
285 | */ | 285 | */ |
286 | static struct dentry *autofs4_expire_indirect(struct super_block *sb, | 286 | struct dentry *autofs4_expire_indirect(struct super_block *sb, |
287 | struct vfsmount *mnt, | 287 | struct vfsmount *mnt, |
288 | struct autofs_sb_info *sbi, | 288 | struct autofs_sb_info *sbi, |
289 | int how) | 289 | int how) |
290 | { | 290 | { |
291 | unsigned long timeout; | 291 | unsigned long timeout; |
292 | struct dentry *root = sb->s_root; | 292 | struct dentry *root = sb->s_root; |
@@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
479 | if (arg && get_user(do_now, arg)) | 479 | if (arg && get_user(do_now, arg)) |
480 | return -EFAULT; | 480 | return -EFAULT; |
481 | 481 | ||
482 | if (sbi->type & AUTOFS_TYPE_DIRECT) | 482 | if (sbi->type & AUTOFS_TYPE_TRIGGER) |
483 | dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); | 483 | dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); |
484 | else | 484 | else |
485 | dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); | 485 | dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); |
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 723a1c5e361b..9722e4bd8957 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c | |||
@@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = { | |||
29 | 29 | ||
30 | static int __init init_autofs4_fs(void) | 30 | static int __init init_autofs4_fs(void) |
31 | { | 31 | { |
32 | return register_filesystem(&autofs_fs_type); | 32 | int err; |
33 | |||
34 | err = register_filesystem(&autofs_fs_type); | ||
35 | if (err) | ||
36 | return err; | ||
37 | |||
38 | autofs_dev_ioctl_init(); | ||
39 | |||
40 | return err; | ||
33 | } | 41 | } |
34 | 42 | ||
35 | static void __exit exit_autofs4_fs(void) | 43 | static void __exit exit_autofs4_fs(void) |
36 | { | 44 | { |
45 | autofs_dev_ioctl_exit(); | ||
37 | unregister_filesystem(&autofs_fs_type); | 46 | unregister_filesystem(&autofs_fs_type); |
38 | } | 47 | } |
39 | 48 | ||
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7bb3e5ba0537..c7e65bb30ba0 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, | |||
53 | atomic_set(&ino->count, 0); | 53 | atomic_set(&ino->count, 0); |
54 | } | 54 | } |
55 | 55 | ||
56 | ino->uid = 0; | ||
57 | ino->gid = 0; | ||
56 | ino->mode = mode; | 58 | ino->mode = mode; |
57 | ino->last_used = jiffies; | 59 | ino->last_used = jiffies; |
58 | 60 | ||
@@ -213,7 +215,7 @@ static const struct super_operations autofs4_sops = { | |||
213 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, | 215 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, |
214 | Opt_indirect, Opt_direct, Opt_offset}; | 216 | Opt_indirect, Opt_direct, Opt_offset}; |
215 | 217 | ||
216 | static match_table_t tokens = { | 218 | static const match_table_t tokens = { |
217 | {Opt_fd, "fd=%u"}, | 219 | {Opt_fd, "fd=%u"}, |
218 | {Opt_uid, "uid=%u"}, | 220 | {Opt_uid, "uid=%u"}, |
219 | {Opt_gid, "gid=%u"}, | 221 | {Opt_gid, "gid=%u"}, |
@@ -288,7 +290,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | |||
288 | *type = AUTOFS_TYPE_DIRECT; | 290 | *type = AUTOFS_TYPE_DIRECT; |
289 | break; | 291 | break; |
290 | case Opt_offset: | 292 | case Opt_offset: |
291 | *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; | 293 | *type = AUTOFS_TYPE_OFFSET; |
292 | break; | 294 | break; |
293 | default: | 295 | default: |
294 | return 1; | 296 | return 1; |
@@ -336,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
336 | sbi->sb = s; | 338 | sbi->sb = s; |
337 | sbi->version = 0; | 339 | sbi->version = 0; |
338 | sbi->sub_version = 0; | 340 | sbi->sub_version = 0; |
339 | sbi->type = 0; | 341 | sbi->type = AUTOFS_TYPE_INDIRECT; |
340 | sbi->min_proto = 0; | 342 | sbi->min_proto = 0; |
341 | sbi->max_proto = 0; | 343 | sbi->max_proto = 0; |
342 | mutex_init(&sbi->wq_mutex); | 344 | mutex_init(&sbi->wq_mutex); |
@@ -378,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
378 | } | 380 | } |
379 | 381 | ||
380 | root_inode->i_fop = &autofs4_root_operations; | 382 | root_inode->i_fop = &autofs4_root_operations; |
381 | root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? | 383 | root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ? |
382 | &autofs4_direct_root_inode_operations : | 384 | &autofs4_direct_root_inode_operations : |
383 | &autofs4_indirect_root_inode_operations; | 385 | &autofs4_indirect_root_inode_operations; |
384 | 386 | ||
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { | |||
36 | .release = dcache_dir_close, | 36 | .release = dcache_dir_close, |
37 | .read = generic_read_dir, | 37 | .read = generic_read_dir, |
38 | .readdir = dcache_readdir, | 38 | .readdir = dcache_readdir, |
39 | .llseek = dcache_dir_lseek, | ||
39 | .ioctl = autofs4_root_ioctl, | 40 | .ioctl = autofs4_root_ioctl, |
40 | }; | 41 | }; |
41 | 42 | ||
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { | |||
44 | .release = dcache_dir_close, | 45 | .release = dcache_dir_close, |
45 | .read = generic_read_dir, | 46 | .read = generic_read_dir, |
46 | .readdir = dcache_readdir, | 47 | .readdir = dcache_readdir, |
48 | .llseek = dcache_dir_lseek, | ||
47 | }; | 49 | }; |
48 | 50 | ||
49 | const struct inode_operations autofs4_indirect_root_inode_operations = { | 51 | const struct inode_operations autofs4_indirect_root_inode_operations = { |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35216d18d8b5..4b67c2a2d77c 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
337 | * is very similar for indirect mounts except only dentrys | 337 | * is very similar for indirect mounts except only dentrys |
338 | * in the root of the autofs file system may be negative. | 338 | * in the root of the autofs file system may be negative. |
339 | */ | 339 | */ |
340 | if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)) | 340 | if (sbi->type & AUTOFS_TYPE_TRIGGER) |
341 | return -ENOENT; | 341 | return -ENOENT; |
342 | else if (!IS_ROOT(dentry->d_parent)) | 342 | else if (!IS_ROOT(dentry->d_parent)) |
343 | return -ENOENT; | 343 | return -ENOENT; |
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
348 | return -ENOMEM; | 348 | return -ENOMEM; |
349 | 349 | ||
350 | /* If this is a direct mount request create a dummy name */ | 350 | /* If this is a direct mount request create a dummy name */ |
351 | if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) | 351 | if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER) |
352 | qstr.len = sprintf(name, "%p", dentry); | 352 | qstr.len = sprintf(name, "%p", dentry); |
353 | else { | 353 | else { |
354 | qstr.len = autofs4_getpath(sbi, dentry, &name); | 354 | qstr.len = autofs4_getpath(sbi, dentry, &name); |
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
406 | type = autofs_ptype_expire_multi; | 406 | type = autofs_ptype_expire_multi; |
407 | } else { | 407 | } else { |
408 | if (notify == NFY_MOUNT) | 408 | if (notify == NFY_MOUNT) |
409 | type = (sbi->type & AUTOFS_TYPE_DIRECT) ? | 409 | type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? |
410 | autofs_ptype_missing_direct : | 410 | autofs_ptype_missing_direct : |
411 | autofs_ptype_missing_indirect; | 411 | autofs_ptype_missing_indirect; |
412 | else | 412 | else |
413 | type = (sbi->type & AUTOFS_TYPE_DIRECT) ? | 413 | type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? |
414 | autofs_ptype_expire_direct : | 414 | autofs_ptype_expire_direct : |
415 | autofs_ptype_expire_indirect; | 415 | autofs_ptype_expire_indirect; |
416 | } | 416 | } |
@@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
457 | 457 | ||
458 | status = wq->status; | 458 | status = wq->status; |
459 | 459 | ||
460 | /* | ||
461 | * For direct and offset mounts we need to track the requester's | ||
462 | * uid and gid in the dentry info struct. This is so it can be | ||
463 | * supplied, on request, by the misc device ioctl interface. | ||
464 | * This is needed during daemon restart when reconnecting | ||
465 | * to existing, active, autofs mounts. The uid and gid (and | ||
466 | * related string values) may be used for macro substitution | ||
467 | * in autofs mount maps. | ||
468 | */ | ||
469 | if (!status) { | ||
470 | struct autofs_info *ino; | ||
471 | struct dentry *de = NULL; | ||
472 | |||
473 | /* direct mount or browsable map */ | ||
474 | ino = autofs4_dentry_ino(dentry); | ||
475 | if (!ino) { | ||
476 | /* If not lookup actual dentry used */ | ||
477 | de = d_lookup(dentry->d_parent, &dentry->d_name); | ||
478 | if (de) | ||
479 | ino = autofs4_dentry_ino(de); | ||
480 | } | ||
481 | |||
482 | /* Set mount requester */ | ||
483 | if (ino) { | ||
484 | spin_lock(&sbi->fs_lock); | ||
485 | ino->uid = wq->uid; | ||
486 | ino->gid = wq->gid; | ||
487 | spin_unlock(&sbi->fs_lock); | ||
488 | } | ||
489 | |||
490 | if (de) | ||
491 | dput(de); | ||
492 | } | ||
493 | |||
460 | /* Are we the last process to need status? */ | 494 | /* Are we the last process to need status? */ |
461 | mutex_lock(&sbi->wq_mutex); | 495 | mutex_lock(&sbi->wq_mutex); |
462 | if (!--wq->wait_ctr) | 496 | if (!--wq->wait_ctr) |
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index e2595c2c403a..7893eaa1e58c 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h | |||
@@ -55,8 +55,12 @@ enum super_flags { | |||
55 | }; | 55 | }; |
56 | 56 | ||
57 | #define BEFS_BYTEORDER_NATIVE 0x42494745 | 57 | #define BEFS_BYTEORDER_NATIVE 0x42494745 |
58 | #define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE) | ||
59 | #define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE) | ||
58 | 60 | ||
59 | #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 | 61 | #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 |
62 | #define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1) | ||
63 | #define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1) | ||
60 | 64 | ||
61 | /* | 65 | /* |
62 | * Flags of inode | 66 | * Flags of inode |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..b6dfee37c7b7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
66 | static const struct file_operations befs_dir_operations = { | 66 | static const struct file_operations befs_dir_operations = { |
67 | .read = generic_read_dir, | 67 | .read = generic_read_dir, |
68 | .readdir = befs_readdir, | 68 | .readdir = befs_readdir, |
69 | .llseek = generic_file_llseek, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static const struct inode_operations befs_dir_inode_operations = { | 72 | static const struct inode_operations befs_dir_inode_operations = { |
@@ -649,7 +650,7 @@ enum { | |||
649 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, | 650 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, |
650 | }; | 651 | }; |
651 | 652 | ||
652 | static match_table_t befs_tokens = { | 653 | static const match_table_t befs_tokens = { |
653 | {Opt_uid, "uid=%d"}, | 654 | {Opt_uid, "uid=%d"}, |
654 | {Opt_gid, "gid=%d"}, | 655 | {Opt_gid, "gid=%d"}, |
655 | {Opt_charset, "iocharset=%s"}, | 656 | {Opt_charset, "iocharset=%s"}, |
@@ -808,8 +809,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
808 | 809 | ||
809 | /* account for offset of super block on x86 */ | 810 | /* account for offset of super block on x86 */ |
810 | disk_sb = (befs_super_block *) bh->b_data; | 811 | disk_sb = (befs_super_block *) bh->b_data; |
811 | if ((le32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1) || | 812 | if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) || |
812 | (be32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1)) { | 813 | (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) { |
813 | befs_debug(sb, "Using PPC superblock location"); | 814 | befs_debug(sb, "Using PPC superblock location"); |
814 | } else { | 815 | } else { |
815 | befs_debug(sb, "Using x86 superblock location"); | 816 | befs_debug(sb, "Using x86 superblock location"); |
diff --git a/fs/befs/super.c b/fs/befs/super.c index 8c3401ff6d6a..41f2b4d0093e 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c | |||
@@ -26,10 +26,10 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb) | |||
26 | befs_sb_info *befs_sb = BEFS_SB(sb); | 26 | befs_sb_info *befs_sb = BEFS_SB(sb); |
27 | 27 | ||
28 | /* Check the byte order of the filesystem */ | 28 | /* Check the byte order of the filesystem */ |
29 | if (le32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) | 29 | if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE) |
30 | befs_sb->byte_order = BEFS_BYTESEX_LE; | 30 | befs_sb->byte_order = BEFS_BYTESEX_LE; |
31 | else if (be32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) | 31 | else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE) |
32 | befs_sb->byte_order = BEFS_BYTESEX_BE; | 32 | befs_sb->byte_order = BEFS_BYTESEX_BE; |
33 | 33 | ||
34 | befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); | 34 | befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); |
35 | befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); | 35 | befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
125 | inode->i_ino); | 125 | inode->i_ino); |
126 | if (err) { | 126 | if (err) { |
127 | inode_dec_link_count(inode); | 127 | inode_dec_link_count(inode); |
128 | iput(inode); | ||
129 | mutex_unlock(&info->bfs_lock); | 128 | mutex_unlock(&info->bfs_lock); |
129 | iput(inode); | ||
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | mutex_unlock(&info->bfs_lock); | 132 | mutex_unlock(&info->bfs_lock); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a86..83d72006e29d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -683,7 +683,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
683 | * switch really is going to happen - do this in | 683 | * switch really is going to happen - do this in |
684 | * flush_thread(). - akpm | 684 | * flush_thread(). - akpm |
685 | */ | 685 | */ |
686 | SET_PERSONALITY(loc->elf_ex, 0); | 686 | SET_PERSONALITY(loc->elf_ex); |
687 | 687 | ||
688 | interpreter = open_exec(elf_interpreter); | 688 | interpreter = open_exec(elf_interpreter); |
689 | retval = PTR_ERR(interpreter); | 689 | retval = PTR_ERR(interpreter); |
@@ -734,7 +734,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
734 | goto out_free_dentry; | 734 | goto out_free_dentry; |
735 | } else { | 735 | } else { |
736 | /* Executables without an interpreter also need a personality */ | 736 | /* Executables without an interpreter also need a personality */ |
737 | SET_PERSONALITY(loc->elf_ex, 0); | 737 | SET_PERSONALITY(loc->elf_ex); |
738 | } | 738 | } |
739 | 739 | ||
740 | /* Flush all traces of the currently running executable */ | 740 | /* Flush all traces of the currently running executable */ |
@@ -748,7 +748,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
748 | 748 | ||
749 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages | 749 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages |
750 | may depend on the personality. */ | 750 | may depend on the personality. */ |
751 | SET_PERSONALITY(loc->elf_ex, 0); | 751 | SET_PERSONALITY(loc->elf_ex); |
752 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) | 752 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) |
753 | current->personality |= READ_IMPLIES_EXEC; | 753 | current->personality |= READ_IMPLIES_EXEC; |
754 | 754 | ||
@@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1333 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1333 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1334 | prstatus->pr_sid = task_session_vnr(p); | 1334 | prstatus->pr_sid = task_session_vnr(p); |
1335 | if (thread_group_leader(p)) { | 1335 | if (thread_group_leader(p)) { |
1336 | struct task_cputime cputime; | ||
1337 | |||
1336 | /* | 1338 | /* |
1337 | * This is the record for the group leader. Add in the | 1339 | * This is the record for the group leader. It shows the |
1338 | * cumulative times of previous dead threads. This total | 1340 | * group-wide total, not its individual thread total. |
1339 | * won't include the time of each live thread whose state | ||
1340 | * is included in the core dump. The final total reported | ||
1341 | * to our parent process when it calls wait4 will include | ||
1342 | * those sums as well as the little bit more time it takes | ||
1343 | * this and each other thread to finish dying after the | ||
1344 | * core dump synchronization phase. | ||
1345 | */ | 1341 | */ |
1346 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1342 | thread_group_cputime(p, &cputime); |
1347 | &prstatus->pr_utime); | 1343 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
1348 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1344 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
1349 | &prstatus->pr_stime); | ||
1350 | } else { | 1345 | } else { |
1351 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1346 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
1352 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1347 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 80c1f952ef78..0e8367c54624 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fcntl.h> | 25 | #include <linux/fcntl.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/security.h> | ||
28 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
29 | #include <linux/highuid.h> | 30 | #include <linux/highuid.h> |
30 | #include <linux/personality.h> | 31 | #include <linux/personality.h> |
@@ -455,8 +456,19 @@ error_kill: | |||
455 | } | 456 | } |
456 | 457 | ||
457 | /*****************************************************************************/ | 458 | /*****************************************************************************/ |
459 | |||
460 | #ifndef ELF_BASE_PLATFORM | ||
458 | /* | 461 | /* |
459 | * present useful information to the program | 462 | * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. |
463 | * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value | ||
464 | * will be copied to the user stack in the same manner as AT_PLATFORM. | ||
465 | */ | ||
466 | #define ELF_BASE_PLATFORM NULL | ||
467 | #endif | ||
468 | |||
469 | /* | ||
470 | * present useful information to the program by shovelling it onto the new | ||
471 | * process's stack | ||
460 | */ | 472 | */ |
461 | static int create_elf_fdpic_tables(struct linux_binprm *bprm, | 473 | static int create_elf_fdpic_tables(struct linux_binprm *bprm, |
462 | struct mm_struct *mm, | 474 | struct mm_struct *mm, |
@@ -466,15 +478,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
466 | unsigned long sp, csp, nitems; | 478 | unsigned long sp, csp, nitems; |
467 | elf_caddr_t __user *argv, *envp; | 479 | elf_caddr_t __user *argv, *envp; |
468 | size_t platform_len = 0, len; | 480 | size_t platform_len = 0, len; |
469 | char *k_platform; | 481 | char *k_platform, *k_base_platform; |
470 | char __user *u_platform, *p; | 482 | char __user *u_platform, *u_base_platform, *p; |
471 | long hwcap; | 483 | long hwcap; |
472 | int loop; | 484 | int loop; |
473 | int nr; /* reset for each csp adjustment */ | 485 | int nr; /* reset for each csp adjustment */ |
474 | 486 | ||
475 | /* we're going to shovel a whole load of stuff onto the stack */ | ||
476 | #ifdef CONFIG_MMU | 487 | #ifdef CONFIG_MMU |
477 | sp = bprm->p; | 488 | /* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions |
489 | * by the processes running on the same package. One thing we can do is | ||
490 | * to shuffle the initial stack for them, so we give the architecture | ||
491 | * an opportunity to do so here. | ||
492 | */ | ||
493 | sp = arch_align_stack(bprm->p); | ||
478 | #else | 494 | #else |
479 | sp = mm->start_stack; | 495 | sp = mm->start_stack; |
480 | 496 | ||
@@ -483,11 +499,14 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
483 | return -EFAULT; | 499 | return -EFAULT; |
484 | #endif | 500 | #endif |
485 | 501 | ||
486 | /* get hold of platform and hardware capabilities masks for the machine | ||
487 | * we are running on. In some cases (Sparc), this info is impossible | ||
488 | * to get, in others (i386) it is merely difficult. | ||
489 | */ | ||
490 | hwcap = ELF_HWCAP; | 502 | hwcap = ELF_HWCAP; |
503 | |||
504 | /* | ||
505 | * If this architecture has a platform capability string, copy it | ||
506 | * to userspace. In some cases (Sparc), this info is impossible | ||
507 | * for userspace to get any other way, in others (i386) it is | ||
508 | * merely difficult. | ||
509 | */ | ||
491 | k_platform = ELF_PLATFORM; | 510 | k_platform = ELF_PLATFORM; |
492 | u_platform = NULL; | 511 | u_platform = NULL; |
493 | 512 | ||
@@ -499,19 +518,20 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
499 | return -EFAULT; | 518 | return -EFAULT; |
500 | } | 519 | } |
501 | 520 | ||
502 | #if defined(__i386__) && defined(CONFIG_SMP) | 521 | /* |
503 | /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions | 522 | * If this architecture has a "base" platform capability |
504 | * by the processes running on the same package. One thing we can do is | 523 | * string, copy it to userspace. |
505 | * to shuffle the initial stack for them. | ||
506 | * | ||
507 | * the conditionals here are unneeded, but kept in to make the code | ||
508 | * behaviour the same as pre change unless we have hyperthreaded | ||
509 | * processors. This keeps Mr Marcelo Person happier but should be | ||
510 | * removed for 2.5 | ||
511 | */ | 524 | */ |
512 | if (smp_num_siblings > 1) | 525 | k_base_platform = ELF_BASE_PLATFORM; |
513 | sp = sp - ((current->pid % 64) << 7); | 526 | u_base_platform = NULL; |
514 | #endif | 527 | |
528 | if (k_base_platform) { | ||
529 | platform_len = strlen(k_base_platform) + 1; | ||
530 | sp -= platform_len; | ||
531 | u_base_platform = (char __user *) sp; | ||
532 | if (__copy_to_user(u_base_platform, k_base_platform, platform_len) != 0) | ||
533 | return -EFAULT; | ||
534 | } | ||
515 | 535 | ||
516 | sp &= ~7UL; | 536 | sp &= ~7UL; |
517 | 537 | ||
@@ -541,9 +561,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
541 | } | 561 | } |
542 | 562 | ||
543 | /* force 16 byte _final_ alignment here for generality */ | 563 | /* force 16 byte _final_ alignment here for generality */ |
544 | #define DLINFO_ITEMS 13 | 564 | #define DLINFO_ITEMS 15 |
565 | |||
566 | nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + | ||
567 | (k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; | ||
545 | 568 | ||
546 | nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; | 569 | if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) |
570 | nitems++; | ||
547 | 571 | ||
548 | csp = sp; | 572 | csp = sp; |
549 | sp -= nitems * 2 * sizeof(unsigned long); | 573 | sp -= nitems * 2 * sizeof(unsigned long); |
@@ -575,6 +599,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
575 | (elf_addr_t) (unsigned long) u_platform); | 599 | (elf_addr_t) (unsigned long) u_platform); |
576 | } | 600 | } |
577 | 601 | ||
602 | if (k_base_platform) { | ||
603 | nr = 0; | ||
604 | csp -= 2 * sizeof(unsigned long); | ||
605 | NEW_AUX_ENT(AT_BASE_PLATFORM, | ||
606 | (elf_addr_t) (unsigned long) u_base_platform); | ||
607 | } | ||
608 | |||
609 | if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { | ||
610 | nr = 0; | ||
611 | csp -= 2 * sizeof(unsigned long); | ||
612 | NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); | ||
613 | } | ||
614 | |||
578 | nr = 0; | 615 | nr = 0; |
579 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); | 616 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); |
580 | NEW_AUX_ENT(AT_HWCAP, hwcap); | 617 | NEW_AUX_ENT(AT_HWCAP, hwcap); |
@@ -590,6 +627,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
590 | NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); | 627 | NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); |
591 | NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); | 628 | NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); |
592 | NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); | 629 | NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); |
630 | NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); | ||
631 | NEW_AUX_ENT(AT_EXECFN, bprm->exec); | ||
593 | 632 | ||
594 | #ifdef ARCH_DLINFO | 633 | #ifdef ARCH_DLINFO |
595 | nr = 0; | 634 | nr = 0; |
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f9c88d0c8ced..32fb00b52cd0 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c | |||
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) | |||
43 | return -ENOEXEC; | 43 | return -ENOEXEC; |
44 | } | 44 | } |
45 | 45 | ||
46 | bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ | 46 | bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ |
47 | allow_write_access(bprm->file); | 47 | allow_write_access(bprm->file); |
48 | fput(bprm->file); | 48 | fput(bprm->file); |
49 | bprm->file = NULL; | 49 | bprm->file = NULL; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index dfc0197905ca..ccb781a6a804 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -229,13 +229,13 @@ static int decompress_exec( | |||
229 | ret = 10; | 229 | ret = 10; |
230 | if (buf[3] & EXTRA_FIELD) { | 230 | if (buf[3] & EXTRA_FIELD) { |
231 | ret += 2 + buf[10] + (buf[11] << 8); | 231 | ret += 2 + buf[10] + (buf[11] << 8); |
232 | if (unlikely(LBUFSIZE == ret)) { | 232 | if (unlikely(LBUFSIZE <= ret)) { |
233 | DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); | 233 | DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); |
234 | goto out_free_buf; | 234 | goto out_free_buf; |
235 | } | 235 | } |
236 | } | 236 | } |
237 | if (buf[3] & ORIG_NAME) { | 237 | if (buf[3] & ORIG_NAME) { |
238 | for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) | 238 | while (ret < LBUFSIZE && buf[ret++] != 0) |
239 | ; | 239 | ; |
240 | if (unlikely(LBUFSIZE == ret)) { | 240 | if (unlikely(LBUFSIZE == ret)) { |
241 | DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); | 241 | DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); |
@@ -243,7 +243,7 @@ static int decompress_exec( | |||
243 | } | 243 | } |
244 | } | 244 | } |
245 | if (buf[3] & COMMENT) { | 245 | if (buf[3] & COMMENT) { |
246 | for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) | 246 | while (ret < LBUFSIZE && buf[ret++] != 0) |
247 | ; | 247 | ; |
248 | if (unlikely(LBUFSIZE == ret)) { | 248 | if (unlikely(LBUFSIZE == ret)) { |
249 | DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); | 249 | DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 8d7e88e02e0f..f2744ab4e5b3 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -117,7 +117,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
117 | goto _ret; | 117 | goto _ret; |
118 | 118 | ||
119 | retval = -ENOEXEC; | 119 | retval = -ENOEXEC; |
120 | if (bprm->misc_bang) | 120 | if (bprm->recursion_depth > BINPRM_MAX_RECURSION) |
121 | goto _ret; | 121 | goto _ret; |
122 | 122 | ||
123 | /* to keep locking time low, we copy the interpreter string */ | 123 | /* to keep locking time low, we copy the interpreter string */ |
@@ -197,7 +197,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
197 | if (retval < 0) | 197 | if (retval < 0) |
198 | goto _error; | 198 | goto _error; |
199 | 199 | ||
200 | bprm->misc_bang = 1; | 200 | bprm->recursion_depth++; |
201 | 201 | ||
202 | retval = search_binary_handler (bprm, regs); | 202 | retval = search_binary_handler (bprm, regs); |
203 | if (retval < 0) | 203 | if (retval < 0) |
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 9e3963f7ebf1..08343505e184 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c | |||
@@ -22,14 +22,15 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) | |||
22 | char interp[BINPRM_BUF_SIZE]; | 22 | char interp[BINPRM_BUF_SIZE]; |
23 | int retval; | 23 | int retval; |
24 | 24 | ||
25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || (bprm->sh_bang)) | 25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || |
26 | (bprm->recursion_depth > BINPRM_MAX_RECURSION)) | ||
26 | return -ENOEXEC; | 27 | return -ENOEXEC; |
27 | /* | 28 | /* |
28 | * This section does the #! interpretation. | 29 | * This section does the #! interpretation. |
29 | * Sorta complicated, but hopefully it will work. -TYT | 30 | * Sorta complicated, but hopefully it will work. -TYT |
30 | */ | 31 | */ |
31 | 32 | ||
32 | bprm->sh_bang = 1; | 33 | bprm->recursion_depth++; |
33 | allow_write_access(bprm->file); | 34 | allow_write_access(bprm->file); |
34 | fput(bprm->file); | 35 | fput(bprm->file); |
35 | bprm->file = NULL; | 36 | bprm->file = NULL; |
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 68be580ba289..74e587a52796 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c | |||
@@ -306,3 +306,5 @@ static void __exit exit_som_binfmt(void) | |||
306 | 306 | ||
307 | core_initcall(init_som_binfmt); | 307 | core_initcall(init_som_binfmt); |
308 | module_exit(exit_som_binfmt); | 308 | module_exit(exit_som_binfmt); |
309 | |||
310 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b35fe6..19caf7c962ac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |||
107 | BUG_ON(bip == NULL); | 107 | BUG_ON(bip == NULL); |
108 | 108 | ||
109 | /* A cloned bio doesn't own the integrity metadata */ | 109 | /* A cloned bio doesn't own the integrity metadata */ |
110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | 110 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) |
111 | && bip->bip_buf != NULL) | ||
111 | kfree(bip->bip_buf); | 112 | kfree(bip->bip_buf); |
112 | 113 | ||
113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | 114 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); |
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
150 | } | 151 | } |
151 | EXPORT_SYMBOL(bio_integrity_add_page); | 152 | EXPORT_SYMBOL(bio_integrity_add_page); |
152 | 153 | ||
154 | static int bdev_integrity_enabled(struct block_device *bdev, int rw) | ||
155 | { | ||
156 | struct blk_integrity *bi = bdev_get_integrity(bdev); | ||
157 | |||
158 | if (bi == NULL) | ||
159 | return 0; | ||
160 | |||
161 | if (rw == READ && bi->verify_fn != NULL && | ||
162 | (bi->flags & INTEGRITY_FLAG_READ)) | ||
163 | return 1; | ||
164 | |||
165 | if (rw == WRITE && bi->generate_fn != NULL && | ||
166 | (bi->flags & INTEGRITY_FLAG_WRITE)) | ||
167 | return 1; | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
153 | /** | 172 | /** |
154 | * bio_integrity_enabled - Check whether integrity can be passed | 173 | * bio_integrity_enabled - Check whether integrity can be passed |
155 | * @bio: bio to check | 174 | * @bio: bio to check |
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) | |||
313 | } | 332 | } |
314 | } | 333 | } |
315 | 334 | ||
335 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | ||
336 | { | ||
337 | if (bi) | ||
338 | return bi->tuple_size; | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
316 | /** | 343 | /** |
317 | * bio_integrity_prep - Prepare bio for integrity I/O | 344 | * bio_integrity_prep - Prepare bio for integrity I/O |
318 | * @bio: bio to prepare | 345 | * @bio: bio to prepare |
@@ -30,7 +30,7 @@ | |||
30 | 30 | ||
31 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
32 | 32 | ||
33 | mempool_t *bio_split_pool __read_mostly; | 33 | static mempool_t *bio_split_pool __read_mostly; |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct | |||
60 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * see comment near bvec_array define! | 63 | * If 'bs' is given, lookup the pool and do the mempool alloc. |
64 | * If not, this is a bio_kmalloc() allocation and just do a | ||
65 | * kzalloc() for the exact number of vecs right away. | ||
64 | */ | 66 | */ |
65 | switch (nr) { | 67 | if (bs) { |
66 | case 1 : *idx = 0; break; | 68 | /* |
67 | case 2 ... 4: *idx = 1; break; | 69 | * see comment near bvec_array define! |
68 | case 5 ... 16: *idx = 2; break; | 70 | */ |
69 | case 17 ... 64: *idx = 3; break; | 71 | switch (nr) { |
70 | case 65 ... 128: *idx = 4; break; | 72 | case 1: |
71 | case 129 ... BIO_MAX_PAGES: *idx = 5; break; | 73 | *idx = 0; |
74 | break; | ||
75 | case 2 ... 4: | ||
76 | *idx = 1; | ||
77 | break; | ||
78 | case 5 ... 16: | ||
79 | *idx = 2; | ||
80 | break; | ||
81 | case 17 ... 64: | ||
82 | *idx = 3; | ||
83 | break; | ||
84 | case 65 ... 128: | ||
85 | *idx = 4; | ||
86 | break; | ||
87 | case 129 ... BIO_MAX_PAGES: | ||
88 | *idx = 5; | ||
89 | break; | ||
72 | default: | 90 | default: |
73 | return NULL; | 91 | return NULL; |
74 | } | 92 | } |
75 | /* | ||
76 | * idx now points to the pool we want to allocate from | ||
77 | */ | ||
78 | 93 | ||
79 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | 94 | /* |
80 | if (bvl) | 95 | * idx now points to the pool we want to allocate from |
81 | memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | 96 | */ |
97 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | ||
98 | if (bvl) | ||
99 | memset(bvl, 0, | ||
100 | bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | ||
101 | } else | ||
102 | bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
82 | 103 | ||
83 | return bvl; | 104 | return bvl; |
84 | } | 105 | } |
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) | |||
107 | bio_free(bio, fs_bio_set); | 128 | bio_free(bio, fs_bio_set); |
108 | } | 129 | } |
109 | 130 | ||
131 | static void bio_kmalloc_destructor(struct bio *bio) | ||
132 | { | ||
133 | kfree(bio->bi_io_vec); | ||
134 | kfree(bio); | ||
135 | } | ||
136 | |||
110 | void bio_init(struct bio *bio) | 137 | void bio_init(struct bio *bio) |
111 | { | 138 | { |
112 | memset(bio, 0, sizeof(*bio)); | 139 | memset(bio, 0, sizeof(*bio)); |
113 | bio->bi_flags = 1 << BIO_UPTODATE; | 140 | bio->bi_flags = 1 << BIO_UPTODATE; |
141 | bio->bi_comp_cpu = -1; | ||
114 | atomic_set(&bio->bi_cnt, 1); | 142 | atomic_set(&bio->bi_cnt, 1); |
115 | } | 143 | } |
116 | 144 | ||
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio) | |||
118 | * bio_alloc_bioset - allocate a bio for I/O | 146 | * bio_alloc_bioset - allocate a bio for I/O |
119 | * @gfp_mask: the GFP_ mask given to the slab allocator | 147 | * @gfp_mask: the GFP_ mask given to the slab allocator |
120 | * @nr_iovecs: number of iovecs to pre-allocate | 148 | * @nr_iovecs: number of iovecs to pre-allocate |
121 | * @bs: the bio_set to allocate from | 149 | * @bs: the bio_set to allocate from. If %NULL, just use kmalloc |
122 | * | 150 | * |
123 | * Description: | 151 | * Description: |
124 | * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. | 152 | * bio_alloc_bioset will first try its own mempool to satisfy the allocation. |
125 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 153 | * If %__GFP_WAIT is set then we will block on the internal pool waiting |
126 | * for a &struct bio to become free. | 154 | * for a &struct bio to become free. If a %NULL @bs is passed in, we will |
155 | * fall back to just using @kmalloc to allocate the required memory. | ||
127 | * | 156 | * |
128 | * allocate bio and iovecs from the memory pools specified by the | 157 | * allocate bio and iovecs from the memory pools specified by the |
129 | * bio_set structure. | 158 | * bio_set structure, or @kmalloc if none given. |
130 | **/ | 159 | **/ |
131 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 160 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
132 | { | 161 | { |
133 | struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); | 162 | struct bio *bio; |
163 | |||
164 | if (bs) | ||
165 | bio = mempool_alloc(bs->bio_pool, gfp_mask); | ||
166 | else | ||
167 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
134 | 168 | ||
135 | if (likely(bio)) { | 169 | if (likely(bio)) { |
136 | struct bio_vec *bvl = NULL; | 170 | struct bio_vec *bvl = NULL; |
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
141 | 175 | ||
142 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 176 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
143 | if (unlikely(!bvl)) { | 177 | if (unlikely(!bvl)) { |
144 | mempool_free(bio, bs->bio_pool); | 178 | if (bs) |
179 | mempool_free(bio, bs->bio_pool); | ||
180 | else | ||
181 | kfree(bio); | ||
145 | bio = NULL; | 182 | bio = NULL; |
146 | goto out; | 183 | goto out; |
147 | } | 184 | } |
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
164 | return bio; | 201 | return bio; |
165 | } | 202 | } |
166 | 203 | ||
204 | /* | ||
205 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
206 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
207 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
208 | * for more permanent bio allocations (like allocating some bio's for | ||
209 | * initalization or setup purposes). | ||
210 | */ | ||
211 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | ||
212 | { | ||
213 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
214 | |||
215 | if (bio) | ||
216 | bio->bi_destructor = bio_kmalloc_destructor; | ||
217 | |||
218 | return bio; | ||
219 | } | ||
220 | |||
167 | void zero_fill_bio(struct bio *bio) | 221 | void zero_fill_bio(struct bio *bio) |
168 | { | 222 | { |
169 | unsigned long flags; | 223 | unsigned long flags; |
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) | |||
208 | return bio->bi_phys_segments; | 262 | return bio->bi_phys_segments; |
209 | } | 263 | } |
210 | 264 | ||
211 | inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | ||
212 | { | ||
213 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | ||
214 | blk_recount_segments(q, bio); | ||
215 | |||
216 | return bio->bi_hw_segments; | ||
217 | } | ||
218 | |||
219 | /** | 265 | /** |
220 | * __bio_clone - clone a bio | 266 | * __bio_clone - clone a bio |
221 | * @bio: destination bio | 267 | * @bio: destination bio |
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
350 | */ | 396 | */ |
351 | 397 | ||
352 | while (bio->bi_phys_segments >= q->max_phys_segments | 398 | while (bio->bi_phys_segments >= q->max_phys_segments |
353 | || bio->bi_hw_segments >= q->max_hw_segments | 399 | || bio->bi_phys_segments >= q->max_hw_segments) { |
354 | || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | ||
355 | 400 | ||
356 | if (retried_segments) | 401 | if (retried_segments) |
357 | return 0; | 402 | return 0; |
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
395 | } | 440 | } |
396 | 441 | ||
397 | /* If we may be able to merge these biovecs, force a recount */ | 442 | /* If we may be able to merge these biovecs, force a recount */ |
398 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | 443 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
399 | BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | ||
400 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 444 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
401 | 445 | ||
402 | bio->bi_vcnt++; | 446 | bio->bi_vcnt++; |
403 | bio->bi_phys_segments++; | 447 | bio->bi_phys_segments++; |
404 | bio->bi_hw_segments++; | ||
405 | done: | 448 | done: |
406 | bio->bi_size += len; | 449 | bio->bi_size += len; |
407 | return len; | 450 | return len; |
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
449 | 492 | ||
450 | struct bio_map_data { | 493 | struct bio_map_data { |
451 | struct bio_vec *iovecs; | 494 | struct bio_vec *iovecs; |
452 | int nr_sgvecs; | ||
453 | struct sg_iovec *sgvecs; | 495 | struct sg_iovec *sgvecs; |
496 | int nr_sgvecs; | ||
497 | int is_our_pages; | ||
454 | }; | 498 | }; |
455 | 499 | ||
456 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | 500 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
457 | struct sg_iovec *iov, int iov_count) | 501 | struct sg_iovec *iov, int iov_count, |
502 | int is_our_pages) | ||
458 | { | 503 | { |
459 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 504 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
460 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | 505 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); |
461 | bmd->nr_sgvecs = iov_count; | 506 | bmd->nr_sgvecs = iov_count; |
507 | bmd->is_our_pages = is_our_pages; | ||
462 | bio->bi_private = bmd; | 508 | bio->bi_private = bmd; |
463 | } | 509 | } |
464 | 510 | ||
@@ -469,20 +515,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) | |||
469 | kfree(bmd); | 515 | kfree(bmd); |
470 | } | 516 | } |
471 | 517 | ||
472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | 518 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, |
519 | gfp_t gfp_mask) | ||
473 | { | 520 | { |
474 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); | 521 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); |
475 | 522 | ||
476 | if (!bmd) | 523 | if (!bmd) |
477 | return NULL; | 524 | return NULL; |
478 | 525 | ||
479 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); | 526 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); |
480 | if (!bmd->iovecs) { | 527 | if (!bmd->iovecs) { |
481 | kfree(bmd); | 528 | kfree(bmd); |
482 | return NULL; | 529 | return NULL; |
483 | } | 530 | } |
484 | 531 | ||
485 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); | 532 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); |
486 | if (bmd->sgvecs) | 533 | if (bmd->sgvecs) |
487 | return bmd; | 534 | return bmd; |
488 | 535 | ||
@@ -491,8 +538,9 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | |||
491 | return NULL; | 538 | return NULL; |
492 | } | 539 | } |
493 | 540 | ||
494 | static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | 541 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
495 | int uncopy) | 542 | struct sg_iovec *iov, int iov_count, int uncopy, |
543 | int do_free_page) | ||
496 | { | 544 | { |
497 | int ret = 0, i; | 545 | int ret = 0, i; |
498 | struct bio_vec *bvec; | 546 | struct bio_vec *bvec; |
@@ -502,7 +550,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
502 | 550 | ||
503 | __bio_for_each_segment(bvec, bio, i, 0) { | 551 | __bio_for_each_segment(bvec, bio, i, 0) { |
504 | char *bv_addr = page_address(bvec->bv_page); | 552 | char *bv_addr = page_address(bvec->bv_page); |
505 | unsigned int bv_len = bvec->bv_len; | 553 | unsigned int bv_len = iovecs[i].bv_len; |
506 | 554 | ||
507 | while (bv_len && iov_idx < iov_count) { | 555 | while (bv_len && iov_idx < iov_count) { |
508 | unsigned int bytes; | 556 | unsigned int bytes; |
@@ -535,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
535 | } | 583 | } |
536 | } | 584 | } |
537 | 585 | ||
538 | if (uncopy) | 586 | if (do_free_page) |
539 | __free_page(bvec->bv_page); | 587 | __free_page(bvec->bv_page); |
540 | } | 588 | } |
541 | 589 | ||
@@ -552,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
552 | int bio_uncopy_user(struct bio *bio) | 600 | int bio_uncopy_user(struct bio *bio) |
553 | { | 601 | { |
554 | struct bio_map_data *bmd = bio->bi_private; | 602 | struct bio_map_data *bmd = bio->bi_private; |
555 | int ret; | 603 | int ret = 0; |
556 | |||
557 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); | ||
558 | 604 | ||
605 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) | ||
606 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, | ||
607 | bmd->nr_sgvecs, 1, bmd->is_our_pages); | ||
559 | bio_free_map_data(bmd); | 608 | bio_free_map_data(bmd); |
560 | bio_put(bio); | 609 | bio_put(bio); |
561 | return ret; | 610 | return ret; |
@@ -564,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) | |||
564 | /** | 613 | /** |
565 | * bio_copy_user_iov - copy user data to bio | 614 | * bio_copy_user_iov - copy user data to bio |
566 | * @q: destination block queue | 615 | * @q: destination block queue |
616 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
567 | * @iov: the iovec. | 617 | * @iov: the iovec. |
568 | * @iov_count: number of elements in the iovec | 618 | * @iov_count: number of elements in the iovec |
569 | * @write_to_vm: bool indicating writing to pages or not | 619 | * @write_to_vm: bool indicating writing to pages or not |
620 | * @gfp_mask: memory allocation flags | ||
570 | * | 621 | * |
571 | * Prepares and returns a bio for indirect user io, bouncing data | 622 | * Prepares and returns a bio for indirect user io, bouncing data |
572 | * to/from kernel pages as necessary. Must be paired with | 623 | * to/from kernel pages as necessary. Must be paired with |
573 | * call bio_uncopy_user() on io completion. | 624 | * call bio_uncopy_user() on io completion. |
574 | */ | 625 | */ |
575 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | 626 | struct bio *bio_copy_user_iov(struct request_queue *q, |
576 | int iov_count, int write_to_vm) | 627 | struct rq_map_data *map_data, |
628 | struct sg_iovec *iov, int iov_count, | ||
629 | int write_to_vm, gfp_t gfp_mask) | ||
577 | { | 630 | { |
578 | struct bio_map_data *bmd; | 631 | struct bio_map_data *bmd; |
579 | struct bio_vec *bvec; | 632 | struct bio_vec *bvec; |
@@ -596,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
596 | len += iov[i].iov_len; | 649 | len += iov[i].iov_len; |
597 | } | 650 | } |
598 | 651 | ||
599 | bmd = bio_alloc_map_data(nr_pages, iov_count); | 652 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
600 | if (!bmd) | 653 | if (!bmd) |
601 | return ERR_PTR(-ENOMEM); | 654 | return ERR_PTR(-ENOMEM); |
602 | 655 | ||
603 | ret = -ENOMEM; | 656 | ret = -ENOMEM; |
604 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 657 | bio = bio_alloc(gfp_mask, nr_pages); |
605 | if (!bio) | 658 | if (!bio) |
606 | goto out_bmd; | 659 | goto out_bmd; |
607 | 660 | ||
608 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 661 | bio->bi_rw |= (!write_to_vm << BIO_RW); |
609 | 662 | ||
610 | ret = 0; | 663 | ret = 0; |
664 | i = 0; | ||
611 | while (len) { | 665 | while (len) { |
612 | unsigned int bytes = PAGE_SIZE; | 666 | unsigned int bytes; |
667 | |||
668 | if (map_data) | ||
669 | bytes = 1U << (PAGE_SHIFT + map_data->page_order); | ||
670 | else | ||
671 | bytes = PAGE_SIZE; | ||
613 | 672 | ||
614 | if (bytes > len) | 673 | if (bytes > len) |
615 | bytes = len; | 674 | bytes = len; |
616 | 675 | ||
617 | page = alloc_page(q->bounce_gfp | GFP_KERNEL); | 676 | if (map_data) { |
677 | if (i == map_data->nr_entries) { | ||
678 | ret = -ENOMEM; | ||
679 | break; | ||
680 | } | ||
681 | page = map_data->pages[i++]; | ||
682 | } else | ||
683 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
618 | if (!page) { | 684 | if (!page) { |
619 | ret = -ENOMEM; | 685 | ret = -ENOMEM; |
620 | break; | 686 | break; |
@@ -633,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
633 | * success | 699 | * success |
634 | */ | 700 | */ |
635 | if (!write_to_vm) { | 701 | if (!write_to_vm) { |
636 | ret = __bio_copy_iov(bio, iov, iov_count, 0); | 702 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); |
637 | if (ret) | 703 | if (ret) |
638 | goto cleanup; | 704 | goto cleanup; |
639 | } | 705 | } |
640 | 706 | ||
641 | bio_set_map_data(bmd, bio, iov, iov_count); | 707 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); |
642 | return bio; | 708 | return bio; |
643 | cleanup: | 709 | cleanup: |
644 | bio_for_each_segment(bvec, bio, i) | 710 | if (!map_data) |
645 | __free_page(bvec->bv_page); | 711 | bio_for_each_segment(bvec, bio, i) |
712 | __free_page(bvec->bv_page); | ||
646 | 713 | ||
647 | bio_put(bio); | 714 | bio_put(bio); |
648 | out_bmd: | 715 | out_bmd: |
@@ -653,29 +720,32 @@ out_bmd: | |||
653 | /** | 720 | /** |
654 | * bio_copy_user - copy user data to bio | 721 | * bio_copy_user - copy user data to bio |
655 | * @q: destination block queue | 722 | * @q: destination block queue |
723 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
656 | * @uaddr: start of user address | 724 | * @uaddr: start of user address |
657 | * @len: length in bytes | 725 | * @len: length in bytes |
658 | * @write_to_vm: bool indicating writing to pages or not | 726 | * @write_to_vm: bool indicating writing to pages or not |
727 | * @gfp_mask: memory allocation flags | ||
659 | * | 728 | * |
660 | * Prepares and returns a bio for indirect user io, bouncing data | 729 | * Prepares and returns a bio for indirect user io, bouncing data |
661 | * to/from kernel pages as necessary. Must be paired with | 730 | * to/from kernel pages as necessary. Must be paired with |
662 | * call bio_uncopy_user() on io completion. | 731 | * call bio_uncopy_user() on io completion. |
663 | */ | 732 | */ |
664 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 733 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, |
665 | unsigned int len, int write_to_vm) | 734 | unsigned long uaddr, unsigned int len, |
735 | int write_to_vm, gfp_t gfp_mask) | ||
666 | { | 736 | { |
667 | struct sg_iovec iov; | 737 | struct sg_iovec iov; |
668 | 738 | ||
669 | iov.iov_base = (void __user *)uaddr; | 739 | iov.iov_base = (void __user *)uaddr; |
670 | iov.iov_len = len; | 740 | iov.iov_len = len; |
671 | 741 | ||
672 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | 742 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); |
673 | } | 743 | } |
674 | 744 | ||
675 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 745 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
676 | struct block_device *bdev, | 746 | struct block_device *bdev, |
677 | struct sg_iovec *iov, int iov_count, | 747 | struct sg_iovec *iov, int iov_count, |
678 | int write_to_vm) | 748 | int write_to_vm, gfp_t gfp_mask) |
679 | { | 749 | { |
680 | int i, j; | 750 | int i, j; |
681 | int nr_pages = 0; | 751 | int nr_pages = 0; |
@@ -701,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
701 | if (!nr_pages) | 771 | if (!nr_pages) |
702 | return ERR_PTR(-EINVAL); | 772 | return ERR_PTR(-EINVAL); |
703 | 773 | ||
704 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 774 | bio = bio_alloc(gfp_mask, nr_pages); |
705 | if (!bio) | 775 | if (!bio) |
706 | return ERR_PTR(-ENOMEM); | 776 | return ERR_PTR(-ENOMEM); |
707 | 777 | ||
708 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
709 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 779 | pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); |
710 | if (!pages) | 780 | if (!pages) |
711 | goto out; | 781 | goto out; |
712 | 782 | ||
@@ -785,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
785 | * @uaddr: start of user address | 855 | * @uaddr: start of user address |
786 | * @len: length in bytes | 856 | * @len: length in bytes |
787 | * @write_to_vm: bool indicating writing to pages or not | 857 | * @write_to_vm: bool indicating writing to pages or not |
858 | * @gfp_mask: memory allocation flags | ||
788 | * | 859 | * |
789 | * Map the user space address into a bio suitable for io to a block | 860 | * Map the user space address into a bio suitable for io to a block |
790 | * device. Returns an error pointer in case of error. | 861 | * device. Returns an error pointer in case of error. |
791 | */ | 862 | */ |
792 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | 863 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, |
793 | unsigned long uaddr, unsigned int len, int write_to_vm) | 864 | unsigned long uaddr, unsigned int len, int write_to_vm, |
865 | gfp_t gfp_mask) | ||
794 | { | 866 | { |
795 | struct sg_iovec iov; | 867 | struct sg_iovec iov; |
796 | 868 | ||
797 | iov.iov_base = (void __user *)uaddr; | 869 | iov.iov_base = (void __user *)uaddr; |
798 | iov.iov_len = len; | 870 | iov.iov_len = len; |
799 | 871 | ||
800 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | 872 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); |
801 | } | 873 | } |
802 | 874 | ||
803 | /** | 875 | /** |
@@ -807,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | |||
807 | * @iov: the iovec. | 879 | * @iov: the iovec. |
808 | * @iov_count: number of elements in the iovec | 880 | * @iov_count: number of elements in the iovec |
809 | * @write_to_vm: bool indicating writing to pages or not | 881 | * @write_to_vm: bool indicating writing to pages or not |
882 | * @gfp_mask: memory allocation flags | ||
810 | * | 883 | * |
811 | * Map the user space address into a bio suitable for io to a block | 884 | * Map the user space address into a bio suitable for io to a block |
812 | * device. Returns an error pointer in case of error. | 885 | * device. Returns an error pointer in case of error. |
813 | */ | 886 | */ |
814 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | 887 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, |
815 | struct sg_iovec *iov, int iov_count, | 888 | struct sg_iovec *iov, int iov_count, |
816 | int write_to_vm) | 889 | int write_to_vm, gfp_t gfp_mask) |
817 | { | 890 | { |
818 | struct bio *bio; | 891 | struct bio *bio; |
819 | 892 | ||
820 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); | 893 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, |
821 | 894 | gfp_mask); | |
822 | if (IS_ERR(bio)) | 895 | if (IS_ERR(bio)) |
823 | return bio; | 896 | return bio; |
824 | 897 | ||
@@ -942,19 +1015,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
942 | { | 1015 | { |
943 | struct bio_vec *bvec; | 1016 | struct bio_vec *bvec; |
944 | const int read = bio_data_dir(bio) == READ; | 1017 | const int read = bio_data_dir(bio) == READ; |
945 | char *p = bio->bi_private; | 1018 | struct bio_map_data *bmd = bio->bi_private; |
946 | int i; | 1019 | int i; |
1020 | char *p = bmd->sgvecs[0].iov_base; | ||
947 | 1021 | ||
948 | __bio_for_each_segment(bvec, bio, i, 0) { | 1022 | __bio_for_each_segment(bvec, bio, i, 0) { |
949 | char *addr = page_address(bvec->bv_page); | 1023 | char *addr = page_address(bvec->bv_page); |
1024 | int len = bmd->iovecs[i].bv_len; | ||
950 | 1025 | ||
951 | if (read && !err) | 1026 | if (read && !err) |
952 | memcpy(p, addr, bvec->bv_len); | 1027 | memcpy(p, addr, len); |
953 | 1028 | ||
954 | __free_page(bvec->bv_page); | 1029 | __free_page(bvec->bv_page); |
955 | p += bvec->bv_len; | 1030 | p += len; |
956 | } | 1031 | } |
957 | 1032 | ||
1033 | bio_free_map_data(bmd); | ||
958 | bio_put(bio); | 1034 | bio_put(bio); |
959 | } | 1035 | } |
960 | 1036 | ||
@@ -972,38 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
972 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1048 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
973 | gfp_t gfp_mask, int reading) | 1049 | gfp_t gfp_mask, int reading) |
974 | { | 1050 | { |
975 | unsigned long kaddr = (unsigned long)data; | ||
976 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
977 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
978 | const int nr_pages = end - start; | ||
979 | struct bio *bio; | 1051 | struct bio *bio; |
980 | struct bio_vec *bvec; | 1052 | struct bio_vec *bvec; |
981 | int i, ret; | 1053 | int i; |
982 | |||
983 | bio = bio_alloc(gfp_mask, nr_pages); | ||
984 | if (!bio) | ||
985 | return ERR_PTR(-ENOMEM); | ||
986 | |||
987 | while (len) { | ||
988 | struct page *page; | ||
989 | unsigned int bytes = PAGE_SIZE; | ||
990 | |||
991 | if (bytes > len) | ||
992 | bytes = len; | ||
993 | |||
994 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
995 | if (!page) { | ||
996 | ret = -ENOMEM; | ||
997 | goto cleanup; | ||
998 | } | ||
999 | |||
1000 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { | ||
1001 | ret = -EINVAL; | ||
1002 | goto cleanup; | ||
1003 | } | ||
1004 | 1054 | ||
1005 | len -= bytes; | 1055 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); |
1006 | } | 1056 | if (IS_ERR(bio)) |
1057 | return bio; | ||
1007 | 1058 | ||
1008 | if (!reading) { | 1059 | if (!reading) { |
1009 | void *p = data; | 1060 | void *p = data; |
@@ -1016,16 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1016 | } | 1067 | } |
1017 | } | 1068 | } |
1018 | 1069 | ||
1019 | bio->bi_private = data; | ||
1020 | bio->bi_end_io = bio_copy_kern_endio; | 1070 | bio->bi_end_io = bio_copy_kern_endio; |
1021 | return bio; | ||
1022 | cleanup: | ||
1023 | bio_for_each_segment(bvec, bio, i) | ||
1024 | __free_page(bvec->bv_page); | ||
1025 | |||
1026 | bio_put(bio); | ||
1027 | 1071 | ||
1028 | return ERR_PTR(ret); | 1072 | return bio; |
1029 | } | 1073 | } |
1030 | 1074 | ||
1031 | /* | 1075 | /* |
@@ -1212,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
1212 | * split a bio - only worry about a bio with a single page | 1256 | * split a bio - only worry about a bio with a single page |
1213 | * in it's iovec | 1257 | * in it's iovec |
1214 | */ | 1258 | */ |
1215 | struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | 1259 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
1216 | { | 1260 | { |
1217 | struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); | 1261 | struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); |
1218 | 1262 | ||
1219 | if (!bp) | 1263 | if (!bp) |
1220 | return bp; | 1264 | return bp; |
@@ -1248,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1248 | bp->bio2.bi_end_io = bio_pair_end_2; | 1292 | bp->bio2.bi_end_io = bio_pair_end_2; |
1249 | 1293 | ||
1250 | bp->bio1.bi_private = bi; | 1294 | bp->bio1.bi_private = bi; |
1251 | bp->bio2.bi_private = pool; | 1295 | bp->bio2.bi_private = bio_split_pool; |
1252 | 1296 | ||
1253 | if (bio_integrity(bi)) | 1297 | if (bio_integrity(bi)) |
1254 | bio_integrity_split(bi, bp, first_sectors); | 1298 | bio_integrity_split(bi, bp, first_sectors); |
@@ -1256,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
1256 | return bp; | 1300 | return bp; |
1257 | } | 1301 | } |
1258 | 1302 | ||
1303 | /** | ||
1304 | * bio_sector_offset - Find hardware sector offset in bio | ||
1305 | * @bio: bio to inspect | ||
1306 | * @index: bio_vec index | ||
1307 | * @offset: offset in bv_page | ||
1308 | * | ||
1309 | * Return the number of hardware sectors between beginning of bio | ||
1310 | * and an end point indicated by a bio_vec index and an offset | ||
1311 | * within that vector's page. | ||
1312 | */ | ||
1313 | sector_t bio_sector_offset(struct bio *bio, unsigned short index, | ||
1314 | unsigned int offset) | ||
1315 | { | ||
1316 | unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); | ||
1317 | struct bio_vec *bv; | ||
1318 | sector_t sectors; | ||
1319 | int i; | ||
1320 | |||
1321 | sectors = 0; | ||
1322 | |||
1323 | if (index >= bio->bi_idx) | ||
1324 | index = bio->bi_vcnt - 1; | ||
1325 | |||
1326 | __bio_for_each_segment(bv, bio, i, 0) { | ||
1327 | if (i == index) { | ||
1328 | if (offset > bv->bv_offset) | ||
1329 | sectors += (offset - bv->bv_offset) / sector_sz; | ||
1330 | break; | ||
1331 | } | ||
1332 | |||
1333 | sectors += bv->bv_len / sector_sz; | ||
1334 | } | ||
1335 | |||
1336 | return sectors; | ||
1337 | } | ||
1338 | EXPORT_SYMBOL(bio_sector_offset); | ||
1259 | 1339 | ||
1260 | /* | 1340 | /* |
1261 | * create memory pools for biovec's in a bio_set. | 1341 | * create memory pools for biovec's in a bio_set. |
@@ -1358,6 +1438,7 @@ static int __init init_bio(void) | |||
1358 | subsys_initcall(init_bio); | 1438 | subsys_initcall(init_bio); |
1359 | 1439 | ||
1360 | EXPORT_SYMBOL(bio_alloc); | 1440 | EXPORT_SYMBOL(bio_alloc); |
1441 | EXPORT_SYMBOL(bio_kmalloc); | ||
1361 | EXPORT_SYMBOL(bio_put); | 1442 | EXPORT_SYMBOL(bio_put); |
1362 | EXPORT_SYMBOL(bio_free); | 1443 | EXPORT_SYMBOL(bio_free); |
1363 | EXPORT_SYMBOL(bio_endio); | 1444 | EXPORT_SYMBOL(bio_endio); |
@@ -1365,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); | |||
1365 | EXPORT_SYMBOL(__bio_clone); | 1446 | EXPORT_SYMBOL(__bio_clone); |
1366 | EXPORT_SYMBOL(bio_clone); | 1447 | EXPORT_SYMBOL(bio_clone); |
1367 | EXPORT_SYMBOL(bio_phys_segments); | 1448 | EXPORT_SYMBOL(bio_phys_segments); |
1368 | EXPORT_SYMBOL(bio_hw_segments); | ||
1369 | EXPORT_SYMBOL(bio_add_page); | 1449 | EXPORT_SYMBOL(bio_add_page); |
1370 | EXPORT_SYMBOL(bio_add_pc_page); | 1450 | EXPORT_SYMBOL(bio_add_pc_page); |
1371 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1451 | EXPORT_SYMBOL(bio_get_nr_vecs); |
@@ -1375,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); | |||
1375 | EXPORT_SYMBOL(bio_copy_kern); | 1455 | EXPORT_SYMBOL(bio_copy_kern); |
1376 | EXPORT_SYMBOL(bio_pair_release); | 1456 | EXPORT_SYMBOL(bio_pair_release); |
1377 | EXPORT_SYMBOL(bio_split); | 1457 | EXPORT_SYMBOL(bio_split); |
1378 | EXPORT_SYMBOL(bio_split_pool); | ||
1379 | EXPORT_SYMBOL(bio_copy_user); | 1458 | EXPORT_SYMBOL(bio_copy_user); |
1380 | EXPORT_SYMBOL(bio_uncopy_user); | 1459 | EXPORT_SYMBOL(bio_uncopy_user); |
1381 | EXPORT_SYMBOL(bioset_create); | 1460 | EXPORT_SYMBOL(bioset_create); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e049..218408eed1bb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); | |||
540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
541 | */ | 541 | */ |
542 | 542 | ||
543 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | ||
544 | { | ||
545 | if (bdev->bd_contains != bdev) | ||
546 | return kobject_get(&bdev->bd_part->dev.kobj); | ||
547 | else | ||
548 | return kobject_get(&bdev->bd_disk->dev.kobj); | ||
549 | } | ||
550 | |||
551 | static struct kobject *bdev_get_holder(struct block_device *bdev) | ||
552 | { | ||
553 | if (bdev->bd_contains != bdev) | ||
554 | return kobject_get(bdev->bd_part->holder_dir); | ||
555 | else | ||
556 | return kobject_get(bdev->bd_disk->holder_dir); | ||
557 | } | ||
558 | |||
559 | static int add_symlink(struct kobject *from, struct kobject *to) | 543 | static int add_symlink(struct kobject *from, struct kobject *to) |
560 | { | 544 | { |
561 | if (!from || !to) | 545 | if (!from || !to) |
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, | |||
604 | if (!bo->hdev) | 588 | if (!bo->hdev) |
605 | goto fail_put_sdir; | 589 | goto fail_put_sdir; |
606 | 590 | ||
607 | bo->sdev = bdev_get_kobj(bdev); | 591 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
608 | if (!bo->sdev) | 592 | if (!bo->sdev) |
609 | goto fail_put_hdev; | 593 | goto fail_put_hdev; |
610 | 594 | ||
611 | bo->hdir = bdev_get_holder(bdev); | 595 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
612 | if (!bo->hdir) | 596 | if (!bo->hdir) |
613 | goto fail_put_sdev; | 597 | goto fail_put_sdev; |
614 | 598 | ||
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
868 | 852 | ||
869 | EXPORT_SYMBOL(open_by_devnum); | 853 | EXPORT_SYMBOL(open_by_devnum); |
870 | 854 | ||
855 | /** | ||
856 | * flush_disk - invalidates all buffer-cache entries on a disk | ||
857 | * | ||
858 | * @bdev: struct block device to be flushed | ||
859 | * | ||
860 | * Invalidates all buffer-cache entries on a disk. It should be called | ||
861 | * when a disk has been changed -- either by a media change or online | ||
862 | * resize. | ||
863 | */ | ||
864 | static void flush_disk(struct block_device *bdev) | ||
865 | { | ||
866 | if (__invalidate_device(bdev)) { | ||
867 | char name[BDEVNAME_SIZE] = ""; | ||
868 | |||
869 | if (bdev->bd_disk) | ||
870 | disk_name(bdev->bd_disk, 0, name); | ||
871 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | ||
872 | "resized disk %s\n", name); | ||
873 | } | ||
874 | |||
875 | if (!bdev->bd_disk) | ||
876 | return; | ||
877 | if (disk_partitionable(bdev->bd_disk)) | ||
878 | bdev->bd_invalidated = 1; | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | ||
883 | * @disk: struct gendisk to check | ||
884 | * @bdev: struct bdev to adjust. | ||
885 | * | ||
886 | * This routine checks to see if the bdev size does not match the disk size | ||
887 | * and adjusts it if it differs. | ||
888 | */ | ||
889 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | ||
890 | { | ||
891 | loff_t disk_size, bdev_size; | ||
892 | |||
893 | disk_size = (loff_t)get_capacity(disk) << 9; | ||
894 | bdev_size = i_size_read(bdev->bd_inode); | ||
895 | if (disk_size != bdev_size) { | ||
896 | char name[BDEVNAME_SIZE]; | ||
897 | |||
898 | disk_name(disk, 0, name); | ||
899 | printk(KERN_INFO | ||
900 | "%s: detected capacity change from %lld to %lld\n", | ||
901 | name, bdev_size, disk_size); | ||
902 | i_size_write(bdev->bd_inode, disk_size); | ||
903 | flush_disk(bdev); | ||
904 | } | ||
905 | } | ||
906 | EXPORT_SYMBOL(check_disk_size_change); | ||
907 | |||
908 | /** | ||
909 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | ||
910 | * @disk: struct gendisk to be revalidated | ||
911 | * | ||
912 | * This routine is a wrapper for lower-level driver's revalidate_disk | ||
913 | * call-backs. It is used to do common pre and post operations needed | ||
914 | * for all revalidate_disk operations. | ||
915 | */ | ||
916 | int revalidate_disk(struct gendisk *disk) | ||
917 | { | ||
918 | struct block_device *bdev; | ||
919 | int ret = 0; | ||
920 | |||
921 | if (disk->fops->revalidate_disk) | ||
922 | ret = disk->fops->revalidate_disk(disk); | ||
923 | |||
924 | bdev = bdget_disk(disk, 0); | ||
925 | if (!bdev) | ||
926 | return ret; | ||
927 | |||
928 | mutex_lock(&bdev->bd_mutex); | ||
929 | check_disk_size_change(disk, bdev); | ||
930 | mutex_unlock(&bdev->bd_mutex); | ||
931 | bdput(bdev); | ||
932 | return ret; | ||
933 | } | ||
934 | EXPORT_SYMBOL(revalidate_disk); | ||
935 | |||
871 | /* | 936 | /* |
872 | * This routine checks whether a removable media has been changed, | 937 | * This routine checks whether a removable media has been changed, |
873 | * and invalidates all buffer-cache-entries in that case. This | 938 | * and invalidates all buffer-cache-entries in that case. This |
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) | |||
887 | if (!bdops->media_changed(bdev->bd_disk)) | 952 | if (!bdops->media_changed(bdev->bd_disk)) |
888 | return 0; | 953 | return 0; |
889 | 954 | ||
890 | if (__invalidate_device(bdev)) | 955 | flush_disk(bdev); |
891 | printk("VFS: busy inodes on changed media.\n"); | ||
892 | |||
893 | if (bdops->revalidate_disk) | 956 | if (bdops->revalidate_disk) |
894 | bdops->revalidate_disk(bdev->bd_disk); | 957 | bdops->revalidate_disk(bdev->bd_disk); |
895 | if (bdev->bd_disk->minors > 1) | ||
896 | bdev->bd_invalidated = 1; | ||
897 | return 1; | 958 | return 1; |
898 | } | 959 | } |
899 | 960 | ||
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
927 | 988 | ||
928 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) |
929 | { | 990 | { |
930 | struct module *owner = NULL; | ||
931 | struct gendisk *disk; | 991 | struct gendisk *disk; |
992 | struct hd_struct *part = NULL; | ||
932 | int ret; | 993 | int ret; |
933 | int part; | 994 | int partno; |
934 | int perm = 0; | 995 | int perm = 0; |
935 | 996 | ||
936 | if (file->f_mode & FMODE_READ) | 997 | if (file->f_mode & FMODE_READ) |
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
948 | 1009 | ||
949 | ret = -ENXIO; | 1010 | ret = -ENXIO; |
950 | file->f_mapping = bdev->bd_inode->i_mapping; | 1011 | file->f_mapping = bdev->bd_inode->i_mapping; |
1012 | |||
951 | lock_kernel(); | 1013 | lock_kernel(); |
952 | disk = get_gendisk(bdev->bd_dev, &part); | 1014 | |
953 | if (!disk) { | 1015 | disk = get_gendisk(bdev->bd_dev, &partno); |
954 | unlock_kernel(); | 1016 | if (!disk) |
955 | bdput(bdev); | 1017 | goto out_unlock_kernel; |
956 | return ret; | 1018 | part = disk_get_part(disk, partno); |
957 | } | 1019 | if (!part) |
958 | owner = disk->fops->owner; | 1020 | goto out_unlock_kernel; |
959 | 1021 | ||
960 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1022 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
961 | if (!bdev->bd_openers) { | 1023 | if (!bdev->bd_openers) { |
962 | bdev->bd_disk = disk; | 1024 | bdev->bd_disk = disk; |
1025 | bdev->bd_part = part; | ||
963 | bdev->bd_contains = bdev; | 1026 | bdev->bd_contains = bdev; |
964 | if (!part) { | 1027 | if (!partno) { |
965 | struct backing_dev_info *bdi; | 1028 | struct backing_dev_info *bdi; |
966 | if (disk->fops->open) { | 1029 | if (disk->fops->open) { |
967 | ret = disk->fops->open(bdev->bd_inode, file); | 1030 | ret = disk->fops->open(bdev->bd_inode, file); |
968 | if (ret) | 1031 | if (ret) |
969 | goto out_first; | 1032 | goto out_clear; |
970 | } | 1033 | } |
971 | if (!bdev->bd_openers) { | 1034 | if (!bdev->bd_openers) { |
972 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1035 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
978 | if (bdev->bd_invalidated) | 1041 | if (bdev->bd_invalidated) |
979 | rescan_partitions(disk, bdev); | 1042 | rescan_partitions(disk, bdev); |
980 | } else { | 1043 | } else { |
981 | struct hd_struct *p; | ||
982 | struct block_device *whole; | 1044 | struct block_device *whole; |
983 | whole = bdget_disk(disk, 0); | 1045 | whole = bdget_disk(disk, 0); |
984 | ret = -ENOMEM; | 1046 | ret = -ENOMEM; |
985 | if (!whole) | 1047 | if (!whole) |
986 | goto out_first; | 1048 | goto out_clear; |
987 | BUG_ON(for_part); | 1049 | BUG_ON(for_part); |
988 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); |
989 | if (ret) | 1051 | if (ret) |
990 | goto out_first; | 1052 | goto out_clear; |
991 | bdev->bd_contains = whole; | 1053 | bdev->bd_contains = whole; |
992 | p = disk->part[part - 1]; | ||
993 | bdev->bd_inode->i_data.backing_dev_info = | 1054 | bdev->bd_inode->i_data.backing_dev_info = |
994 | whole->bd_inode->i_data.backing_dev_info; | 1055 | whole->bd_inode->i_data.backing_dev_info; |
995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 1056 | if (!(disk->flags & GENHD_FL_UP) || |
1057 | !part || !part->nr_sects) { | ||
996 | ret = -ENXIO; | 1058 | ret = -ENXIO; |
997 | goto out_first; | 1059 | goto out_clear; |
998 | } | 1060 | } |
999 | kobject_get(&p->dev.kobj); | 1061 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
1000 | bdev->bd_part = p; | ||
1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
1002 | } | 1062 | } |
1003 | } else { | 1063 | } else { |
1064 | disk_put_part(part); | ||
1004 | put_disk(disk); | 1065 | put_disk(disk); |
1005 | module_put(owner); | 1066 | module_put(disk->fops->owner); |
1067 | part = NULL; | ||
1068 | disk = NULL; | ||
1006 | if (bdev->bd_contains == bdev) { | 1069 | if (bdev->bd_contains == bdev) { |
1007 | if (bdev->bd_disk->fops->open) { | 1070 | if (bdev->bd_disk->fops->open) { |
1008 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); |
1009 | if (ret) | 1072 | if (ret) |
1010 | goto out; | 1073 | goto out_unlock_bdev; |
1011 | } | 1074 | } |
1012 | if (bdev->bd_invalidated) | 1075 | if (bdev->bd_invalidated) |
1013 | rescan_partitions(bdev->bd_disk, bdev); | 1076 | rescan_partitions(bdev->bd_disk, bdev); |
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1020 | unlock_kernel(); | 1083 | unlock_kernel(); |
1021 | return 0; | 1084 | return 0; |
1022 | 1085 | ||
1023 | out_first: | 1086 | out_clear: |
1024 | bdev->bd_disk = NULL; | 1087 | bdev->bd_disk = NULL; |
1088 | bdev->bd_part = NULL; | ||
1025 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1026 | if (bdev != bdev->bd_contains) | 1090 | if (bdev != bdev->bd_contains) |
1027 | __blkdev_put(bdev->bd_contains, 1); | 1091 | __blkdev_put(bdev->bd_contains, 1); |
1028 | bdev->bd_contains = NULL; | 1092 | bdev->bd_contains = NULL; |
1029 | put_disk(disk); | 1093 | out_unlock_bdev: |
1030 | module_put(owner); | ||
1031 | out: | ||
1032 | mutex_unlock(&bdev->bd_mutex); | 1094 | mutex_unlock(&bdev->bd_mutex); |
1095 | out_unlock_kernel: | ||
1033 | unlock_kernel(); | 1096 | unlock_kernel(); |
1034 | if (ret) | 1097 | |
1035 | bdput(bdev); | 1098 | disk_put_part(part); |
1099 | if (disk) | ||
1100 | module_put(disk->fops->owner); | ||
1101 | put_disk(disk); | ||
1102 | bdput(bdev); | ||
1103 | |||
1036 | return ret; | 1104 | return ret; |
1037 | } | 1105 | } |
1038 | 1106 | ||
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1117 | 1185 | ||
1118 | put_disk(disk); | 1186 | put_disk(disk); |
1119 | module_put(owner); | 1187 | module_put(owner); |
1120 | 1188 | disk_put_part(bdev->bd_part); | |
1121 | if (bdev->bd_contains != bdev) { | 1189 | bdev->bd_part = NULL; |
1122 | kobject_put(&bdev->bd_part->dev.kobj); | ||
1123 | bdev->bd_part = NULL; | ||
1124 | } | ||
1125 | bdev->bd_disk = NULL; | 1190 | bdev->bd_disk = NULL; |
1126 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1191 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
1127 | if (bdev != bdev->bd_contains) | 1192 | if (bdev != bdev->bd_contains) |
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
1197 | 1262 | ||
1198 | /** | 1263 | /** |
1199 | * lookup_bdev - lookup a struct block_device by name | 1264 | * lookup_bdev - lookup a struct block_device by name |
1200 | * | ||
1201 | * @path: special file representing the block device | 1265 | * @path: special file representing the block device |
1202 | * | 1266 | * |
1203 | * Get a reference to the blockdevice at @path in the current | 1267 | * Get a reference to the blockdevice at @pathname in the current |
1204 | * namespace if possible and return it. Return ERR_PTR(error) | 1268 | * namespace if possible and return it. Return ERR_PTR(error) |
1205 | * otherwise. | 1269 | * otherwise. |
1206 | */ | 1270 | */ |
diff --git a/fs/buffer.c b/fs/buffer.c index 38653e36e225..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2926 | BUG_ON(!buffer_mapped(bh)); | 2926 | BUG_ON(!buffer_mapped(bh)); |
2927 | BUG_ON(!bh->b_end_io); | 2927 | BUG_ON(!bh->b_end_io); |
2928 | 2928 | ||
2929 | if (buffer_ordered(bh) && (rw == WRITE)) | 2929 | /* |
2930 | rw = WRITE_BARRIER; | 2930 | * Mask in barrier bit for a write (could be either a WRITE or a |
2931 | * WRITE_SYNC | ||
2932 | */ | ||
2933 | if (buffer_ordered(bh) && (rw & WRITE)) | ||
2934 | rw |= WRITE_BARRIER; | ||
2931 | 2935 | ||
2932 | /* | 2936 | /* |
2933 | * Only clear out a write error when rewriting, should this | 2937 | * Only clear out a write error when rewriting |
2934 | * include WRITE_SYNC as well? | ||
2935 | */ | 2938 | */ |
2936 | if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) | 2939 | if (test_set_buffer_req(bh) && (rw & WRITE)) |
2937 | clear_buffer_write_io_error(bh); | 2940 | clear_buffer_write_io_error(bh); |
2938 | 2941 | ||
2939 | /* | 2942 | /* |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cb7cda3d780..262fa10e213d 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -22,9 +22,6 @@ | |||
22 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
24 | 24 | ||
25 | #ifdef CONFIG_KMOD | ||
26 | #include <linux/kmod.h> | ||
27 | #endif | ||
28 | #include "internal.h" | 25 | #include "internal.h" |
29 | 26 | ||
30 | /* | 27 | /* |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f5d0083e09fa..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -4,7 +4,15 @@ Fix premature write failure on congested networks (we would give up | |||
4 | on EAGAIN from the socket too quickly on large writes). | 4 | on EAGAIN from the socket too quickly on large writes). |
5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. | 5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. |
6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian | 6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian |
7 | architectures. | 7 | architectures. Fix problems with preserving timestamps on copying open |
8 | files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit | ||
9 | on parent directory when server supports Unix Extensions but not POSIX | ||
10 | create. Update cifs.upcall version to handle new Kerberos sec flags | ||
11 | (this requires update of cifs.upcall program from Samba). Fix memory leak | ||
12 | on dns_upcall (resolving DFS referrals). Fix plain text password | ||
13 | authentication (requires setting SecurityFlags to 0x30030 to enable | ||
14 | lanman and plain text though). Fix writes to be at correct offset when | ||
15 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. | ||
8 | 16 | ||
9 | Version 1.53 | 17 | Version 1.53 |
10 | ------------ | 18 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and | |||
542 | hashing mechanisms (as "must use") on the other hand | 542 | hashing mechanisms (as "must use") on the other hand |
543 | does not make much sense. Default flags are | 543 | does not make much sense. Default flags are |
544 | 0x07007 | 544 | 0x07007 |
545 | (NTLM, NTLMv2 and packet signing allowed). Maximum | 545 | (NTLM, NTLMv2 and packet signing allowed). The maximum |
546 | allowable flags if you want to allow mounts to servers | 546 | allowable flags if you want to allow mounts to servers |
547 | using weaker password hashes is 0x37037 (lanman, | 547 | using weaker password hashes is 0x37037 (lanman, |
548 | plaintext, ntlm, ntlmv2, signing allowed): | 548 | plaintext, ntlm, ntlmv2, signing allowed). Some |
549 | SecurityFlags require the corresponding menuconfig | ||
550 | options to be enabled (lanman and plaintext require | ||
551 | CONFIG_CIFS_WEAK_PW_HASH for example). Enabling | ||
552 | plaintext authentication currently requires also | ||
553 | enabling lanman authentication in the security flags | ||
554 | because the cifs module only supports sending | ||
555 | plaintext passwords using the older lanman dialect | ||
556 | form of the session setup SMB. (e.g. for authentication | ||
557 | using plain text passwords, set the SecurityFlags | ||
558 | to 0x30030): | ||
549 | 559 | ||
550 | may use packet signing 0x00001 | 560 | may use packet signing 0x00001 |
551 | must use packet signing 0x01001 | 561 | must use packet signing 0x01001 |
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in | |||
642 | that they represent all for that share, not just those for which the server | 652 | that they represent all for that share, not just those for which the server |
643 | returned success. | 653 | returned success. |
644 | 654 | ||
645 | Also note that "cat /proc/fs/cifs/DebugData" will display information about | 655 | Also note that "cat /proc/fs/cifs/DebugData" will display information about |
646 | the active sessions and the shares that are mounted. | 656 | the active sessions and the shares that are mounted. |
647 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is | 657 | |
648 | on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and | 658 | Enabling Kerberos (extended security) works but requires version 1.2 or later |
649 | LANMAN support do not require this helper. | 659 | of the helper program cifs.upcall to be present and to be configured in the |
660 | /etc/request-key.conf file. The cifs.upcall helper program is from the Samba | ||
661 | project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not | ||
662 | require this helper. Note that NTLMv2 security (which does not require the | ||
663 | cifs.upcall helper program), instead of using Kerberos, is sufficient for | ||
664 | some use cases. | ||
665 | |||
666 | Enabling DFS support (used to access shares transparently in an MS-DFS | ||
667 | global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In | ||
668 | addition, DFS support for target shares which are specified as UNC | ||
669 | names which begin with host names (rather than IP addresses) requires | ||
670 | a user space helper (such as cifs.upcall) to be present in order to | ||
671 | translate host names to IP addresses, and the user space helper must also | ||
672 | be configured in the file /etc/request-key.conf | ||
673 | |||
674 | To use cifs Kerberos and DFS support, the Linux keyutils package should be | ||
675 | installed and something like the following lines should be added to the | ||
676 | /etc/request-key.conf file: | ||
677 | |||
678 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k | ||
679 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k | ||
680 | |||
681 | |||
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 5fabd2caf93c..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
476 | unsigned int cls, con, tag, oidlen, rc; | 476 | unsigned int cls, con, tag, oidlen, rc; |
477 | bool use_ntlmssp = false; | 477 | bool use_ntlmssp = false; |
478 | bool use_kerberos = false; | 478 | bool use_kerberos = false; |
479 | bool use_mskerberos = false; | ||
479 | 480 | ||
480 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ | 481 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ |
481 | 482 | ||
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
574 | *(oid + 1), *(oid + 2), *(oid + 3))); | 575 | *(oid + 1), *(oid + 2), *(oid + 3))); |
575 | 576 | ||
576 | if (compare_oid(oid, oidlen, MSKRB5_OID, | 577 | if (compare_oid(oid, oidlen, MSKRB5_OID, |
577 | MSKRB5_OID_LEN)) | 578 | MSKRB5_OID_LEN) && |
578 | use_kerberos = true; | 579 | !use_kerberos) |
580 | use_mskerberos = true; | ||
579 | else if (compare_oid(oid, oidlen, KRB5_OID, | 581 | else if (compare_oid(oid, oidlen, KRB5_OID, |
580 | KRB5_OID_LEN)) | 582 | KRB5_OID_LEN) && |
583 | !use_mskerberos) | ||
581 | use_kerberos = true; | 584 | use_kerberos = true; |
582 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, | 585 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, |
583 | NTLMSSP_OID_LEN)) | 586 | NTLMSSP_OID_LEN)) |
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
630 | 633 | ||
631 | if (use_kerberos) | 634 | if (use_kerberos) |
632 | *secType = Kerberos; | 635 | *secType = Kerberos; |
636 | else if (use_mskerberos) | ||
637 | *secType = MSKerberos; | ||
633 | else if (use_ntlmssp) | 638 | else if (use_ntlmssp) |
634 | *secType = NTLMSSP; | 639 | *secType = NTLMSSP; |
635 | 640 | ||
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2434ab0e8791..fcee9298b620 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -66,11 +66,28 @@ struct key_type cifs_spnego_key_type = { | |||
66 | .describe = user_describe, | 66 | .describe = user_describe, |
67 | }; | 67 | }; |
68 | 68 | ||
69 | #define MAX_VER_STR_LEN 8 /* length of longest version string e.g. | 69 | /* length of longest version string e.g. strlen("ver=0xFF") */ |
70 | strlen("ver=0xFF") */ | 70 | #define MAX_VER_STR_LEN 8 |
71 | #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg | 71 | |
72 | in future could have strlen(";sec=ntlmsspi") */ | 72 | /* length of longest security mechanism name, eg in future could have |
73 | #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | 73 | * strlen(";sec=ntlmsspi") */ |
74 | #define MAX_MECH_STR_LEN 13 | ||
75 | |||
76 | /* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | ||
77 | #define MAX_IPV6_ADDR_LEN 42 | ||
78 | |||
79 | /* strlen of "host=" */ | ||
80 | #define HOST_KEY_LEN 5 | ||
81 | |||
82 | /* strlen of ";ip4=" or ";ip6=" */ | ||
83 | #define IP_KEY_LEN 5 | ||
84 | |||
85 | /* strlen of ";uid=0x" */ | ||
86 | #define UID_KEY_LEN 7 | ||
87 | |||
88 | /* strlen of ";user=" */ | ||
89 | #define USER_KEY_LEN 6 | ||
90 | |||
74 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ | 91 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ |
75 | struct key * | 92 | struct key * |
76 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | 93 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) |
@@ -84,11 +101,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
84 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress | 101 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress |
85 | host=hostname sec=mechanism uid=0xFF user=username */ | 102 | host=hostname sec=mechanism uid=0xFF user=username */ |
86 | desc_len = MAX_VER_STR_LEN + | 103 | desc_len = MAX_VER_STR_LEN + |
87 | 6 /* len of "host=" */ + strlen(hostname) + | 104 | HOST_KEY_LEN + strlen(hostname) + |
88 | 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + | 105 | IP_KEY_LEN + MAX_IPV6_ADDR_LEN + |
89 | MAX_MECH_STR_LEN + | 106 | MAX_MECH_STR_LEN + |
90 | 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + | 107 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
91 | 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; | 108 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; |
92 | 109 | ||
93 | spnego_key = ERR_PTR(-ENOMEM); | 110 | spnego_key = ERR_PTR(-ENOMEM); |
94 | description = kzalloc(desc_len, GFP_KERNEL); | 111 | description = kzalloc(desc_len, GFP_KERNEL); |
@@ -114,9 +131,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
114 | 131 | ||
115 | dp = description + strlen(description); | 132 | dp = description + strlen(description); |
116 | 133 | ||
117 | /* for now, only sec=krb5 is valid */ | 134 | /* for now, only sec=krb5 and sec=mskrb5 are valid */ |
118 | if (server->secType == Kerberos) | 135 | if (server->secType == Kerberos) |
119 | sprintf(dp, ";sec=krb5"); | 136 | sprintf(dp, ";sec=krb5"); |
137 | else if (server->secType == MSKerberos) | ||
138 | sprintf(dp, ";sec=mskrb5"); | ||
120 | else | 139 | else |
121 | goto out; | 140 | goto out; |
122 | 141 | ||
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h | |||
@@ -23,7 +23,7 @@ | |||
23 | #ifndef _CIFS_SPNEGO_H | 23 | #ifndef _CIFS_SPNEGO_H |
24 | #define _CIFS_SPNEGO_H | 24 | #define _CIFS_SPNEGO_H |
25 | 25 | ||
26 | #define CIFS_SPNEGO_UPCALL_VERSION 1 | 26 | #define CIFS_SPNEGO_UPCALL_VERSION 2 |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. | 29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) | |||
294 | 294 | ||
295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { | 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { |
297 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
297 | memcpy(lnm_session_key, password_with_pad, | 298 | memcpy(lnm_session_key, password_with_pad, |
298 | CIFS_ENCPWD_SIZE); | 299 | CIFS_ENCPWD_SIZE); |
299 | return; | 300 | return; |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 135c965c4137..f7b4a5cd837b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -41,7 +41,7 @@ extern int cifs_create(struct inode *, struct dentry *, int, | |||
41 | struct nameidata *); | 41 | struct nameidata *); |
42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, | 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, |
43 | struct nameidata *); | 43 | struct nameidata *); |
44 | extern int cifs_unlink(struct inode *, struct dentry *); | 44 | extern int cifs_unlink(struct inode *dir, struct dentry *dentry); |
45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); | 45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); |
46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); | 46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); |
47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); | 47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..0d22479d99b7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -80,7 +80,8 @@ enum securityEnum { | |||
80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ | 81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ |
82 | NTLMSSP, /* NTLMSSP via SPNEGO */ | 82 | NTLMSSP, /* NTLMSSP via SPNEGO */ |
83 | Kerberos /* Kerberos via SPNEGO */ | 83 | Kerberos, /* Kerberos via SPNEGO */ |
84 | MSKerberos, /* MS Kerberos via SPNEGO */ | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | enum protocolEnum { | 87 | enum protocolEnum { |
@@ -308,6 +309,7 @@ struct cifs_search_info { | |||
308 | __u32 resume_key; | 309 | __u32 resume_key; |
309 | char *ntwrk_buf_start; | 310 | char *ntwrk_buf_start; |
310 | char *srch_entries_start; | 311 | char *srch_entries_start; |
312 | char *last_entry; | ||
311 | char *presume_name; | 313 | char *presume_name; |
312 | unsigned int resume_name_len; | 314 | unsigned int resume_name_len; |
313 | bool endOfSearch:1; | 315 | bool endOfSearch:1; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a729d083e6f4..0cff7fe986e8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -179,6 +179,8 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | |||
179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | 179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, |
180 | const FILE_BASIC_INFO *data, __u16 fid, | 180 | const FILE_BASIC_INFO *data, __u16 fid, |
181 | __u32 pid_of_opener); | 181 | __u32 pid_of_opener); |
182 | extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
183 | bool delete_file, __u16 fid, __u32 pid_of_opener); | ||
182 | #if 0 | 184 | #if 0 |
183 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, | 185 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, |
184 | char *fileName, __u16 dos_attributes, | 186 | char *fileName, __u16 dos_attributes, |
@@ -229,7 +231,7 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, | |||
229 | const struct nls_table *nls_codepage, | 231 | const struct nls_table *nls_codepage, |
230 | int remap_special_chars); | 232 | int remap_special_chars); |
231 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 233 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
232 | int netfid, char *target_name, | 234 | int netfid, const char *target_name, |
233 | const struct nls_table *nls_codepage, | 235 | const struct nls_table *nls_codepage, |
234 | int remap_special_chars); | 236 | int remap_special_chars); |
235 | extern int CIFSCreateHardLink(const int xid, | 237 | extern int CIFSCreateHardLink(const int xid, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 994de7c90474..6f4ffe15d68d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -2017,7 +2017,7 @@ renameRetry: | |||
2017 | } | 2017 | } |
2018 | 2018 | ||
2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
2020 | int netfid, char *target_name, | 2020 | int netfid, const char *target_name, |
2021 | const struct nls_table *nls_codepage, int remap) | 2021 | const struct nls_table *nls_codepage, int remap) |
2022 | { | 2022 | { |
2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
@@ -2071,7 +2071,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | |||
2071 | remap); | 2071 | remap); |
2072 | } | 2072 | } |
2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); | 2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); |
2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2; | 2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str); |
2075 | byte_count += count; | 2075 | byte_count += count; |
2076 | pSMB->DataCount = cpu_to_le16(count); | 2076 | pSMB->DataCount = cpu_to_le16(count); |
2077 | pSMB->TotalDataCount = pSMB->DataCount; | 2077 | pSMB->TotalDataCount = pSMB->DataCount; |
@@ -3614,6 +3614,8 @@ findFirstRetry: | |||
3614 | /* BB remember to free buffer if error BB */ | 3614 | /* BB remember to free buffer if error BB */ |
3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3616 | if (rc == 0) { | 3616 | if (rc == 0) { |
3617 | unsigned int lnoff; | ||
3618 | |||
3617 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3619 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3618 | psrch_inf->unicode = true; | 3620 | psrch_inf->unicode = true; |
3619 | else | 3621 | else |
@@ -3636,6 +3638,17 @@ findFirstRetry: | |||
3636 | le16_to_cpu(parms->SearchCount); | 3638 | le16_to_cpu(parms->SearchCount); |
3637 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + | 3639 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + |
3638 | psrch_inf->entries_in_buffer; | 3640 | psrch_inf->entries_in_buffer; |
3641 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3642 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3643 | lnoff) { | ||
3644 | cERROR(1, ("ignoring corrupt resume name")); | ||
3645 | psrch_inf->last_entry = NULL; | ||
3646 | return rc; | ||
3647 | } | ||
3648 | |||
3649 | psrch_inf->last_entry = psrch_inf->srch_entries_start + | ||
3650 | lnoff; | ||
3651 | |||
3639 | *pnetfid = parms->SearchHandle; | 3652 | *pnetfid = parms->SearchHandle; |
3640 | } else { | 3653 | } else { |
3641 | cifs_buf_release(pSMB); | 3654 | cifs_buf_release(pSMB); |
@@ -3725,6 +3738,8 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3725 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3738 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
3726 | 3739 | ||
3727 | if (rc == 0) { | 3740 | if (rc == 0) { |
3741 | unsigned int lnoff; | ||
3742 | |||
3728 | /* BB fixme add lock for file (srch_info) struct here */ | 3743 | /* BB fixme add lock for file (srch_info) struct here */ |
3729 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3744 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
3730 | psrch_inf->unicode = true; | 3745 | psrch_inf->unicode = true; |
@@ -3751,6 +3766,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
3751 | le16_to_cpu(parms->SearchCount); | 3766 | le16_to_cpu(parms->SearchCount); |
3752 | psrch_inf->index_of_last_entry += | 3767 | psrch_inf->index_of_last_entry += |
3753 | psrch_inf->entries_in_buffer; | 3768 | psrch_inf->entries_in_buffer; |
3769 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
3770 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
3771 | lnoff) { | ||
3772 | cERROR(1, ("ignoring corrupt resume name")); | ||
3773 | psrch_inf->last_entry = NULL; | ||
3774 | return rc; | ||
3775 | } else | ||
3776 | psrch_inf->last_entry = | ||
3777 | psrch_inf->srch_entries_start + lnoff; | ||
3778 | |||
3754 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", | 3779 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", |
3755 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ | 3780 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ |
3756 | 3781 | ||
@@ -4876,6 +4901,61 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | |||
4876 | return rc; | 4901 | return rc; |
4877 | } | 4902 | } |
4878 | 4903 | ||
4904 | int | ||
4905 | CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
4906 | bool delete_file, __u16 fid, __u32 pid_of_opener) | ||
4907 | { | ||
4908 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | ||
4909 | char *data_offset; | ||
4910 | int rc = 0; | ||
4911 | __u16 params, param_offset, offset, byte_count, count; | ||
4912 | |||
4913 | cFYI(1, ("Set File Disposition (via SetFileInfo)")); | ||
4914 | rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); | ||
4915 | |||
4916 | if (rc) | ||
4917 | return rc; | ||
4918 | |||
4919 | pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); | ||
4920 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); | ||
4921 | |||
4922 | params = 6; | ||
4923 | pSMB->MaxSetupCount = 0; | ||
4924 | pSMB->Reserved = 0; | ||
4925 | pSMB->Flags = 0; | ||
4926 | pSMB->Timeout = 0; | ||
4927 | pSMB->Reserved2 = 0; | ||
4928 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | ||
4929 | offset = param_offset + params; | ||
4930 | |||
4931 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | ||
4932 | |||
4933 | count = 1; | ||
4934 | pSMB->MaxParameterCount = cpu_to_le16(2); | ||
4935 | /* BB find max SMB PDU from sess */ | ||
4936 | pSMB->MaxDataCount = cpu_to_le16(1000); | ||
4937 | pSMB->SetupCount = 1; | ||
4938 | pSMB->Reserved3 = 0; | ||
4939 | pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); | ||
4940 | byte_count = 3 /* pad */ + params + count; | ||
4941 | pSMB->DataCount = cpu_to_le16(count); | ||
4942 | pSMB->ParameterCount = cpu_to_le16(params); | ||
4943 | pSMB->TotalDataCount = pSMB->DataCount; | ||
4944 | pSMB->TotalParameterCount = pSMB->ParameterCount; | ||
4945 | pSMB->ParameterOffset = cpu_to_le16(param_offset); | ||
4946 | pSMB->DataOffset = cpu_to_le16(offset); | ||
4947 | pSMB->Fid = fid; | ||
4948 | pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); | ||
4949 | pSMB->Reserved4 = 0; | ||
4950 | pSMB->hdr.smb_buf_length += byte_count; | ||
4951 | pSMB->ByteCount = cpu_to_le16(byte_count); | ||
4952 | *data_offset = delete_file ? 1 : 0; | ||
4953 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); | ||
4954 | if (rc) | ||
4955 | cFYI(1, ("Send error in SetFileDisposition = %d", rc)); | ||
4956 | |||
4957 | return rc; | ||
4958 | } | ||
4879 | 4959 | ||
4880 | int | 4960 | int |
4881 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | 4961 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0711db65afe8..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; |
3599 | bool ntlmv2_flag = false; | 3599 | bool ntlmv2_flag = false; |
3600 | int first_time = 0; | 3600 | int first_time = 0; |
3601 | struct TCP_Server_Info *server = pSesInfo->server; | ||
3601 | 3602 | ||
3602 | /* what if server changes its buffer size after dropping the session? */ | 3603 | /* what if server changes its buffer size after dropping the session? */ |
3603 | if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { | 3604 | if (server->maxBuf == 0) /* no need to send on reconnect */ { |
3604 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3605 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3605 | if (rc == -EAGAIN) /* retry only once on 1st time connection */ { | 3606 | if (rc == -EAGAIN) { |
3607 | /* retry only once on 1st time connection */ | ||
3606 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3608 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3607 | if (rc == -EAGAIN) | 3609 | if (rc == -EAGAIN) |
3608 | rc = -EHOSTDOWN; | 3610 | rc = -EHOSTDOWN; |
3609 | } | 3611 | } |
3610 | if (rc == 0) { | 3612 | if (rc == 0) { |
3611 | spin_lock(&GlobalMid_Lock); | 3613 | spin_lock(&GlobalMid_Lock); |
3612 | if (pSesInfo->server->tcpStatus != CifsExiting) | 3614 | if (server->tcpStatus != CifsExiting) |
3613 | pSesInfo->server->tcpStatus = CifsGood; | 3615 | server->tcpStatus = CifsGood; |
3614 | else | 3616 | else |
3615 | rc = -EHOSTDOWN; | 3617 | rc = -EHOSTDOWN; |
3616 | spin_unlock(&GlobalMid_Lock); | 3618 | spin_unlock(&GlobalMid_Lock); |
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3623 | goto ss_err_exit; | 3625 | goto ss_err_exit; |
3624 | 3626 | ||
3625 | pSesInfo->flags = 0; | 3627 | pSesInfo->flags = 0; |
3626 | pSesInfo->capabilities = pSesInfo->server->capabilities; | 3628 | pSesInfo->capabilities = server->capabilities; |
3627 | if (linuxExtEnabled == 0) | 3629 | if (linuxExtEnabled == 0) |
3628 | pSesInfo->capabilities &= (~CAP_UNIX); | 3630 | pSesInfo->capabilities &= (~CAP_UNIX); |
3629 | /* pSesInfo->sequence_number = 0;*/ | 3631 | /* pSesInfo->sequence_number = 0;*/ |
3630 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", | 3632 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", |
3631 | pSesInfo->server->secMode, | 3633 | server->secMode, server->capabilities, server->timeAdj)); |
3632 | pSesInfo->server->capabilities, | 3634 | |
3633 | pSesInfo->server->timeAdj)); | ||
3634 | if (experimEnabled < 2) | 3635 | if (experimEnabled < 2) |
3635 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); | 3636 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); |
3636 | else if (extended_security | 3637 | else if (extended_security |
3637 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3638 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3638 | && (pSesInfo->server->secType == NTLMSSP)) { | 3639 | && (server->secType == NTLMSSP)) { |
3639 | rc = -EOPNOTSUPP; | 3640 | rc = -EOPNOTSUPP; |
3640 | } else if (extended_security | 3641 | } else if (extended_security |
3641 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3642 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3642 | && (pSesInfo->server->secType == RawNTLMSSP)) { | 3643 | && (server->secType == RawNTLMSSP)) { |
3643 | cFYI(1, ("NTLMSSP sesssetup")); | 3644 | cFYI(1, ("NTLMSSP sesssetup")); |
3644 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, | 3645 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, |
3645 | nls_info); | 3646 | nls_info); |
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3668 | 3669 | ||
3669 | } else { | 3670 | } else { |
3670 | SMBNTencrypt(pSesInfo->password, | 3671 | SMBNTencrypt(pSesInfo->password, |
3671 | pSesInfo->server->cryptKey, | 3672 | server->cryptKey, |
3672 | ntlm_session_key); | 3673 | ntlm_session_key); |
3673 | 3674 | ||
3674 | if (first_time) | 3675 | if (first_time) |
3675 | cifs_calculate_mac_key( | 3676 | cifs_calculate_mac_key( |
3676 | &pSesInfo->server->mac_signing_key, | 3677 | &server->mac_signing_key, |
3677 | ntlm_session_key, | 3678 | ntlm_session_key, |
3678 | pSesInfo->password); | 3679 | pSesInfo->password); |
3679 | } | 3680 | } |
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3686 | nls_info); | 3687 | nls_info); |
3687 | } | 3688 | } |
3688 | } else { /* old style NTLM 0.12 session setup */ | 3689 | } else { /* old style NTLM 0.12 session setup */ |
3689 | SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, | 3690 | SMBNTencrypt(pSesInfo->password, server->cryptKey, |
3690 | ntlm_session_key); | 3691 | ntlm_session_key); |
3691 | 3692 | ||
3692 | if (first_time) | 3693 | if (first_time) |
3693 | cifs_calculate_mac_key( | 3694 | cifs_calculate_mac_key(&server->mac_signing_key, |
3694 | &pSesInfo->server->mac_signing_key, | 3695 | ntlm_session_key, |
3695 | ntlm_session_key, pSesInfo->password); | 3696 | pSesInfo->password); |
3696 | 3697 | ||
3697 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); | 3698 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); |
3698 | } | 3699 | } |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..1e0c1bd8f2e4 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -29,38 +29,13 @@ | |||
29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
30 | #include "cifs_debug.h" | 30 | #include "cifs_debug.h" |
31 | 31 | ||
32 | static int dns_resolver_instantiate(struct key *key, const void *data, | ||
33 | size_t datalen) | ||
34 | { | ||
35 | int rc = 0; | ||
36 | char *ip; | ||
37 | |||
38 | ip = kmalloc(datalen+1, GFP_KERNEL); | ||
39 | if (!ip) | ||
40 | return -ENOMEM; | ||
41 | |||
42 | memcpy(ip, data, datalen); | ||
43 | ip[datalen] = '\0'; | ||
44 | |||
45 | rcu_assign_pointer(key->payload.data, ip); | ||
46 | |||
47 | return rc; | ||
48 | } | ||
49 | |||
50 | struct key_type key_type_dns_resolver = { | ||
51 | .name = "dns_resolver", | ||
52 | .def_datalen = sizeof(struct in_addr), | ||
53 | .describe = user_describe, | ||
54 | .instantiate = dns_resolver_instantiate, | ||
55 | .match = user_match, | ||
56 | }; | ||
57 | |||
58 | /* Checks if supplied name is IP address | 32 | /* Checks if supplied name is IP address |
59 | * returns: | 33 | * returns: |
60 | * 1 - name is IP | 34 | * 1 - name is IP |
61 | * 0 - name is not IP | 35 | * 0 - name is not IP |
62 | */ | 36 | */ |
63 | static int is_ip(const char *name) | 37 | static int |
38 | is_ip(const char *name) | ||
64 | { | 39 | { |
65 | int rc; | 40 | int rc; |
66 | struct sockaddr_in sin_server; | 41 | struct sockaddr_in sin_server; |
@@ -82,6 +57,47 @@ static int is_ip(const char *name) | |||
82 | return 0; | 57 | return 0; |
83 | } | 58 | } |
84 | 59 | ||
60 | static int | ||
61 | dns_resolver_instantiate(struct key *key, const void *data, | ||
62 | size_t datalen) | ||
63 | { | ||
64 | int rc = 0; | ||
65 | char *ip; | ||
66 | |||
67 | ip = kmalloc(datalen + 1, GFP_KERNEL); | ||
68 | if (!ip) | ||
69 | return -ENOMEM; | ||
70 | |||
71 | memcpy(ip, data, datalen); | ||
72 | ip[datalen] = '\0'; | ||
73 | |||
74 | /* make sure this looks like an address */ | ||
75 | if (!is_ip((const char *) ip)) { | ||
76 | kfree(ip); | ||
77 | return -EINVAL; | ||
78 | } | ||
79 | |||
80 | key->type_data.x[0] = datalen; | ||
81 | rcu_assign_pointer(key->payload.data, ip); | ||
82 | |||
83 | return rc; | ||
84 | } | ||
85 | |||
86 | static void | ||
87 | dns_resolver_destroy(struct key *key) | ||
88 | { | ||
89 | kfree(key->payload.data); | ||
90 | } | ||
91 | |||
92 | struct key_type key_type_dns_resolver = { | ||
93 | .name = "dns_resolver", | ||
94 | .def_datalen = sizeof(struct in_addr), | ||
95 | .describe = user_describe, | ||
96 | .instantiate = dns_resolver_instantiate, | ||
97 | .destroy = dns_resolver_destroy, | ||
98 | .match = user_match, | ||
99 | }; | ||
100 | |||
85 | /* Resolves server name to ip address. | 101 | /* Resolves server name to ip address. |
86 | * input: | 102 | * input: |
87 | * unc - server UNC | 103 | * unc - server UNC |
@@ -133,6 +149,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
133 | 149 | ||
134 | rkey = request_key(&key_type_dns_resolver, name, ""); | 150 | rkey = request_key(&key_type_dns_resolver, name, ""); |
135 | if (!IS_ERR(rkey)) { | 151 | if (!IS_ERR(rkey)) { |
152 | len = rkey->type_data.x[0]; | ||
136 | data = rkey->payload.data; | 153 | data = rkey->payload.data; |
137 | } else { | 154 | } else { |
138 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); | 155 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); |
@@ -141,11 +158,9 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
141 | 158 | ||
142 | skip_upcall: | 159 | skip_upcall: |
143 | if (data) { | 160 | if (data) { |
144 | len = strlen(data); | 161 | *ip_addr = kmalloc(len + 1, GFP_KERNEL); |
145 | *ip_addr = kmalloc(len+1, GFP_KERNEL); | ||
146 | if (*ip_addr) { | 162 | if (*ip_addr) { |
147 | memcpy(*ip_addr, data, len); | 163 | memcpy(*ip_addr, data, len + 1); |
148 | (*ip_addr)[len] = '\0'; | ||
149 | if (!IS_ERR(rkey)) | 164 | if (!IS_ERR(rkey)) |
150 | cFYI(1, ("%s: resolved: %s to %s", __func__, | 165 | cFYI(1, ("%s: resolved: %s to %s", __func__, |
151 | name, | 166 | name, |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..c4a8a0605125 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -107,7 +107,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, | |||
107 | 107 | ||
108 | /* want handles we can use to read with first | 108 | /* want handles we can use to read with first |
109 | in the list so we do not have to walk the | 109 | in the list so we do not have to walk the |
110 | list to search for one in prepare_write */ | 110 | list to search for one in write_begin */ |
111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { | 111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { |
112 | list_add_tail(&pCifsFile->flist, | 112 | list_add_tail(&pCifsFile->flist, |
113 | &pCifsInode->openFileList); | 113 | &pCifsInode->openFileList); |
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
833 | return -EBADF; | 833 | return -EBADF; |
834 | open_file = (struct cifsFileInfo *) file->private_data; | 834 | open_file = (struct cifsFileInfo *) file->private_data; |
835 | 835 | ||
836 | rc = generic_write_checks(file, poffset, &write_size, 0); | ||
837 | if (rc) | ||
838 | return rc; | ||
839 | |||
836 | xid = GetXid(); | 840 | xid = GetXid(); |
837 | 841 | ||
838 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 842 | if (*poffset > file->f_path.dentry->d_inode->i_size) |
@@ -911,7 +915,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
911 | } | 915 | } |
912 | 916 | ||
913 | static ssize_t cifs_write(struct file *file, const char *write_data, | 917 | static ssize_t cifs_write(struct file *file, const char *write_data, |
914 | size_t write_size, loff_t *poffset) | 918 | size_t write_size, loff_t *poffset) |
915 | { | 919 | { |
916 | int rc = 0; | 920 | int rc = 0; |
917 | unsigned int bytes_written = 0; | 921 | unsigned int bytes_written = 0; |
@@ -1061,6 +1065,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
1061 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | 1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) |
1062 | { | 1066 | { |
1063 | struct cifsFileInfo *open_file; | 1067 | struct cifsFileInfo *open_file; |
1068 | bool any_available = false; | ||
1064 | int rc; | 1069 | int rc; |
1065 | 1070 | ||
1066 | /* Having a null inode here (because mapping->host was set to zero by | 1071 | /* Having a null inode here (because mapping->host was set to zero by |
@@ -1076,8 +1081,10 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | |||
1076 | read_lock(&GlobalSMBSeslock); | 1081 | read_lock(&GlobalSMBSeslock); |
1077 | refind_writable: | 1082 | refind_writable: |
1078 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1083 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
1079 | if (open_file->closePend) | 1084 | if (open_file->closePend || |
1085 | (!any_available && open_file->pid != current->tgid)) | ||
1080 | continue; | 1086 | continue; |
1087 | |||
1081 | if (open_file->pfile && | 1088 | if (open_file->pfile && |
1082 | ((open_file->pfile->f_flags & O_RDWR) || | 1089 | ((open_file->pfile->f_flags & O_RDWR) || |
1083 | (open_file->pfile->f_flags & O_WRONLY))) { | 1090 | (open_file->pfile->f_flags & O_WRONLY))) { |
@@ -1127,6 +1134,11 @@ refind_writable: | |||
1127 | of the loop here. */ | 1134 | of the loop here. */ |
1128 | } | 1135 | } |
1129 | } | 1136 | } |
1137 | /* couldn't find useable FH with same pid, try any available */ | ||
1138 | if (!any_available) { | ||
1139 | any_available = true; | ||
1140 | goto refind_writable; | ||
1141 | } | ||
1130 | read_unlock(&GlobalSMBSeslock); | 1142 | read_unlock(&GlobalSMBSeslock); |
1131 | return NULL; | 1143 | return NULL; |
1132 | } | 1144 | } |
@@ -1443,49 +1455,52 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc) | |||
1443 | return rc; | 1455 | return rc; |
1444 | } | 1456 | } |
1445 | 1457 | ||
1446 | static int cifs_commit_write(struct file *file, struct page *page, | 1458 | static int cifs_write_end(struct file *file, struct address_space *mapping, |
1447 | unsigned offset, unsigned to) | 1459 | loff_t pos, unsigned len, unsigned copied, |
1460 | struct page *page, void *fsdata) | ||
1448 | { | 1461 | { |
1449 | int xid; | 1462 | int rc; |
1450 | int rc = 0; | 1463 | struct inode *inode = mapping->host; |
1451 | struct inode *inode = page->mapping->host; | ||
1452 | loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
1453 | char *page_data; | ||
1454 | 1464 | ||
1455 | xid = GetXid(); | 1465 | cFYI(1, ("write_end for page %p from pos %lld with %d bytes", |
1456 | cFYI(1, ("commit write for page %p up to position %lld for %d", | 1466 | page, pos, copied)); |
1457 | page, position, to)); | 1467 | |
1458 | spin_lock(&inode->i_lock); | 1468 | if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) |
1459 | if (position > inode->i_size) | 1469 | SetPageUptodate(page); |
1460 | i_size_write(inode, position); | ||
1461 | 1470 | ||
1462 | spin_unlock(&inode->i_lock); | ||
1463 | if (!PageUptodate(page)) { | 1471 | if (!PageUptodate(page)) { |
1464 | position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; | 1472 | char *page_data; |
1465 | /* can not rely on (or let) writepage write this data */ | 1473 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); |
1466 | if (to < offset) { | 1474 | int xid; |
1467 | cFYI(1, ("Illegal offsets, can not copy from %d to %d", | 1475 | |
1468 | offset, to)); | 1476 | xid = GetXid(); |
1469 | FreeXid(xid); | ||
1470 | return rc; | ||
1471 | } | ||
1472 | /* this is probably better than directly calling | 1477 | /* this is probably better than directly calling |
1473 | partialpage_write since in this function the file handle is | 1478 | partialpage_write since in this function the file handle is |
1474 | known which we might as well leverage */ | 1479 | known which we might as well leverage */ |
1475 | /* BB check if anything else missing out of ppw | 1480 | /* BB check if anything else missing out of ppw |
1476 | such as updating last write time */ | 1481 | such as updating last write time */ |
1477 | page_data = kmap(page); | 1482 | page_data = kmap(page); |
1478 | rc = cifs_write(file, page_data + offset, to-offset, | 1483 | rc = cifs_write(file, page_data + offset, copied, &pos); |
1479 | &position); | 1484 | /* if (rc < 0) should we set writebehind rc? */ |
1480 | if (rc > 0) | ||
1481 | rc = 0; | ||
1482 | /* else if (rc < 0) should we set writebehind rc? */ | ||
1483 | kunmap(page); | 1485 | kunmap(page); |
1486 | |||
1487 | FreeXid(xid); | ||
1484 | } else { | 1488 | } else { |
1489 | rc = copied; | ||
1490 | pos += copied; | ||
1485 | set_page_dirty(page); | 1491 | set_page_dirty(page); |
1486 | } | 1492 | } |
1487 | 1493 | ||
1488 | FreeXid(xid); | 1494 | if (rc > 0) { |
1495 | spin_lock(&inode->i_lock); | ||
1496 | if (pos > inode->i_size) | ||
1497 | i_size_write(inode, pos); | ||
1498 | spin_unlock(&inode->i_lock); | ||
1499 | } | ||
1500 | |||
1501 | unlock_page(page); | ||
1502 | page_cache_release(page); | ||
1503 | |||
1489 | return rc; | 1504 | return rc; |
1490 | } | 1505 | } |
1491 | 1506 | ||
@@ -2031,49 +2046,44 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) | |||
2031 | return true; | 2046 | return true; |
2032 | } | 2047 | } |
2033 | 2048 | ||
2034 | static int cifs_prepare_write(struct file *file, struct page *page, | 2049 | static int cifs_write_begin(struct file *file, struct address_space *mapping, |
2035 | unsigned from, unsigned to) | 2050 | loff_t pos, unsigned len, unsigned flags, |
2051 | struct page **pagep, void **fsdata) | ||
2036 | { | 2052 | { |
2037 | int rc = 0; | 2053 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
2038 | loff_t i_size; | 2054 | loff_t offset = pos & (PAGE_CACHE_SIZE - 1); |
2039 | loff_t offset; | 2055 | |
2056 | cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); | ||
2040 | 2057 | ||
2041 | cFYI(1, ("prepare write for page %p from %d to %d", page, from, to)); | 2058 | *pagep = __grab_cache_page(mapping, index); |
2042 | if (PageUptodate(page)) | 2059 | if (!*pagep) |
2060 | return -ENOMEM; | ||
2061 | |||
2062 | if (PageUptodate(*pagep)) | ||
2043 | return 0; | 2063 | return 0; |
2044 | 2064 | ||
2045 | /* If we are writing a full page it will be up to date, | 2065 | /* If we are writing a full page it will be up to date, |
2046 | no need to read from the server */ | 2066 | no need to read from the server */ |
2047 | if ((to == PAGE_CACHE_SIZE) && (from == 0)) { | 2067 | if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) |
2048 | SetPageUptodate(page); | ||
2049 | return 0; | 2068 | return 0; |
2050 | } | ||
2051 | 2069 | ||
2052 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 2070 | if ((file->f_flags & O_ACCMODE) != O_WRONLY) { |
2053 | i_size = i_size_read(page->mapping->host); | 2071 | int rc; |
2054 | 2072 | ||
2055 | if ((offset >= i_size) || | ||
2056 | ((from == 0) && (offset + to) >= i_size)) { | ||
2057 | /* | ||
2058 | * We don't need to read data beyond the end of the file. | ||
2059 | * zero it, and set the page uptodate | ||
2060 | */ | ||
2061 | simple_prepare_write(file, page, from, to); | ||
2062 | SetPageUptodate(page); | ||
2063 | } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) { | ||
2064 | /* might as well read a page, it is fast enough */ | 2073 | /* might as well read a page, it is fast enough */ |
2065 | rc = cifs_readpage_worker(file, page, &offset); | 2074 | rc = cifs_readpage_worker(file, *pagep, &offset); |
2075 | |||
2076 | /* we do not need to pass errors back | ||
2077 | e.g. if we do not have read access to the file | ||
2078 | because cifs_write_end will attempt synchronous writes | ||
2079 | -- shaggy */ | ||
2066 | } else { | 2080 | } else { |
2067 | /* we could try using another file handle if there is one - | 2081 | /* we could try using another file handle if there is one - |
2068 | but how would we lock it to prevent close of that handle | 2082 | but how would we lock it to prevent close of that handle |
2069 | racing with this read? In any case | 2083 | racing with this read? In any case |
2070 | this will be written out by commit_write so is fine */ | 2084 | this will be written out by write_end so is fine */ |
2071 | } | 2085 | } |
2072 | 2086 | ||
2073 | /* we do not need to pass errors back | ||
2074 | e.g. if we do not have read access to the file | ||
2075 | because cifs_commit_write will do the right thing. -- shaggy */ | ||
2076 | |||
2077 | return 0; | 2087 | return 0; |
2078 | } | 2088 | } |
2079 | 2089 | ||
@@ -2082,8 +2092,8 @@ const struct address_space_operations cifs_addr_ops = { | |||
2082 | .readpages = cifs_readpages, | 2092 | .readpages = cifs_readpages, |
2083 | .writepage = cifs_writepage, | 2093 | .writepage = cifs_writepage, |
2084 | .writepages = cifs_writepages, | 2094 | .writepages = cifs_writepages, |
2085 | .prepare_write = cifs_prepare_write, | 2095 | .write_begin = cifs_write_begin, |
2086 | .commit_write = cifs_commit_write, | 2096 | .write_end = cifs_write_end, |
2087 | .set_page_dirty = __set_page_dirty_nobuffers, | 2097 | .set_page_dirty = __set_page_dirty_nobuffers, |
2088 | /* .sync_page = cifs_sync_page, */ | 2098 | /* .sync_page = cifs_sync_page, */ |
2089 | /* .direct_IO = */ | 2099 | /* .direct_IO = */ |
@@ -2098,8 +2108,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { | |||
2098 | .readpage = cifs_readpage, | 2108 | .readpage = cifs_readpage, |
2099 | .writepage = cifs_writepage, | 2109 | .writepage = cifs_writepage, |
2100 | .writepages = cifs_writepages, | 2110 | .writepages = cifs_writepages, |
2101 | .prepare_write = cifs_prepare_write, | 2111 | .write_begin = cifs_write_begin, |
2102 | .commit_write = cifs_commit_write, | 2112 | .write_end = cifs_write_end, |
2103 | .set_page_dirty = __set_page_dirty_nobuffers, | 2113 | .set_page_dirty = __set_page_dirty_nobuffers, |
2104 | /* .sync_page = cifs_sync_page, */ | 2114 | /* .sync_page = cifs_sync_page, */ |
2105 | /* .direct_IO = */ | 2115 | /* .direct_IO = */ |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 848286861c31..a8c833345fc9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, | |||
546 | if ((inode->i_mode & S_IWUGO) == 0 && | 546 | if ((inode->i_mode & S_IWUGO) == 0 && |
547 | (attr & ATTR_READONLY) == 0) | 547 | (attr & ATTR_READONLY) == 0) |
548 | inode->i_mode |= (S_IWUGO & default_mode); | 548 | inode->i_mode |= (S_IWUGO & default_mode); |
549 | inode->i_mode &= ~S_IFMT; | 549 | |
550 | inode->i_mode &= ~S_IFMT; | ||
550 | } | 551 | } |
551 | /* clear write bits if ATTR_READONLY is set */ | 552 | /* clear write bits if ATTR_READONLY is set */ |
552 | if (attr & ATTR_READONLY) | 553 | if (attr & ATTR_READONLY) |
@@ -664,40 +665,201 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | |||
664 | return inode; | 665 | return inode; |
665 | } | 666 | } |
666 | 667 | ||
667 | int cifs_unlink(struct inode *inode, struct dentry *direntry) | 668 | static int |
669 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
670 | char *full_path, __u32 dosattr) | ||
671 | { | ||
672 | int rc; | ||
673 | int oplock = 0; | ||
674 | __u16 netfid; | ||
675 | __u32 netpid; | ||
676 | bool set_time = false; | ||
677 | struct cifsFileInfo *open_file; | ||
678 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
679 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
680 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
681 | FILE_BASIC_INFO info_buf; | ||
682 | |||
683 | if (attrs->ia_valid & ATTR_ATIME) { | ||
684 | set_time = true; | ||
685 | info_buf.LastAccessTime = | ||
686 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
687 | } else | ||
688 | info_buf.LastAccessTime = 0; | ||
689 | |||
690 | if (attrs->ia_valid & ATTR_MTIME) { | ||
691 | set_time = true; | ||
692 | info_buf.LastWriteTime = | ||
693 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
694 | } else | ||
695 | info_buf.LastWriteTime = 0; | ||
696 | |||
697 | /* | ||
698 | * Samba throws this field away, but windows may actually use it. | ||
699 | * Do not set ctime unless other time stamps are changed explicitly | ||
700 | * (i.e. by utimes()) since we would then have a mix of client and | ||
701 | * server times. | ||
702 | */ | ||
703 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
704 | cFYI(1, ("CIFS - CTIME changed")); | ||
705 | info_buf.ChangeTime = | ||
706 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
707 | } else | ||
708 | info_buf.ChangeTime = 0; | ||
709 | |||
710 | info_buf.CreationTime = 0; /* don't change */ | ||
711 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
712 | |||
713 | /* | ||
714 | * If the file is already open for write, just use that fileid | ||
715 | */ | ||
716 | open_file = find_writable_file(cifsInode); | ||
717 | if (open_file) { | ||
718 | netfid = open_file->netfid; | ||
719 | netpid = open_file->pid; | ||
720 | goto set_via_filehandle; | ||
721 | } | ||
722 | |||
723 | /* | ||
724 | * NT4 apparently returns success on this call, but it doesn't | ||
725 | * really work. | ||
726 | */ | ||
727 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
728 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
729 | &info_buf, cifs_sb->local_nls, | ||
730 | cifs_sb->mnt_cifs_flags & | ||
731 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
732 | if (rc == 0) { | ||
733 | cifsInode->cifsAttrs = dosattr; | ||
734 | goto out; | ||
735 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
736 | goto out; | ||
737 | } | ||
738 | |||
739 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
740 | "times not supported by this server")); | ||
741 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
742 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
743 | CREATE_NOT_DIR, &netfid, &oplock, | ||
744 | NULL, cifs_sb->local_nls, | ||
745 | cifs_sb->mnt_cifs_flags & | ||
746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
747 | |||
748 | if (rc != 0) { | ||
749 | if (rc == -EIO) | ||
750 | rc = -EINVAL; | ||
751 | goto out; | ||
752 | } | ||
753 | |||
754 | netpid = current->tgid; | ||
755 | |||
756 | set_via_filehandle: | ||
757 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
758 | if (!rc) | ||
759 | cifsInode->cifsAttrs = dosattr; | ||
760 | |||
761 | if (open_file == NULL) | ||
762 | CIFSSMBClose(xid, pTcon, netfid); | ||
763 | else | ||
764 | atomic_dec(&open_file->wrtPending); | ||
765 | out: | ||
766 | return rc; | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit | ||
771 | * and rename it to a random name that hopefully won't conflict with | ||
772 | * anything else. | ||
773 | */ | ||
774 | static int | ||
775 | cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) | ||
776 | { | ||
777 | int oplock = 0; | ||
778 | int rc; | ||
779 | __u16 netfid; | ||
780 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
781 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
782 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
783 | __u32 dosattr; | ||
784 | FILE_BASIC_INFO *info_buf; | ||
785 | |||
786 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | ||
787 | DELETE|FILE_WRITE_ATTRIBUTES, | ||
788 | CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, | ||
789 | &netfid, &oplock, NULL, cifs_sb->local_nls, | ||
790 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
791 | if (rc != 0) | ||
792 | goto out; | ||
793 | |||
794 | /* set ATTR_HIDDEN and clear ATTR_READONLY */ | ||
795 | cifsInode = CIFS_I(inode); | ||
796 | dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; | ||
797 | if (dosattr == 0) | ||
798 | dosattr |= ATTR_NORMAL; | ||
799 | dosattr |= ATTR_HIDDEN; | ||
800 | |||
801 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); | ||
802 | if (info_buf == NULL) { | ||
803 | rc = -ENOMEM; | ||
804 | goto out_close; | ||
805 | } | ||
806 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
807 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); | ||
808 | kfree(info_buf); | ||
809 | if (rc != 0) | ||
810 | goto out_close; | ||
811 | cifsInode->cifsAttrs = dosattr; | ||
812 | |||
813 | /* silly-rename the file */ | ||
814 | CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, | ||
815 | cifs_sb->mnt_cifs_flags & | ||
816 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
817 | |||
818 | /* set DELETE_ON_CLOSE */ | ||
819 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); | ||
820 | |||
821 | /* | ||
822 | * some samba versions return -ENOENT when we try to set the file | ||
823 | * disposition here. Likely a samba bug, but work around it for now | ||
824 | */ | ||
825 | if (rc == -ENOENT) | ||
826 | rc = 0; | ||
827 | |||
828 | out_close: | ||
829 | CIFSSMBClose(xid, tcon, netfid); | ||
830 | out: | ||
831 | return rc; | ||
832 | } | ||
833 | |||
834 | int cifs_unlink(struct inode *dir, struct dentry *dentry) | ||
668 | { | 835 | { |
669 | int rc = 0; | 836 | int rc = 0; |
670 | int xid; | 837 | int xid; |
671 | struct cifs_sb_info *cifs_sb; | ||
672 | struct cifsTconInfo *pTcon; | ||
673 | char *full_path = NULL; | 838 | char *full_path = NULL; |
674 | struct cifsInodeInfo *cifsInode; | 839 | struct inode *inode = dentry->d_inode; |
675 | FILE_BASIC_INFO *pinfo_buf; | 840 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
841 | struct super_block *sb = dir->i_sb; | ||
842 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
843 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
844 | struct iattr *attrs = NULL; | ||
845 | __u32 dosattr = 0, origattr = 0; | ||
676 | 846 | ||
677 | cFYI(1, ("cifs_unlink, inode = 0x%p", inode)); | 847 | cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); |
678 | 848 | ||
679 | xid = GetXid(); | 849 | xid = GetXid(); |
680 | 850 | ||
681 | if (inode) | 851 | /* Unlink can be called from rename so we can not take the |
682 | cifs_sb = CIFS_SB(inode->i_sb); | 852 | * sb->s_vfs_rename_mutex here */ |
683 | else | 853 | full_path = build_path_from_dentry(dentry); |
684 | cifs_sb = CIFS_SB(direntry->d_sb); | ||
685 | pTcon = cifs_sb->tcon; | ||
686 | |||
687 | /* Unlink can be called from rename so we can not grab the sem here | ||
688 | since we deadlock otherwise */ | ||
689 | /* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
690 | full_path = build_path_from_dentry(direntry); | ||
691 | /* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
692 | if (full_path == NULL) { | 854 | if (full_path == NULL) { |
693 | FreeXid(xid); | 855 | FreeXid(xid); |
694 | return -ENOMEM; | 856 | return -ENOMEM; |
695 | } | 857 | } |
696 | 858 | ||
697 | if ((pTcon->ses->capabilities & CAP_UNIX) && | 859 | if ((tcon->ses->capabilities & CAP_UNIX) && |
698 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 860 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
699 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { | 861 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
700 | rc = CIFSPOSIXDelFile(xid, pTcon, full_path, | 862 | rc = CIFSPOSIXDelFile(xid, tcon, full_path, |
701 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, | 863 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, |
702 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 864 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
703 | cFYI(1, ("posix del rc %d", rc)); | 865 | cFYI(1, ("posix del rc %d", rc)); |
@@ -705,125 +867,60 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) | |||
705 | goto psx_del_no_retry; | 867 | goto psx_del_no_retry; |
706 | } | 868 | } |
707 | 869 | ||
708 | rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, | 870 | retry_std_delete: |
871 | rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, | ||
709 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 872 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
873 | |||
710 | psx_del_no_retry: | 874 | psx_del_no_retry: |
711 | if (!rc) { | 875 | if (!rc) { |
712 | if (direntry->d_inode) | 876 | if (inode) |
713 | drop_nlink(direntry->d_inode); | 877 | drop_nlink(inode); |
714 | } else if (rc == -ENOENT) { | 878 | } else if (rc == -ENOENT) { |
715 | d_drop(direntry); | 879 | d_drop(dentry); |
716 | } else if (rc == -ETXTBSY) { | 880 | } else if (rc == -ETXTBSY) { |
717 | int oplock = 0; | 881 | rc = cifs_rename_pending_delete(full_path, inode, xid); |
718 | __u16 netfid; | 882 | if (rc == 0) |
719 | 883 | drop_nlink(inode); | |
720 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, | 884 | } else if (rc == -EACCES && dosattr == 0) { |
721 | CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, | 885 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
722 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 886 | if (attrs == NULL) { |
723 | cifs_sb->mnt_cifs_flags & | 887 | rc = -ENOMEM; |
724 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 888 | goto out_reval; |
725 | if (rc == 0) { | ||
726 | CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, | ||
727 | cifs_sb->local_nls, | ||
728 | cifs_sb->mnt_cifs_flags & | ||
729 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
730 | CIFSSMBClose(xid, pTcon, netfid); | ||
731 | if (direntry->d_inode) | ||
732 | drop_nlink(direntry->d_inode); | ||
733 | } | 889 | } |
734 | } else if (rc == -EACCES) { | ||
735 | /* try only if r/o attribute set in local lookup data? */ | ||
736 | pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL); | ||
737 | if (pinfo_buf) { | ||
738 | /* ATTRS set to normal clears r/o bit */ | ||
739 | pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); | ||
740 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) | ||
741 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
742 | pinfo_buf, | ||
743 | cifs_sb->local_nls, | ||
744 | cifs_sb->mnt_cifs_flags & | ||
745 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
746 | else | ||
747 | rc = -EOPNOTSUPP; | ||
748 | 890 | ||
749 | if (rc == -EOPNOTSUPP) { | 891 | /* try to reset dos attributes */ |
750 | int oplock = 0; | 892 | origattr = cifsInode->cifsAttrs; |
751 | __u16 netfid; | 893 | if (origattr == 0) |
752 | /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, | 894 | origattr |= ATTR_NORMAL; |
753 | full_path, | 895 | dosattr = origattr & ~ATTR_READONLY; |
754 | (__u16)ATTR_NORMAL, | 896 | if (dosattr == 0) |
755 | cifs_sb->local_nls); | 897 | dosattr |= ATTR_NORMAL; |
756 | For some strange reason it seems that NT4 eats the | 898 | dosattr |= ATTR_HIDDEN; |
757 | old setattr call without actually setting the | 899 | |
758 | attributes so on to the third attempted workaround | 900 | rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); |
759 | */ | 901 | if (rc != 0) |
760 | 902 | goto out_reval; | |
761 | /* BB could scan to see if we already have it open | 903 | |
762 | and pass in pid of opener to function */ | 904 | goto retry_std_delete; |
763 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
764 | FILE_OPEN, SYNCHRONIZE | | ||
765 | FILE_WRITE_ATTRIBUTES, 0, | ||
766 | &netfid, &oplock, NULL, | ||
767 | cifs_sb->local_nls, | ||
768 | cifs_sb->mnt_cifs_flags & | ||
769 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
770 | if (rc == 0) { | ||
771 | rc = CIFSSMBSetFileInfo(xid, pTcon, | ||
772 | pinfo_buf, | ||
773 | netfid, | ||
774 | current->tgid); | ||
775 | CIFSSMBClose(xid, pTcon, netfid); | ||
776 | } | ||
777 | } | ||
778 | kfree(pinfo_buf); | ||
779 | } | ||
780 | if (rc == 0) { | ||
781 | rc = CIFSSMBDelFile(xid, pTcon, full_path, | ||
782 | cifs_sb->local_nls, | ||
783 | cifs_sb->mnt_cifs_flags & | ||
784 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
785 | if (!rc) { | ||
786 | if (direntry->d_inode) | ||
787 | drop_nlink(direntry->d_inode); | ||
788 | } else if (rc == -ETXTBSY) { | ||
789 | int oplock = 0; | ||
790 | __u16 netfid; | ||
791 | |||
792 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
793 | FILE_OPEN, DELETE, | ||
794 | CREATE_NOT_DIR | | ||
795 | CREATE_DELETE_ON_CLOSE, | ||
796 | &netfid, &oplock, NULL, | ||
797 | cifs_sb->local_nls, | ||
798 | cifs_sb->mnt_cifs_flags & | ||
799 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
800 | if (rc == 0) { | ||
801 | CIFSSMBRenameOpenFile(xid, pTcon, | ||
802 | netfid, NULL, | ||
803 | cifs_sb->local_nls, | ||
804 | cifs_sb->mnt_cifs_flags & | ||
805 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
806 | CIFSSMBClose(xid, pTcon, netfid); | ||
807 | if (direntry->d_inode) | ||
808 | drop_nlink(direntry->d_inode); | ||
809 | } | ||
810 | /* BB if rc = -ETXTBUSY goto the rename logic BB */ | ||
811 | } | ||
812 | } | ||
813 | } | ||
814 | if (direntry->d_inode) { | ||
815 | cifsInode = CIFS_I(direntry->d_inode); | ||
816 | cifsInode->time = 0; /* will force revalidate to get info | ||
817 | when needed */ | ||
818 | direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); | ||
819 | } | 905 | } |
906 | |||
907 | /* undo the setattr if we errored out and it's needed */ | ||
908 | if (rc != 0 && dosattr != 0) | ||
909 | cifs_set_file_info(inode, attrs, xid, full_path, origattr); | ||
910 | |||
911 | out_reval: | ||
820 | if (inode) { | 912 | if (inode) { |
821 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | ||
822 | cifsInode = CIFS_I(inode); | 913 | cifsInode = CIFS_I(inode); |
823 | cifsInode->time = 0; /* force revalidate of dir as well */ | 914 | cifsInode->time = 0; /* will force revalidate to get info |
915 | when needed */ | ||
916 | inode->i_ctime = current_fs_time(sb); | ||
824 | } | 917 | } |
918 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); | ||
919 | cifsInode = CIFS_I(dir); | ||
920 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ | ||
825 | 921 | ||
826 | kfree(full_path); | 922 | kfree(full_path); |
923 | kfree(attrs); | ||
827 | FreeXid(xid); | 924 | FreeXid(xid); |
828 | return rc; | 925 | return rc; |
829 | } | 926 | } |
@@ -868,7 +965,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode, | |||
868 | 965 | ||
869 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | 966 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) |
870 | { | 967 | { |
871 | int rc = 0; | 968 | int rc = 0, tmprc; |
872 | int xid; | 969 | int xid; |
873 | struct cifs_sb_info *cifs_sb; | 970 | struct cifs_sb_info *cifs_sb; |
874 | struct cifsTconInfo *pTcon; | 971 | struct cifsTconInfo *pTcon; |
@@ -930,6 +1027,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
930 | kfree(pInfo); | 1027 | kfree(pInfo); |
931 | goto mkdir_get_info; | 1028 | goto mkdir_get_info; |
932 | } | 1029 | } |
1030 | |||
933 | /* Is an i_ino of zero legal? */ | 1031 | /* Is an i_ino of zero legal? */ |
934 | /* Are there sanity checks we can use to ensure that | 1032 | /* Are there sanity checks we can use to ensure that |
935 | the server is really filling in that field? */ | 1033 | the server is really filling in that field? */ |
@@ -1018,12 +1116,20 @@ mkdir_get_info: | |||
1018 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | 1116 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && |
1019 | (mode & S_IWUGO) == 0) { | 1117 | (mode & S_IWUGO) == 0) { |
1020 | FILE_BASIC_INFO pInfo; | 1118 | FILE_BASIC_INFO pInfo; |
1119 | struct cifsInodeInfo *cifsInode; | ||
1120 | u32 dosattrs; | ||
1121 | |||
1021 | memset(&pInfo, 0, sizeof(pInfo)); | 1122 | memset(&pInfo, 0, sizeof(pInfo)); |
1022 | pInfo.Attributes = cpu_to_le32(ATTR_READONLY); | 1123 | cifsInode = CIFS_I(newinode); |
1023 | CIFSSMBSetPathInfo(xid, pTcon, full_path, | 1124 | dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; |
1024 | &pInfo, cifs_sb->local_nls, | 1125 | pInfo.Attributes = cpu_to_le32(dosattrs); |
1126 | tmprc = CIFSSMBSetPathInfo(xid, pTcon, | ||
1127 | full_path, &pInfo, | ||
1128 | cifs_sb->local_nls, | ||
1025 | cifs_sb->mnt_cifs_flags & | 1129 | cifs_sb->mnt_cifs_flags & |
1026 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1130 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1131 | if (tmprc == 0) | ||
1132 | cifsInode->cifsAttrs = dosattrs; | ||
1027 | } | 1133 | } |
1028 | if (direntry->d_inode) { | 1134 | if (direntry->d_inode) { |
1029 | if (cifs_sb->mnt_cifs_flags & | 1135 | if (cifs_sb->mnt_cifs_flags & |
@@ -1095,117 +1201,141 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1095 | return rc; | 1201 | return rc; |
1096 | } | 1202 | } |
1097 | 1203 | ||
1204 | static int | ||
1205 | cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | ||
1206 | struct dentry *to_dentry, const char *toPath) | ||
1207 | { | ||
1208 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | ||
1209 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1210 | __u16 srcfid; | ||
1211 | int oplock, rc; | ||
1212 | |||
1213 | /* try path-based rename first */ | ||
1214 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | ||
1215 | cifs_sb->mnt_cifs_flags & | ||
1216 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1217 | |||
1218 | /* | ||
1219 | * don't bother with rename by filehandle unless file is busy and | ||
1220 | * source Note that cross directory moves do not work with | ||
1221 | * rename by filehandle to various Windows servers. | ||
1222 | */ | ||
1223 | if (rc == 0 || rc != -ETXTBSY) | ||
1224 | return rc; | ||
1225 | |||
1226 | /* open the file to be renamed -- we need DELETE perms */ | ||
1227 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | ||
1228 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, | ||
1229 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1230 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1231 | |||
1232 | if (rc == 0) { | ||
1233 | rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, | ||
1234 | (const char *) to_dentry->d_name.name, | ||
1235 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
1236 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1237 | |||
1238 | CIFSSMBClose(xid, pTcon, srcfid); | ||
1239 | } | ||
1240 | |||
1241 | return rc; | ||
1242 | } | ||
1243 | |||
1098 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | 1244 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, |
1099 | struct inode *target_inode, struct dentry *target_direntry) | 1245 | struct inode *target_inode, struct dentry *target_direntry) |
1100 | { | 1246 | { |
1101 | char *fromName; | 1247 | char *fromName = NULL; |
1102 | char *toName; | 1248 | char *toName = NULL; |
1103 | struct cifs_sb_info *cifs_sb_source; | 1249 | struct cifs_sb_info *cifs_sb_source; |
1104 | struct cifs_sb_info *cifs_sb_target; | 1250 | struct cifs_sb_info *cifs_sb_target; |
1105 | struct cifsTconInfo *pTcon; | 1251 | struct cifsTconInfo *pTcon; |
1252 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | ||
1253 | FILE_UNIX_BASIC_INFO *info_buf_target; | ||
1106 | int xid; | 1254 | int xid; |
1107 | int rc = 0; | 1255 | int rc; |
1108 | |||
1109 | xid = GetXid(); | ||
1110 | 1256 | ||
1111 | cifs_sb_target = CIFS_SB(target_inode->i_sb); | 1257 | cifs_sb_target = CIFS_SB(target_inode->i_sb); |
1112 | cifs_sb_source = CIFS_SB(source_inode->i_sb); | 1258 | cifs_sb_source = CIFS_SB(source_inode->i_sb); |
1113 | pTcon = cifs_sb_source->tcon; | 1259 | pTcon = cifs_sb_source->tcon; |
1114 | 1260 | ||
1261 | xid = GetXid(); | ||
1262 | |||
1263 | /* | ||
1264 | * BB: this might be allowed if same server, but different share. | ||
1265 | * Consider adding support for this | ||
1266 | */ | ||
1115 | if (pTcon != cifs_sb_target->tcon) { | 1267 | if (pTcon != cifs_sb_target->tcon) { |
1116 | FreeXid(xid); | 1268 | rc = -EXDEV; |
1117 | return -EXDEV; /* BB actually could be allowed if same server, | 1269 | goto cifs_rename_exit; |
1118 | but different share. | ||
1119 | Might eventually add support for this */ | ||
1120 | } | 1270 | } |
1121 | 1271 | ||
1122 | /* we already have the rename sem so we do not need to grab it again | 1272 | /* |
1123 | here to protect the path integrity */ | 1273 | * we already have the rename sem so we do not need to |
1274 | * grab it again here to protect the path integrity | ||
1275 | */ | ||
1124 | fromName = build_path_from_dentry(source_direntry); | 1276 | fromName = build_path_from_dentry(source_direntry); |
1277 | if (fromName == NULL) { | ||
1278 | rc = -ENOMEM; | ||
1279 | goto cifs_rename_exit; | ||
1280 | } | ||
1281 | |||
1125 | toName = build_path_from_dentry(target_direntry); | 1282 | toName = build_path_from_dentry(target_direntry); |
1126 | if ((fromName == NULL) || (toName == NULL)) { | 1283 | if (toName == NULL) { |
1127 | rc = -ENOMEM; | 1284 | rc = -ENOMEM; |
1128 | goto cifs_rename_exit; | 1285 | goto cifs_rename_exit; |
1129 | } | 1286 | } |
1130 | 1287 | ||
1131 | rc = CIFSSMBRename(xid, pTcon, fromName, toName, | 1288 | rc = cifs_do_rename(xid, source_direntry, fromName, |
1132 | cifs_sb_source->local_nls, | 1289 | target_direntry, toName); |
1133 | cifs_sb_source->mnt_cifs_flags & | 1290 | |
1134 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1135 | if (rc == -EEXIST) { | 1291 | if (rc == -EEXIST) { |
1136 | /* check if they are the same file because rename of hardlinked | 1292 | if (pTcon->unix_ext) { |
1137 | files is a noop */ | 1293 | /* |
1138 | FILE_UNIX_BASIC_INFO *info_buf_source; | 1294 | * Are src and dst hardlinks of same inode? We can |
1139 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1295 | * only tell with unix extensions enabled |
1140 | 1296 | */ | |
1141 | info_buf_source = | 1297 | info_buf_source = |
1142 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | 1298 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
1143 | if (info_buf_source != NULL) { | 1299 | GFP_KERNEL); |
1300 | if (info_buf_source == NULL) | ||
1301 | goto unlink_target; | ||
1302 | |||
1144 | info_buf_target = info_buf_source + 1; | 1303 | info_buf_target = info_buf_source + 1; |
1145 | if (pTcon->unix_ext) | 1304 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, |
1146 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, | 1305 | info_buf_source, |
1147 | info_buf_source, | 1306 | cifs_sb_source->local_nls, |
1148 | cifs_sb_source->local_nls, | 1307 | cifs_sb_source->mnt_cifs_flags & |
1149 | cifs_sb_source->mnt_cifs_flags & | ||
1150 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1308 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1151 | /* else rc is still EEXIST so will fall through to | 1309 | if (rc != 0) |
1152 | unlink the target and retry rename */ | 1310 | goto unlink_target; |
1153 | if (rc == 0) { | 1311 | |
1154 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName, | 1312 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, |
1155 | info_buf_target, | 1313 | toName, info_buf_target, |
1156 | cifs_sb_target->local_nls, | 1314 | cifs_sb_target->local_nls, |
1157 | /* remap based on source sb */ | 1315 | /* remap based on source sb */ |
1158 | cifs_sb_source->mnt_cifs_flags & | 1316 | cifs_sb_source->mnt_cifs_flags & |
1159 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1160 | } | ||
1161 | if ((rc == 0) && | ||
1162 | (info_buf_source->UniqueId == | ||
1163 | info_buf_target->UniqueId)) { | ||
1164 | /* do not rename since the files are hardlinked which | ||
1165 | is a noop */ | ||
1166 | } else { | ||
1167 | /* we either can not tell the files are hardlinked | ||
1168 | (as with Windows servers) or files are not | ||
1169 | hardlinked so delete the target manually before | ||
1170 | renaming to follow POSIX rather than Windows | ||
1171 | semantics */ | ||
1172 | cifs_unlink(target_inode, target_direntry); | ||
1173 | rc = CIFSSMBRename(xid, pTcon, fromName, | ||
1174 | toName, | ||
1175 | cifs_sb_source->local_nls, | ||
1176 | cifs_sb_source->mnt_cifs_flags | ||
1177 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1178 | } | ||
1179 | kfree(info_buf_source); | ||
1180 | } /* if we can not get memory just leave rc as EEXIST */ | ||
1181 | } | ||
1182 | |||
1183 | if (rc) | ||
1184 | cFYI(1, ("rename rc %d", rc)); | ||
1185 | |||
1186 | if ((rc == -EIO) || (rc == -EEXIST)) { | ||
1187 | int oplock = 0; | ||
1188 | __u16 netfid; | ||
1189 | |||
1190 | /* BB FIXME Is Generic Read correct for rename? */ | ||
1191 | /* if renaming directory - we should not say CREATE_NOT_DIR, | ||
1192 | need to test renaming open directory, also GENERIC_READ | ||
1193 | might not right be right access to request */ | ||
1194 | rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, | ||
1195 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | ||
1196 | cifs_sb_source->local_nls, | ||
1197 | cifs_sb_source->mnt_cifs_flags & | ||
1198 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1199 | if (rc == 0) { | ||
1200 | rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, | ||
1201 | cifs_sb_source->local_nls, | ||
1202 | cifs_sb_source->mnt_cifs_flags & | ||
1203 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1317 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1204 | CIFSSMBClose(xid, pTcon, netfid); | 1318 | |
1205 | } | 1319 | if (rc == 0 && (info_buf_source->UniqueId == |
1320 | info_buf_target->UniqueId)) | ||
1321 | /* same file, POSIX says that this is a noop */ | ||
1322 | goto cifs_rename_exit; | ||
1323 | } /* else ... BB we could add the same check for Windows by | ||
1324 | checking the UniqueId via FILE_INTERNAL_INFO */ | ||
1325 | unlink_target: | ||
1326 | /* | ||
1327 | * we either can not tell the files are hardlinked (as with | ||
1328 | * Windows servers) or files are not hardlinked. Delete the | ||
1329 | * target manually before renaming to follow POSIX rather than | ||
1330 | * Windows semantics | ||
1331 | */ | ||
1332 | cifs_unlink(target_inode, target_direntry); | ||
1333 | rc = cifs_do_rename(xid, source_direntry, fromName, | ||
1334 | target_direntry, toName); | ||
1206 | } | 1335 | } |
1207 | 1336 | ||
1208 | cifs_rename_exit: | 1337 | cifs_rename_exit: |
1338 | kfree(info_buf_source); | ||
1209 | kfree(fromName); | 1339 | kfree(fromName); |
1210 | kfree(toName); | 1340 | kfree(toName); |
1211 | FreeXid(xid); | 1341 | FreeXid(xid); |
@@ -1506,101 +1636,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1506 | } | 1636 | } |
1507 | 1637 | ||
1508 | static int | 1638 | static int |
1509 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
1510 | char *full_path, __u32 dosattr) | ||
1511 | { | ||
1512 | int rc; | ||
1513 | int oplock = 0; | ||
1514 | __u16 netfid; | ||
1515 | __u32 netpid; | ||
1516 | bool set_time = false; | ||
1517 | struct cifsFileInfo *open_file; | ||
1518 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
1519 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1520 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
1521 | FILE_BASIC_INFO info_buf; | ||
1522 | |||
1523 | if (attrs->ia_valid & ATTR_ATIME) { | ||
1524 | set_time = true; | ||
1525 | info_buf.LastAccessTime = | ||
1526 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
1527 | } else | ||
1528 | info_buf.LastAccessTime = 0; | ||
1529 | |||
1530 | if (attrs->ia_valid & ATTR_MTIME) { | ||
1531 | set_time = true; | ||
1532 | info_buf.LastWriteTime = | ||
1533 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
1534 | } else | ||
1535 | info_buf.LastWriteTime = 0; | ||
1536 | |||
1537 | /* | ||
1538 | * Samba throws this field away, but windows may actually use it. | ||
1539 | * Do not set ctime unless other time stamps are changed explicitly | ||
1540 | * (i.e. by utimes()) since we would then have a mix of client and | ||
1541 | * server times. | ||
1542 | */ | ||
1543 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
1544 | cFYI(1, ("CIFS - CTIME changed")); | ||
1545 | info_buf.ChangeTime = | ||
1546 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
1547 | } else | ||
1548 | info_buf.ChangeTime = 0; | ||
1549 | |||
1550 | info_buf.CreationTime = 0; /* don't change */ | ||
1551 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
1552 | |||
1553 | /* | ||
1554 | * If the file is already open for write, just use that fileid | ||
1555 | */ | ||
1556 | open_file = find_writable_file(cifsInode); | ||
1557 | if (open_file) { | ||
1558 | netfid = open_file->netfid; | ||
1559 | netpid = open_file->pid; | ||
1560 | goto set_via_filehandle; | ||
1561 | } | ||
1562 | |||
1563 | /* | ||
1564 | * NT4 apparently returns success on this call, but it doesn't | ||
1565 | * really work. | ||
1566 | */ | ||
1567 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
1568 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
1569 | &info_buf, cifs_sb->local_nls, | ||
1570 | cifs_sb->mnt_cifs_flags & | ||
1571 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1572 | if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
1573 | goto out; | ||
1574 | } | ||
1575 | |||
1576 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
1577 | "times not supported by this server")); | ||
1578 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
1579 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
1580 | CREATE_NOT_DIR, &netfid, &oplock, | ||
1581 | NULL, cifs_sb->local_nls, | ||
1582 | cifs_sb->mnt_cifs_flags & | ||
1583 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
1584 | |||
1585 | if (rc != 0) { | ||
1586 | if (rc == -EIO) | ||
1587 | rc = -EINVAL; | ||
1588 | goto out; | ||
1589 | } | ||
1590 | |||
1591 | netpid = current->tgid; | ||
1592 | |||
1593 | set_via_filehandle: | ||
1594 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
1595 | if (open_file == NULL) | ||
1596 | CIFSSMBClose(xid, pTcon, netfid); | ||
1597 | else | ||
1598 | atomic_dec(&open_file->wrtPending); | ||
1599 | out: | ||
1600 | return rc; | ||
1601 | } | ||
1602 | |||
1603 | static int | ||
1604 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | 1639 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) |
1605 | { | 1640 | { |
1606 | int rc; | 1641 | int rc; |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4b17f8fe3157..88786ba02d27 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -150,8 +150,7 @@ cifs_buf_get(void) | |||
150 | but it may be more efficient to always alloc same size | 150 | but it may be more efficient to always alloc same size |
151 | albeit slightly larger than necessary and maxbuffersize | 151 | albeit slightly larger than necessary and maxbuffersize |
152 | defaults to this and can not be bigger */ | 152 | defaults to this and can not be bigger */ |
153 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp, | 153 | ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS); |
154 | GFP_KERNEL | GFP_NOFS); | ||
155 | 154 | ||
156 | /* clear the first few header bytes */ | 155 | /* clear the first few header bytes */ |
157 | /* for most paths, more is cleared in header_assemble */ | 156 | /* for most paths, more is cleared in header_assemble */ |
@@ -188,8 +187,7 @@ cifs_small_buf_get(void) | |||
188 | but it may be more efficient to always alloc same size | 187 | but it may be more efficient to always alloc same size |
189 | albeit slightly larger than necessary and maxbuffersize | 188 | albeit slightly larger than necessary and maxbuffersize |
190 | defaults to this and can not be bigger */ | 189 | defaults to this and can not be bigger */ |
191 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, | 190 | ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS); |
192 | GFP_KERNEL | GFP_NOFS); | ||
193 | if (ret_buf) { | 191 | if (ret_buf) { |
194 | /* No need to clear memory here, cleared in header assemble */ | 192 | /* No need to clear memory here, cleared in header assemble */ |
195 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ | 193 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ |
@@ -313,8 +311,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
313 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; | 311 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; |
314 | buffer->Pid = cpu_to_le16((__u16)current->tgid); | 312 | buffer->Pid = cpu_to_le16((__u16)current->tgid); |
315 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); | 313 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); |
316 | spin_lock(&GlobalMid_Lock); | ||
317 | spin_unlock(&GlobalMid_Lock); | ||
318 | if (treeCon) { | 314 | if (treeCon) { |
319 | buffer->Tid = treeCon->tid; | 315 | buffer->Tid = treeCon->tid; |
320 | if (treeCon->ses) { | 316 | if (treeCon->ses) { |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5f40ed3473f5..765adf12d54f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -640,6 +640,70 @@ static int is_dir_changed(struct file *file) | |||
640 | 640 | ||
641 | } | 641 | } |
642 | 642 | ||
643 | static int cifs_save_resume_key(const char *current_entry, | ||
644 | struct cifsFileInfo *cifsFile) | ||
645 | { | ||
646 | int rc = 0; | ||
647 | unsigned int len = 0; | ||
648 | __u16 level; | ||
649 | char *filename; | ||
650 | |||
651 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
652 | return -EINVAL; | ||
653 | |||
654 | level = cifsFile->srch_inf.info_level; | ||
655 | |||
656 | if (level == SMB_FIND_FILE_UNIX) { | ||
657 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
658 | |||
659 | filename = &pFindData->FileName[0]; | ||
660 | if (cifsFile->srch_inf.unicode) { | ||
661 | len = cifs_unicode_bytelen(filename); | ||
662 | } else { | ||
663 | /* BB should we make this strnlen of PATH_MAX? */ | ||
664 | len = strnlen(filename, PATH_MAX); | ||
665 | } | ||
666 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
667 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
668 | FILE_DIRECTORY_INFO *pFindData = | ||
669 | (FILE_DIRECTORY_INFO *)current_entry; | ||
670 | filename = &pFindData->FileName[0]; | ||
671 | len = le32_to_cpu(pFindData->FileNameLength); | ||
672 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
673 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
674 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
675 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
676 | filename = &pFindData->FileName[0]; | ||
677 | len = le32_to_cpu(pFindData->FileNameLength); | ||
678 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
679 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
680 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
681 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
682 | filename = &pFindData->FileName[0]; | ||
683 | len = le32_to_cpu(pFindData->FileNameLength); | ||
684 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
685 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
686 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
687 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
688 | filename = &pFindData->FileName[0]; | ||
689 | len = le32_to_cpu(pFindData->FileNameLength); | ||
690 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
691 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
692 | FIND_FILE_STANDARD_INFO *pFindData = | ||
693 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
694 | filename = &pFindData->FileName[0]; | ||
695 | /* one byte length, no name conversion */ | ||
696 | len = (unsigned int)pFindData->FileNameLength; | ||
697 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
698 | } else { | ||
699 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
700 | return -EINVAL; | ||
701 | } | ||
702 | cifsFile->srch_inf.resume_name_len = len; | ||
703 | cifsFile->srch_inf.presume_name = filename; | ||
704 | return rc; | ||
705 | } | ||
706 | |||
643 | /* find the corresponding entry in the search */ | 707 | /* find the corresponding entry in the search */ |
644 | /* Note that the SMB server returns search entries for . and .. which | 708 | /* Note that the SMB server returns search entries for . and .. which |
645 | complicates logic here if we choose to parse for them and we do not | 709 | complicates logic here if we choose to parse for them and we do not |
@@ -703,6 +767,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
703 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 767 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && |
704 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 768 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { |
705 | cFYI(1, ("calling findnext2")); | 769 | cFYI(1, ("calling findnext2")); |
770 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
706 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, | 771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, |
707 | &cifsFile->srch_inf); | 772 | &cifsFile->srch_inf); |
708 | if (rc) | 773 | if (rc) |
@@ -919,69 +984,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, | |||
919 | return rc; | 984 | return rc; |
920 | } | 985 | } |
921 | 986 | ||
922 | static int cifs_save_resume_key(const char *current_entry, | ||
923 | struct cifsFileInfo *cifsFile) | ||
924 | { | ||
925 | int rc = 0; | ||
926 | unsigned int len = 0; | ||
927 | __u16 level; | ||
928 | char *filename; | ||
929 | |||
930 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | level = cifsFile->srch_inf.info_level; | ||
934 | |||
935 | if (level == SMB_FIND_FILE_UNIX) { | ||
936 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
937 | |||
938 | filename = &pFindData->FileName[0]; | ||
939 | if (cifsFile->srch_inf.unicode) { | ||
940 | len = cifs_unicode_bytelen(filename); | ||
941 | } else { | ||
942 | /* BB should we make this strnlen of PATH_MAX? */ | ||
943 | len = strnlen(filename, PATH_MAX); | ||
944 | } | ||
945 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
946 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
947 | FILE_DIRECTORY_INFO *pFindData = | ||
948 | (FILE_DIRECTORY_INFO *)current_entry; | ||
949 | filename = &pFindData->FileName[0]; | ||
950 | len = le32_to_cpu(pFindData->FileNameLength); | ||
951 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
952 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
953 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
954 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
955 | filename = &pFindData->FileName[0]; | ||
956 | len = le32_to_cpu(pFindData->FileNameLength); | ||
957 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
958 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
959 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
960 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
961 | filename = &pFindData->FileName[0]; | ||
962 | len = le32_to_cpu(pFindData->FileNameLength); | ||
963 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
964 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
965 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
966 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
967 | filename = &pFindData->FileName[0]; | ||
968 | len = le32_to_cpu(pFindData->FileNameLength); | ||
969 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
970 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
971 | FIND_FILE_STANDARD_INFO *pFindData = | ||
972 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
973 | filename = &pFindData->FileName[0]; | ||
974 | /* one byte length, no name conversion */ | ||
975 | len = (unsigned int)pFindData->FileNameLength; | ||
976 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
977 | } else { | ||
978 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
979 | return -EINVAL; | ||
980 | } | ||
981 | cifsFile->srch_inf.resume_name_len = len; | ||
982 | cifsFile->srch_inf.presume_name = filename; | ||
983 | return rc; | ||
984 | } | ||
985 | 987 | ||
986 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 988 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) |
987 | { | 989 | { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..2851d5da0c8c 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; | 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; |
411 | 411 | ||
412 | pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; | ||
413 | |||
412 | /* no capabilities flags in old lanman negotiation */ | 414 | /* no capabilities flags in old lanman negotiation */ |
413 | 415 | ||
414 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 416 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
505 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); | 507 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); |
506 | } else | 508 | } else |
507 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); | 509 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); |
508 | } else if (type == Kerberos) { | 510 | } else if (type == Kerberos || type == MSKerberos) { |
509 | #ifdef CONFIG_CIFS_UPCALL | 511 | #ifdef CONFIG_CIFS_UPCALL |
510 | struct cifs_spnego_msg *msg; | 512 | struct cifs_spnego_msg *msg; |
511 | spnego_key = cifs_get_spnego_key(ses); | 513 | spnego_key = cifs_get_spnego_key(ses); |
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
516 | } | 518 | } |
517 | 519 | ||
518 | msg = spnego_key->payload.data; | 520 | msg = spnego_key->payload.data; |
521 | /* check version field to make sure that cifs.upcall is | ||
522 | sending us a response in an expected form */ | ||
523 | if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { | ||
524 | cERROR(1, ("incorrect version of cifs.upcall (expected" | ||
525 | " %d but got %d)", | ||
526 | CIFS_SPNEGO_UPCALL_VERSION, msg->version)); | ||
527 | rc = -EKEYREJECTED; | ||
528 | goto ssetup_exit; | ||
529 | } | ||
519 | /* bail out if key is too long */ | 530 | /* bail out if key is too long */ |
520 | if (msg->sesskey_len > | 531 | if (msg->sesskey_len > |
521 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 532 | sizeof(ses->server->mac_signing_key.data.krb5)) { |
@@ -613,8 +624,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
613 | ses, nls_cp); | 624 | ses, nls_cp); |
614 | 625 | ||
615 | ssetup_exit: | 626 | ssetup_exit: |
616 | if (spnego_key) | 627 | if (spnego_key) { |
628 | key_revoke(spnego_key); | ||
617 | key_put(spnego_key); | 629 | key_put(spnego_key); |
630 | } | ||
618 | kfree(str_area); | 631 | kfree(str_area); |
619 | if (resp_buf_type == CIFS_SMALL_BUFFER) { | 632 | if (resp_buf_type == CIFS_SMALL_BUFFER) { |
620 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); | 633 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e286db9f5ee2..bf0e6d8e382a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -50,8 +50,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) | |||
50 | return NULL; | 50 | return NULL; |
51 | } | 51 | } |
52 | 52 | ||
53 | temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, | 53 | temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); |
54 | GFP_KERNEL | GFP_NOFS); | ||
55 | if (temp == NULL) | 54 | if (temp == NULL) |
56 | return temp; | 55 | return temp; |
57 | else { | 56 | else { |
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 0d9b80ec689c..cfd29da714d1 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -362,9 +362,8 @@ static int init_coda_psdev(void) | |||
362 | goto out_chrdev; | 362 | goto out_chrdev; |
363 | } | 363 | } |
364 | for (i = 0; i < MAX_CODADEVS; i++) | 364 | for (i = 0; i < MAX_CODADEVS; i++) |
365 | device_create_drvdata(coda_psdev_class, NULL, | 365 | device_create(coda_psdev_class, NULL, |
366 | MKDEV(CODA_PSDEV_MAJOR, i), | 366 | MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); |
367 | NULL, "cfs%d", i); | ||
368 | coda_sysctl_init(); | 367 | coda_sysctl_init(); |
369 | goto out; | 368 | goto out; |
370 | 369 | ||
diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..5f9ec449c799 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _ | |||
137 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 137 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
138 | } | 138 | } |
139 | 139 | ||
140 | static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | ||
141 | { | ||
142 | compat_ino_t ino = stat->ino; | ||
143 | typeof(ubuf->st_uid) uid = 0; | ||
144 | typeof(ubuf->st_gid) gid = 0; | ||
145 | int err; | ||
146 | |||
147 | SET_UID(uid, stat->uid); | ||
148 | SET_GID(gid, stat->gid); | ||
149 | |||
150 | if ((u64) stat->size > MAX_NON_LFS || | ||
151 | !old_valid_dev(stat->dev) || | ||
152 | !old_valid_dev(stat->rdev)) | ||
153 | return -EOVERFLOW; | ||
154 | if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) | ||
155 | return -EOVERFLOW; | ||
156 | |||
157 | if (clear_user(ubuf, sizeof(*ubuf))) | ||
158 | return -EFAULT; | ||
159 | |||
160 | err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); | ||
161 | err |= __put_user(ino, &ubuf->st_ino); | ||
162 | err |= __put_user(stat->mode, &ubuf->st_mode); | ||
163 | err |= __put_user(stat->nlink, &ubuf->st_nlink); | ||
164 | err |= __put_user(uid, &ubuf->st_uid); | ||
165 | err |= __put_user(gid, &ubuf->st_gid); | ||
166 | err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); | ||
167 | err |= __put_user(stat->size, &ubuf->st_size); | ||
168 | err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); | ||
169 | err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); | ||
170 | err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); | ||
171 | err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); | ||
172 | err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); | ||
173 | err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); | ||
174 | err |= __put_user(stat->blksize, &ubuf->st_blksize); | ||
175 | err |= __put_user(stat->blocks, &ubuf->st_blocks); | ||
176 | return err; | ||
177 | } | ||
178 | |||
140 | asmlinkage long compat_sys_newstat(char __user * filename, | 179 | asmlinkage long compat_sys_newstat(char __user * filename, |
141 | struct compat_stat __user *statbuf) | 180 | struct compat_stat __user *statbuf) |
142 | { | 181 | { |
@@ -792,8 +831,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, | |||
792 | if (buf->result) | 831 | if (buf->result) |
793 | return -EINVAL; | 832 | return -EINVAL; |
794 | d_ino = ino; | 833 | d_ino = ino; |
795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 834 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
835 | buf->result = -EOVERFLOW; | ||
796 | return -EOVERFLOW; | 836 | return -EOVERFLOW; |
837 | } | ||
797 | buf->result++; | 838 | buf->result++; |
798 | dirent = buf->dirent; | 839 | dirent = buf->dirent; |
799 | if (!access_ok(VERIFY_WRITE, dirent, | 840 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -862,8 +903,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, | |||
862 | if (reclen > buf->count) | 903 | if (reclen > buf->count) |
863 | return -EINVAL; | 904 | return -EINVAL; |
864 | d_ino = ino; | 905 | d_ino = ino; |
865 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 906 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
907 | buf->error = -EOVERFLOW; | ||
866 | return -EOVERFLOW; | 908 | return -EOVERFLOW; |
909 | } | ||
867 | dirent = buf->previous; | 910 | dirent = buf->previous; |
868 | if (dirent) { | 911 | if (dirent) { |
869 | if (__put_user(offset, &dirent->d_off)) | 912 | if (__put_user(offset, &dirent->d_off)) |
@@ -1235,7 +1278,7 @@ static int compat_count(compat_uptr_t __user *argv, int max) | |||
1235 | if (!p) | 1278 | if (!p) |
1236 | break; | 1279 | break; |
1237 | argv++; | 1280 | argv++; |
1238 | if(++i > max) | 1281 | if (i++ >= max) |
1239 | return -E2BIG; | 1282 | return -E2BIG; |
1240 | } | 1283 | } |
1241 | } | 1284 | } |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1311 | * Ensure that no racing symlink() will make detach_prep() fail while | 1311 | * Ensure that no racing symlink() will make detach_prep() fail while |
1312 | * the new link is temporarily attached | 1312 | * the new link is temporarily attached |
1313 | */ | 1313 | */ |
1314 | mutex_lock(&configfs_symlink_mutex); | ||
1315 | spin_lock(&configfs_dirent_lock); | ||
1316 | do { | 1314 | do { |
1317 | struct mutex *wait_mutex; | 1315 | struct mutex *wait_mutex; |
1318 | 1316 | ||
1317 | mutex_lock(&configfs_symlink_mutex); | ||
1318 | spin_lock(&configfs_dirent_lock); | ||
1319 | ret = configfs_detach_prep(dentry, &wait_mutex); | 1319 | ret = configfs_detach_prep(dentry, &wait_mutex); |
1320 | if (ret) { | 1320 | if (ret) |
1321 | configfs_detach_rollback(dentry); | 1321 | configfs_detach_rollback(dentry); |
1322 | spin_unlock(&configfs_dirent_lock); | 1322 | spin_unlock(&configfs_dirent_lock); |
1323 | mutex_unlock(&configfs_symlink_mutex); | 1323 | mutex_unlock(&configfs_symlink_mutex); |
1324 | |||
1325 | if (ret) { | ||
1324 | if (ret != -EAGAIN) { | 1326 | if (ret != -EAGAIN) { |
1325 | config_item_put(parent_item); | 1327 | config_item_put(parent_item); |
1326 | return ret; | 1328 | return ret; |
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1329 | /* Wait until the racing operation terminates */ | 1331 | /* Wait until the racing operation terminates */ |
1330 | mutex_lock(wait_mutex); | 1332 | mutex_lock(wait_mutex); |
1331 | mutex_unlock(wait_mutex); | 1333 | mutex_unlock(wait_mutex); |
1332 | |||
1333 | mutex_lock(&configfs_symlink_mutex); | ||
1334 | spin_lock(&configfs_dirent_lock); | ||
1335 | } | 1334 | } |
1336 | } while (ret == -EAGAIN); | 1335 | } while (ret == -EAGAIN); |
1337 | spin_unlock(&configfs_dirent_lock); | ||
1338 | mutex_unlock(&configfs_symlink_mutex); | ||
1339 | 1336 | ||
1340 | /* Get a working ref for the duration of this function */ | 1337 | /* Get a working ref for the duration of this function */ |
1341 | item = configfs_get_config_item(dentry); | 1338 | item = configfs_get_config_item(dentry); |
diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1236 | * If no entry exists with the exact case name, allocate new dentry with | 1236 | * If no entry exists with the exact case name, allocate new dentry with |
1237 | * the exact case, and return the spliced entry. | 1237 | * the exact case, and return the spliced entry. |
1238 | */ | 1238 | */ |
1239 | struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, | 1239 | struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, |
1240 | struct qstr *name) | 1240 | struct qstr *name) |
1241 | { | 1241 | { |
1242 | int error; | 1242 | int error; |
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1395 | if (dentry->d_parent != parent) | 1395 | if (dentry->d_parent != parent) |
1396 | goto next; | 1396 | goto next; |
1397 | 1397 | ||
1398 | /* non-existing due to RCU? */ | ||
1399 | if (d_unhashed(dentry)) | ||
1400 | goto next; | ||
1401 | |||
1398 | /* | 1402 | /* |
1399 | * It is safe to compare names since d_move() cannot | 1403 | * It is safe to compare names since d_move() cannot |
1400 | * change the qstr (protected by d_lock). | 1404 | * change the qstr (protected by d_lock). |
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1410 | goto next; | 1414 | goto next; |
1411 | } | 1415 | } |
1412 | 1416 | ||
1413 | if (!d_unhashed(dentry)) { | 1417 | atomic_inc(&dentry->d_count); |
1414 | atomic_inc(&dentry->d_count); | 1418 | found = dentry; |
1415 | found = dentry; | ||
1416 | } | ||
1417 | spin_unlock(&dentry->d_lock); | 1419 | spin_unlock(&dentry->d_lock); |
1418 | break; | 1420 | break; |
1419 | next: | 1421 | next: |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 08e28c9bb416..3dbe2169cf36 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -26,8 +26,7 @@ | |||
26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
27 | #include <linux/fsnotify.h> | 27 | #include <linux/fsnotify.h> |
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | 29 | #include <linux/magic.h> | |
30 | #define DEBUGFS_MAGIC 0x64626720 | ||
31 | 30 | ||
32 | static struct vfsmount *debugfs_mount; | 31 | static struct vfsmount *debugfs_mount; |
33 | static int debugfs_mount_count; | 32 | static int debugfs_mount_count; |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 488eb424f662..4a714f6c1bed 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 | 27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 |
28 | 28 | ||
29 | #define DEVPTS_DEFAULT_MODE 0600 | 29 | #define DEVPTS_DEFAULT_MODE 0600 |
30 | #define PTMX_MINOR 2 | ||
30 | 31 | ||
31 | extern int pty_limit; /* Config limit on Unix98 ptys */ | 32 | extern int pty_limit; /* Config limit on Unix98 ptys */ |
32 | static DEFINE_IDA(allocated_ptys); | 33 | static DEFINE_IDA(allocated_ptys); |
@@ -48,7 +49,7 @@ enum { | |||
48 | Opt_err | 49 | Opt_err |
49 | }; | 50 | }; |
50 | 51 | ||
51 | static match_table_t tokens = { | 52 | static const match_table_t tokens = { |
52 | {Opt_uid, "uid=%u"}, | 53 | {Opt_uid, "uid=%u"}, |
53 | {Opt_gid, "gid=%u"}, | 54 | {Opt_gid, "gid=%u"}, |
54 | {Opt_mode, "mode=%o"}, | 55 | {Opt_mode, "mode=%o"}, |
@@ -169,15 +170,7 @@ static struct file_system_type devpts_fs_type = { | |||
169 | * to the System V naming convention | 170 | * to the System V naming convention |
170 | */ | 171 | */ |
171 | 172 | ||
172 | static struct dentry *get_node(int num) | 173 | int devpts_new_index(struct inode *ptmx_inode) |
173 | { | ||
174 | char s[12]; | ||
175 | struct dentry *root = devpts_root; | ||
176 | mutex_lock(&root->d_inode->i_mutex); | ||
177 | return lookup_one_len(s, root, sprintf(s, "%d", num)); | ||
178 | } | ||
179 | |||
180 | int devpts_new_index(void) | ||
181 | { | 174 | { |
182 | int index; | 175 | int index; |
183 | int ida_ret; | 176 | int ida_ret; |
@@ -205,20 +198,21 @@ retry: | |||
205 | return index; | 198 | return index; |
206 | } | 199 | } |
207 | 200 | ||
208 | void devpts_kill_index(int idx) | 201 | void devpts_kill_index(struct inode *ptmx_inode, int idx) |
209 | { | 202 | { |
210 | mutex_lock(&allocated_ptys_lock); | 203 | mutex_lock(&allocated_ptys_lock); |
211 | ida_remove(&allocated_ptys, idx); | 204 | ida_remove(&allocated_ptys, idx); |
212 | mutex_unlock(&allocated_ptys_lock); | 205 | mutex_unlock(&allocated_ptys_lock); |
213 | } | 206 | } |
214 | 207 | ||
215 | int devpts_pty_new(struct tty_struct *tty) | 208 | int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) |
216 | { | 209 | { |
217 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ | 210 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ |
218 | struct tty_driver *driver = tty->driver; | 211 | struct tty_driver *driver = tty->driver; |
219 | dev_t device = MKDEV(driver->major, driver->minor_start+number); | 212 | dev_t device = MKDEV(driver->major, driver->minor_start+number); |
220 | struct dentry *dentry; | 213 | struct dentry *dentry; |
221 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); | 214 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); |
215 | char s[12]; | ||
222 | 216 | ||
223 | /* We're supposed to be given the slave end of a pty */ | 217 | /* We're supposed to be given the slave end of a pty */ |
224 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); | 218 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); |
@@ -233,10 +227,15 @@ int devpts_pty_new(struct tty_struct *tty) | |||
233 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 227 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
234 | init_special_inode(inode, S_IFCHR|config.mode, device); | 228 | init_special_inode(inode, S_IFCHR|config.mode, device); |
235 | inode->i_private = tty; | 229 | inode->i_private = tty; |
230 | tty->driver_data = inode; | ||
236 | 231 | ||
237 | dentry = get_node(number); | 232 | sprintf(s, "%d", number); |
238 | if (!IS_ERR(dentry) && !dentry->d_inode) { | 233 | |
239 | d_instantiate(dentry, inode); | 234 | mutex_lock(&devpts_root->d_inode->i_mutex); |
235 | |||
236 | dentry = d_alloc_name(devpts_root, s); | ||
237 | if (!IS_ERR(dentry)) { | ||
238 | d_add(dentry, inode); | ||
240 | fsnotify_create(devpts_root->d_inode, dentry); | 239 | fsnotify_create(devpts_root->d_inode, dentry); |
241 | } | 240 | } |
242 | 241 | ||
@@ -245,36 +244,31 @@ int devpts_pty_new(struct tty_struct *tty) | |||
245 | return 0; | 244 | return 0; |
246 | } | 245 | } |
247 | 246 | ||
248 | struct tty_struct *devpts_get_tty(int number) | 247 | struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) |
249 | { | 248 | { |
250 | struct dentry *dentry = get_node(number); | 249 | BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
251 | struct tty_struct *tty; | ||
252 | |||
253 | tty = NULL; | ||
254 | if (!IS_ERR(dentry)) { | ||
255 | if (dentry->d_inode) | ||
256 | tty = dentry->d_inode->i_private; | ||
257 | dput(dentry); | ||
258 | } | ||
259 | 250 | ||
260 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 251 | if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) |
261 | 252 | return (struct tty_struct *)pts_inode->i_private; | |
262 | return tty; | 253 | return NULL; |
263 | } | 254 | } |
264 | 255 | ||
265 | void devpts_pty_kill(int number) | 256 | void devpts_pty_kill(struct tty_struct *tty) |
266 | { | 257 | { |
267 | struct dentry *dentry = get_node(number); | 258 | struct inode *inode = tty->driver_data; |
259 | struct dentry *dentry; | ||
268 | 260 | ||
269 | if (!IS_ERR(dentry)) { | 261 | BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
270 | struct inode *inode = dentry->d_inode; | 262 | |
271 | if (inode) { | 263 | mutex_lock(&devpts_root->d_inode->i_mutex); |
272 | inode->i_nlink--; | 264 | |
273 | d_delete(dentry); | 265 | dentry = d_find_alias(inode); |
274 | dput(dentry); | 266 | if (dentry && !IS_ERR(dentry)) { |
275 | } | 267 | inode->i_nlink--; |
268 | d_delete(dentry); | ||
276 | dput(dentry); | 269 | dput(dentry); |
277 | } | 270 | } |
271 | |||
278 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 272 | mutex_unlock(&devpts_root->d_inode->i_mutex); |
279 | } | 273 | } |
280 | 274 | ||
diff --git a/fs/direct-io.c b/fs/direct-io.c index 9606ee848fd8..af0558dbe8b7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -5,11 +5,11 @@ | |||
5 | * | 5 | * |
6 | * O_DIRECT | 6 | * O_DIRECT |
7 | * | 7 | * |
8 | * 04Jul2002 akpm@zip.com.au | 8 | * 04Jul2002 Andrew Morton |
9 | * Initial version | 9 | * Initial version |
10 | * 11Sep2002 janetinc@us.ibm.com | 10 | * 11Sep2002 janetinc@us.ibm.com |
11 | * added readv/writev support. | 11 | * added readv/writev support. |
12 | * 29Oct2002 akpm@zip.com.au | 12 | * 29Oct2002 Andrew Morton |
13 | * rewrote bio_add_page() support. | 13 | * rewrote bio_add_page() support. |
14 | * 30Oct2002 pbadari@us.ibm.com | 14 | * 30Oct2002 pbadari@us.ibm.com |
15 | * added support for non-aligned IO. | 15 | * added support for non-aligned IO. |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 89d2fb7b991a..fd9859f92fad 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -14,6 +14,9 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/configfs.h> | 16 | #include <linux/configfs.h> |
17 | #include <linux/in.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <net/ipv6.h> | ||
17 | #include <net/sock.h> | 20 | #include <net/sock.h> |
18 | 21 | ||
19 | #include "config.h" | 22 | #include "config.h" |
@@ -377,24 +380,24 @@ static struct config_item_type node_type = { | |||
377 | .ct_owner = THIS_MODULE, | 380 | .ct_owner = THIS_MODULE, |
378 | }; | 381 | }; |
379 | 382 | ||
380 | static struct dlm_cluster *to_cluster(struct config_item *i) | 383 | static struct dlm_cluster *config_item_to_cluster(struct config_item *i) |
381 | { | 384 | { |
382 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : | 385 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : |
383 | NULL; | 386 | NULL; |
384 | } | 387 | } |
385 | 388 | ||
386 | static struct dlm_space *to_space(struct config_item *i) | 389 | static struct dlm_space *config_item_to_space(struct config_item *i) |
387 | { | 390 | { |
388 | return i ? container_of(to_config_group(i), struct dlm_space, group) : | 391 | return i ? container_of(to_config_group(i), struct dlm_space, group) : |
389 | NULL; | 392 | NULL; |
390 | } | 393 | } |
391 | 394 | ||
392 | static struct dlm_comm *to_comm(struct config_item *i) | 395 | static struct dlm_comm *config_item_to_comm(struct config_item *i) |
393 | { | 396 | { |
394 | return i ? container_of(i, struct dlm_comm, item) : NULL; | 397 | return i ? container_of(i, struct dlm_comm, item) : NULL; |
395 | } | 398 | } |
396 | 399 | ||
397 | static struct dlm_node *to_node(struct config_item *i) | 400 | static struct dlm_node *config_item_to_node(struct config_item *i) |
398 | { | 401 | { |
399 | return i ? container_of(i, struct dlm_node, item) : NULL; | 402 | return i ? container_of(i, struct dlm_node, item) : NULL; |
400 | } | 403 | } |
@@ -450,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
450 | 453 | ||
451 | static void drop_cluster(struct config_group *g, struct config_item *i) | 454 | static void drop_cluster(struct config_group *g, struct config_item *i) |
452 | { | 455 | { |
453 | struct dlm_cluster *cl = to_cluster(i); | 456 | struct dlm_cluster *cl = config_item_to_cluster(i); |
454 | struct config_item *tmp; | 457 | struct config_item *tmp; |
455 | int j; | 458 | int j; |
456 | 459 | ||
@@ -468,7 +471,7 @@ static void drop_cluster(struct config_group *g, struct config_item *i) | |||
468 | 471 | ||
469 | static void release_cluster(struct config_item *i) | 472 | static void release_cluster(struct config_item *i) |
470 | { | 473 | { |
471 | struct dlm_cluster *cl = to_cluster(i); | 474 | struct dlm_cluster *cl = config_item_to_cluster(i); |
472 | kfree(cl->group.default_groups); | 475 | kfree(cl->group.default_groups); |
473 | kfree(cl); | 476 | kfree(cl); |
474 | } | 477 | } |
@@ -507,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) | |||
507 | 510 | ||
508 | static void drop_space(struct config_group *g, struct config_item *i) | 511 | static void drop_space(struct config_group *g, struct config_item *i) |
509 | { | 512 | { |
510 | struct dlm_space *sp = to_space(i); | 513 | struct dlm_space *sp = config_item_to_space(i); |
511 | struct config_item *tmp; | 514 | struct config_item *tmp; |
512 | int j; | 515 | int j; |
513 | 516 | ||
@@ -524,7 +527,7 @@ static void drop_space(struct config_group *g, struct config_item *i) | |||
524 | 527 | ||
525 | static void release_space(struct config_item *i) | 528 | static void release_space(struct config_item *i) |
526 | { | 529 | { |
527 | struct dlm_space *sp = to_space(i); | 530 | struct dlm_space *sp = config_item_to_space(i); |
528 | kfree(sp->group.default_groups); | 531 | kfree(sp->group.default_groups); |
529 | kfree(sp); | 532 | kfree(sp); |
530 | } | 533 | } |
@@ -546,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
546 | 549 | ||
547 | static void drop_comm(struct config_group *g, struct config_item *i) | 550 | static void drop_comm(struct config_group *g, struct config_item *i) |
548 | { | 551 | { |
549 | struct dlm_comm *cm = to_comm(i); | 552 | struct dlm_comm *cm = config_item_to_comm(i); |
550 | if (local_comm == cm) | 553 | if (local_comm == cm) |
551 | local_comm = NULL; | 554 | local_comm = NULL; |
552 | dlm_lowcomms_close(cm->nodeid); | 555 | dlm_lowcomms_close(cm->nodeid); |
@@ -557,13 +560,13 @@ static void drop_comm(struct config_group *g, struct config_item *i) | |||
557 | 560 | ||
558 | static void release_comm(struct config_item *i) | 561 | static void release_comm(struct config_item *i) |
559 | { | 562 | { |
560 | struct dlm_comm *cm = to_comm(i); | 563 | struct dlm_comm *cm = config_item_to_comm(i); |
561 | kfree(cm); | 564 | kfree(cm); |
562 | } | 565 | } |
563 | 566 | ||
564 | static struct config_item *make_node(struct config_group *g, const char *name) | 567 | static struct config_item *make_node(struct config_group *g, const char *name) |
565 | { | 568 | { |
566 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 569 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
567 | struct dlm_node *nd; | 570 | struct dlm_node *nd; |
568 | 571 | ||
569 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); | 572 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); |
@@ -585,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) | |||
585 | 588 | ||
586 | static void drop_node(struct config_group *g, struct config_item *i) | 589 | static void drop_node(struct config_group *g, struct config_item *i) |
587 | { | 590 | { |
588 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 591 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
589 | struct dlm_node *nd = to_node(i); | 592 | struct dlm_node *nd = config_item_to_node(i); |
590 | 593 | ||
591 | mutex_lock(&sp->members_lock); | 594 | mutex_lock(&sp->members_lock); |
592 | list_del(&nd->list); | 595 | list_del(&nd->list); |
@@ -598,7 +601,7 @@ static void drop_node(struct config_group *g, struct config_item *i) | |||
598 | 601 | ||
599 | static void release_node(struct config_item *i) | 602 | static void release_node(struct config_item *i) |
600 | { | 603 | { |
601 | struct dlm_node *nd = to_node(i); | 604 | struct dlm_node *nd = config_item_to_node(i); |
602 | kfree(nd); | 605 | kfree(nd); |
603 | } | 606 | } |
604 | 607 | ||
@@ -632,7 +635,7 @@ void dlm_config_exit(void) | |||
632 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | 635 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, |
633 | char *buf) | 636 | char *buf) |
634 | { | 637 | { |
635 | struct dlm_cluster *cl = to_cluster(i); | 638 | struct dlm_cluster *cl = config_item_to_cluster(i); |
636 | struct cluster_attribute *cla = | 639 | struct cluster_attribute *cla = |
637 | container_of(a, struct cluster_attribute, attr); | 640 | container_of(a, struct cluster_attribute, attr); |
638 | return cla->show ? cla->show(cl, buf) : 0; | 641 | return cla->show ? cla->show(cl, buf) : 0; |
@@ -642,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
642 | struct configfs_attribute *a, | 645 | struct configfs_attribute *a, |
643 | const char *buf, size_t len) | 646 | const char *buf, size_t len) |
644 | { | 647 | { |
645 | struct dlm_cluster *cl = to_cluster(i); | 648 | struct dlm_cluster *cl = config_item_to_cluster(i); |
646 | struct cluster_attribute *cla = | 649 | struct cluster_attribute *cla = |
647 | container_of(a, struct cluster_attribute, attr); | 650 | container_of(a, struct cluster_attribute, attr); |
648 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; | 651 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; |
@@ -651,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
651 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 654 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
652 | char *buf) | 655 | char *buf) |
653 | { | 656 | { |
654 | struct dlm_comm *cm = to_comm(i); | 657 | struct dlm_comm *cm = config_item_to_comm(i); |
655 | struct comm_attribute *cma = | 658 | struct comm_attribute *cma = |
656 | container_of(a, struct comm_attribute, attr); | 659 | container_of(a, struct comm_attribute, attr); |
657 | return cma->show ? cma->show(cm, buf) : 0; | 660 | return cma->show ? cma->show(cm, buf) : 0; |
@@ -660,7 +663,7 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | |||
660 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | 663 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, |
661 | const char *buf, size_t len) | 664 | const char *buf, size_t len) |
662 | { | 665 | { |
663 | struct dlm_comm *cm = to_comm(i); | 666 | struct dlm_comm *cm = config_item_to_comm(i); |
664 | struct comm_attribute *cma = | 667 | struct comm_attribute *cma = |
665 | container_of(a, struct comm_attribute, attr); | 668 | container_of(a, struct comm_attribute, attr); |
666 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | 669 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; |
@@ -714,7 +717,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | |||
714 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | 717 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, |
715 | char *buf) | 718 | char *buf) |
716 | { | 719 | { |
717 | struct dlm_node *nd = to_node(i); | 720 | struct dlm_node *nd = config_item_to_node(i); |
718 | struct node_attribute *nda = | 721 | struct node_attribute *nda = |
719 | container_of(a, struct node_attribute, attr); | 722 | container_of(a, struct node_attribute, attr); |
720 | return nda->show ? nda->show(nd, buf) : 0; | 723 | return nda->show ? nda->show(nd, buf) : 0; |
@@ -723,7 +726,7 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | |||
723 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | 726 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, |
724 | const char *buf, size_t len) | 727 | const char *buf, size_t len) |
725 | { | 728 | { |
726 | struct dlm_node *nd = to_node(i); | 729 | struct dlm_node *nd = config_item_to_node(i); |
727 | struct node_attribute *nda = | 730 | struct node_attribute *nda = |
728 | container_of(a, struct node_attribute, attr); | 731 | container_of(a, struct node_attribute, attr); |
729 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | 732 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; |
@@ -768,7 +771,7 @@ static struct dlm_space *get_space(char *name) | |||
768 | i = config_group_find_item(space_list, name); | 771 | i = config_group_find_item(space_list, name); |
769 | mutex_unlock(&space_list->cg_subsys->su_mutex); | 772 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
770 | 773 | ||
771 | return to_space(i); | 774 | return config_item_to_space(i); |
772 | } | 775 | } |
773 | 776 | ||
774 | static void put_space(struct dlm_space *sp) | 777 | static void put_space(struct dlm_space *sp) |
@@ -776,6 +779,33 @@ static void put_space(struct dlm_space *sp) | |||
776 | config_item_put(&sp->group.cg_item); | 779 | config_item_put(&sp->group.cg_item); |
777 | } | 780 | } |
778 | 781 | ||
782 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | ||
783 | { | ||
784 | switch (x->ss_family) { | ||
785 | case AF_INET: { | ||
786 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
787 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
788 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
789 | return 0; | ||
790 | if (sinx->sin_port != siny->sin_port) | ||
791 | return 0; | ||
792 | break; | ||
793 | } | ||
794 | case AF_INET6: { | ||
795 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
796 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
797 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
798 | return 0; | ||
799 | if (sinx->sin6_port != siny->sin6_port) | ||
800 | return 0; | ||
801 | break; | ||
802 | } | ||
803 | default: | ||
804 | return 0; | ||
805 | } | ||
806 | return 1; | ||
807 | } | ||
808 | |||
779 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | 809 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) |
780 | { | 810 | { |
781 | struct config_item *i; | 811 | struct config_item *i; |
@@ -788,7 +818,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
788 | mutex_lock(&clusters_root.subsys.su_mutex); | 818 | mutex_lock(&clusters_root.subsys.su_mutex); |
789 | 819 | ||
790 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 820 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
791 | cm = to_comm(i); | 821 | cm = config_item_to_comm(i); |
792 | 822 | ||
793 | if (nodeid) { | 823 | if (nodeid) { |
794 | if (cm->nodeid != nodeid) | 824 | if (cm->nodeid != nodeid) |
@@ -797,8 +827,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
797 | config_item_get(i); | 827 | config_item_get(i); |
798 | break; | 828 | break; |
799 | } else { | 829 | } else { |
800 | if (!cm->addr_count || | 830 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) |
801 | memcmp(cm->addr[0], addr, sizeof(*addr))) | ||
802 | continue; | 831 | continue; |
803 | found = 1; | 832 | found = 1; |
804 | config_item_get(i); | 833 | config_item_get(i); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5a7ac33b629c..868e4c9ef127 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -441,8 +441,11 @@ struct dlm_ls { | |||
441 | uint32_t ls_global_id; /* global unique lockspace ID */ | 441 | uint32_t ls_global_id; /* global unique lockspace ID */ |
442 | uint32_t ls_exflags; | 442 | uint32_t ls_exflags; |
443 | int ls_lvblen; | 443 | int ls_lvblen; |
444 | int ls_count; /* reference count */ | 444 | int ls_count; /* refcount of processes in |
445 | the dlm using this ls */ | ||
446 | int ls_create_count; /* create/release refcount */ | ||
445 | unsigned long ls_flags; /* LSFL_ */ | 447 | unsigned long ls_flags; /* LSFL_ */ |
448 | unsigned long ls_scan_time; | ||
446 | struct kobject ls_kobj; | 449 | struct kobject ls_kobj; |
447 | 450 | ||
448 | struct dlm_rsbtable *ls_rsbtbl; | 451 | struct dlm_rsbtable *ls_rsbtbl; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 499e16759e96..d910501de6d2 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -23,6 +23,7 @@ | |||
23 | #include "lock.h" | 23 | #include "lock.h" |
24 | #include "recover.h" | 24 | #include "recover.h" |
25 | #include "requestqueue.h" | 25 | #include "requestqueue.h" |
26 | #include "user.h" | ||
26 | 27 | ||
27 | static int ls_count; | 28 | static int ls_count; |
28 | static struct mutex ls_lock; | 29 | static struct mutex ls_lock; |
@@ -211,19 +212,41 @@ void dlm_lockspace_exit(void) | |||
211 | kset_unregister(dlm_kset); | 212 | kset_unregister(dlm_kset); |
212 | } | 213 | } |
213 | 214 | ||
215 | static struct dlm_ls *find_ls_to_scan(void) | ||
216 | { | ||
217 | struct dlm_ls *ls; | ||
218 | |||
219 | spin_lock(&lslist_lock); | ||
220 | list_for_each_entry(ls, &lslist, ls_list) { | ||
221 | if (time_after_eq(jiffies, ls->ls_scan_time + | ||
222 | dlm_config.ci_scan_secs * HZ)) { | ||
223 | spin_unlock(&lslist_lock); | ||
224 | return ls; | ||
225 | } | ||
226 | } | ||
227 | spin_unlock(&lslist_lock); | ||
228 | return NULL; | ||
229 | } | ||
230 | |||
214 | static int dlm_scand(void *data) | 231 | static int dlm_scand(void *data) |
215 | { | 232 | { |
216 | struct dlm_ls *ls; | 233 | struct dlm_ls *ls; |
234 | int timeout_jiffies = dlm_config.ci_scan_secs * HZ; | ||
217 | 235 | ||
218 | while (!kthread_should_stop()) { | 236 | while (!kthread_should_stop()) { |
219 | list_for_each_entry(ls, &lslist, ls_list) { | 237 | ls = find_ls_to_scan(); |
238 | if (ls) { | ||
220 | if (dlm_lock_recovery_try(ls)) { | 239 | if (dlm_lock_recovery_try(ls)) { |
240 | ls->ls_scan_time = jiffies; | ||
221 | dlm_scan_rsbs(ls); | 241 | dlm_scan_rsbs(ls); |
222 | dlm_scan_timeout(ls); | 242 | dlm_scan_timeout(ls); |
223 | dlm_unlock_recovery(ls); | 243 | dlm_unlock_recovery(ls); |
244 | } else { | ||
245 | ls->ls_scan_time += HZ; | ||
224 | } | 246 | } |
247 | } else { | ||
248 | schedule_timeout_interruptible(timeout_jiffies); | ||
225 | } | 249 | } |
226 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); | ||
227 | } | 250 | } |
228 | return 0; | 251 | return 0; |
229 | } | 252 | } |
@@ -246,23 +269,6 @@ static void dlm_scand_stop(void) | |||
246 | kthread_stop(scand_task); | 269 | kthread_stop(scand_task); |
247 | } | 270 | } |
248 | 271 | ||
249 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
250 | { | ||
251 | struct dlm_ls *ls; | ||
252 | |||
253 | spin_lock(&lslist_lock); | ||
254 | |||
255 | list_for_each_entry(ls, &lslist, ls_list) { | ||
256 | if (ls->ls_namelen == namelen && | ||
257 | memcmp(ls->ls_name, name, namelen) == 0) | ||
258 | goto out; | ||
259 | } | ||
260 | ls = NULL; | ||
261 | out: | ||
262 | spin_unlock(&lslist_lock); | ||
263 | return ls; | ||
264 | } | ||
265 | |||
266 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | 272 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) |
267 | { | 273 | { |
268 | struct dlm_ls *ls; | 274 | struct dlm_ls *ls; |
@@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls) | |||
327 | for (;;) { | 333 | for (;;) { |
328 | spin_lock(&lslist_lock); | 334 | spin_lock(&lslist_lock); |
329 | if (ls->ls_count == 0) { | 335 | if (ls->ls_count == 0) { |
336 | WARN_ON(ls->ls_create_count != 0); | ||
330 | list_del(&ls->ls_list); | 337 | list_del(&ls->ls_list); |
331 | spin_unlock(&lslist_lock); | 338 | spin_unlock(&lslist_lock); |
332 | return; | 339 | return; |
@@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
381 | uint32_t flags, int lvblen) | 388 | uint32_t flags, int lvblen) |
382 | { | 389 | { |
383 | struct dlm_ls *ls; | 390 | struct dlm_ls *ls; |
384 | int i, size, error = -ENOMEM; | 391 | int i, size, error; |
385 | int do_unreg = 0; | 392 | int do_unreg = 0; |
386 | 393 | ||
387 | if (namelen > DLM_LOCKSPACE_LEN) | 394 | if (namelen > DLM_LOCKSPACE_LEN) |
@@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
393 | if (!try_module_get(THIS_MODULE)) | 400 | if (!try_module_get(THIS_MODULE)) |
394 | return -EINVAL; | 401 | return -EINVAL; |
395 | 402 | ||
396 | ls = dlm_find_lockspace_name(name, namelen); | 403 | if (!dlm_user_daemon_available()) { |
397 | if (ls) { | 404 | module_put(THIS_MODULE); |
398 | *lockspace = ls; | 405 | return -EUNATCH; |
406 | } | ||
407 | |||
408 | error = 0; | ||
409 | |||
410 | spin_lock(&lslist_lock); | ||
411 | list_for_each_entry(ls, &lslist, ls_list) { | ||
412 | WARN_ON(ls->ls_create_count <= 0); | ||
413 | if (ls->ls_namelen != namelen) | ||
414 | continue; | ||
415 | if (memcmp(ls->ls_name, name, namelen)) | ||
416 | continue; | ||
417 | if (flags & DLM_LSFL_NEWEXCL) { | ||
418 | error = -EEXIST; | ||
419 | break; | ||
420 | } | ||
421 | ls->ls_create_count++; | ||
399 | module_put(THIS_MODULE); | 422 | module_put(THIS_MODULE); |
400 | return -EEXIST; | 423 | error = 1; /* not an error, return 0 */ |
424 | break; | ||
401 | } | 425 | } |
426 | spin_unlock(&lslist_lock); | ||
427 | |||
428 | if (error < 0) | ||
429 | goto out; | ||
430 | if (error) | ||
431 | goto ret_zero; | ||
432 | |||
433 | error = -ENOMEM; | ||
402 | 434 | ||
403 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); | 435 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
404 | if (!ls) | 436 | if (!ls) |
@@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
408 | ls->ls_lvblen = lvblen; | 440 | ls->ls_lvblen = lvblen; |
409 | ls->ls_count = 0; | 441 | ls->ls_count = 0; |
410 | ls->ls_flags = 0; | 442 | ls->ls_flags = 0; |
443 | ls->ls_scan_time = jiffies; | ||
411 | 444 | ||
412 | if (flags & DLM_LSFL_TIMEWARN) | 445 | if (flags & DLM_LSFL_TIMEWARN) |
413 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
@@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
418 | ls->ls_allocation = GFP_KERNEL; | 451 | ls->ls_allocation = GFP_KERNEL; |
419 | 452 | ||
420 | /* ls_exflags are forced to match among nodes, and we don't | 453 | /* ls_exflags are forced to match among nodes, and we don't |
421 | need to require all nodes to have TIMEWARN or FS set */ | 454 | need to require all nodes to have some flags set */ |
422 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS)); | 455 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | |
456 | DLM_LSFL_NEWEXCL)); | ||
423 | 457 | ||
424 | size = dlm_config.ci_rsbtbl_size; | 458 | size = dlm_config.ci_rsbtbl_size; |
425 | ls->ls_rsbtbl_size = size; | 459 | ls->ls_rsbtbl_size = size; |
@@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
510 | down_write(&ls->ls_in_recovery); | 544 | down_write(&ls->ls_in_recovery); |
511 | 545 | ||
512 | spin_lock(&lslist_lock); | 546 | spin_lock(&lslist_lock); |
547 | ls->ls_create_count = 1; | ||
513 | list_add(&ls->ls_list, &lslist); | 548 | list_add(&ls->ls_list, &lslist); |
514 | spin_unlock(&lslist_lock); | 549 | spin_unlock(&lslist_lock); |
515 | 550 | ||
@@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
548 | dlm_create_debug_file(ls); | 583 | dlm_create_debug_file(ls); |
549 | 584 | ||
550 | log_debug(ls, "join complete"); | 585 | log_debug(ls, "join complete"); |
551 | 586 | ret_zero: | |
552 | *lockspace = ls; | 587 | *lockspace = ls; |
553 | return 0; | 588 | return 0; |
554 | 589 | ||
@@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
635 | struct dlm_lkb *lkb; | 670 | struct dlm_lkb *lkb; |
636 | struct dlm_rsb *rsb; | 671 | struct dlm_rsb *rsb; |
637 | struct list_head *head; | 672 | struct list_head *head; |
638 | int i; | 673 | int i, busy, rv; |
639 | int busy = lockspace_busy(ls); | 674 | |
675 | busy = lockspace_busy(ls); | ||
676 | |||
677 | spin_lock(&lslist_lock); | ||
678 | if (ls->ls_create_count == 1) { | ||
679 | if (busy > force) | ||
680 | rv = -EBUSY; | ||
681 | else { | ||
682 | /* remove_lockspace takes ls off lslist */ | ||
683 | ls->ls_create_count = 0; | ||
684 | rv = 0; | ||
685 | } | ||
686 | } else if (ls->ls_create_count > 1) { | ||
687 | rv = --ls->ls_create_count; | ||
688 | } else { | ||
689 | rv = -EINVAL; | ||
690 | } | ||
691 | spin_unlock(&lslist_lock); | ||
640 | 692 | ||
641 | if (busy > force) | 693 | if (rv) { |
642 | return -EBUSY; | 694 | log_debug(ls, "release_lockspace no remove %d", rv); |
695 | return rv; | ||
696 | } | ||
697 | |||
698 | dlm_device_deregister(ls); | ||
643 | 699 | ||
644 | if (force < 3) | 700 | if (force < 3 && dlm_user_daemon_available()) |
645 | do_uevent(ls, 0); | 701 | do_uevent(ls, 0); |
646 | 702 | ||
647 | dlm_recoverd_stop(ls); | 703 | dlm_recoverd_stop(ls); |
@@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
720 | dlm_clear_members(ls); | 776 | dlm_clear_members(ls); |
721 | dlm_clear_members_gone(ls); | 777 | dlm_clear_members_gone(ls); |
722 | kfree(ls->ls_node_array); | 778 | kfree(ls->ls_node_array); |
779 | log_debug(ls, "release_lockspace final free"); | ||
723 | kobject_put(&ls->ls_kobj); | 780 | kobject_put(&ls->ls_kobj); |
724 | /* The ls structure will be freed when the kobject is done with */ | 781 | /* The ls structure will be freed when the kobject is done with */ |
725 | 782 | ||
726 | mutex_lock(&ls_lock); | ||
727 | ls_count--; | ||
728 | if (!ls_count) | ||
729 | threads_stop(); | ||
730 | mutex_unlock(&ls_lock); | ||
731 | |||
732 | module_put(THIS_MODULE); | 783 | module_put(THIS_MODULE); |
733 | return 0; | 784 | return 0; |
734 | } | 785 | } |
@@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
750 | int dlm_release_lockspace(void *lockspace, int force) | 801 | int dlm_release_lockspace(void *lockspace, int force) |
751 | { | 802 | { |
752 | struct dlm_ls *ls; | 803 | struct dlm_ls *ls; |
804 | int error; | ||
753 | 805 | ||
754 | ls = dlm_find_lockspace_local(lockspace); | 806 | ls = dlm_find_lockspace_local(lockspace); |
755 | if (!ls) | 807 | if (!ls) |
756 | return -EINVAL; | 808 | return -EINVAL; |
757 | dlm_put_lockspace(ls); | 809 | dlm_put_lockspace(ls); |
758 | return release_lockspace(ls, force); | 810 | |
811 | mutex_lock(&ls_lock); | ||
812 | error = release_lockspace(ls, force); | ||
813 | if (!error) | ||
814 | ls_count--; | ||
815 | else if (!ls_count) | ||
816 | threads_stop(); | ||
817 | mutex_unlock(&ls_lock); | ||
818 | |||
819 | return error; | ||
820 | } | ||
821 | |||
822 | void dlm_stop_lockspaces(void) | ||
823 | { | ||
824 | struct dlm_ls *ls; | ||
825 | |||
826 | restart: | ||
827 | spin_lock(&lslist_lock); | ||
828 | list_for_each_entry(ls, &lslist, ls_list) { | ||
829 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | ||
830 | continue; | ||
831 | spin_unlock(&lslist_lock); | ||
832 | log_error(ls, "no userland control daemon, stopping lockspace"); | ||
833 | dlm_ls_stop(ls); | ||
834 | goto restart; | ||
835 | } | ||
836 | spin_unlock(&lslist_lock); | ||
759 | } | 837 | } |
760 | 838 | ||
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 891eabbdd021..f879f87901f8 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h | |||
@@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id); | |||
20 | struct dlm_ls *dlm_find_lockspace_local(void *id); | 20 | struct dlm_ls *dlm_find_lockspace_local(void *id); |
21 | struct dlm_ls *dlm_find_lockspace_device(int minor); | 21 | struct dlm_ls *dlm_find_lockspace_device(int minor); |
22 | void dlm_put_lockspace(struct dlm_ls *ls); | 22 | void dlm_put_lockspace(struct dlm_ls *ls); |
23 | void dlm_stop_lockspaces(void); | ||
23 | 24 | ||
24 | #endif /* __LOCKSPACE_DOT_H__ */ | 25 | #endif /* __LOCKSPACE_DOT_H__ */ |
25 | 26 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 34f14a14fb4e..b3832c67194a 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/dlm.h> | 18 | #include <linux/dlm.h> |
20 | #include <linux/dlm_device.h> | 19 | #include <linux/dlm_device.h> |
21 | 20 | ||
@@ -27,6 +26,8 @@ | |||
27 | 26 | ||
28 | static const char name_prefix[] = "dlm"; | 27 | static const char name_prefix[] = "dlm"; |
29 | static const struct file_operations device_fops; | 28 | static const struct file_operations device_fops; |
29 | static atomic_t dlm_monitor_opened; | ||
30 | static int dlm_monitor_unused = 1; | ||
30 | 31 | ||
31 | #ifdef CONFIG_COMPAT | 32 | #ifdef CONFIG_COMPAT |
32 | 33 | ||
@@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc, | |||
340 | return error; | 341 | return error; |
341 | } | 342 | } |
342 | 343 | ||
343 | static int create_misc_device(struct dlm_ls *ls, char *name) | 344 | static int dlm_device_register(struct dlm_ls *ls, char *name) |
344 | { | 345 | { |
345 | int error, len; | 346 | int error, len; |
346 | 347 | ||
348 | /* The device is already registered. This happens when the | ||
349 | lockspace is created multiple times from userspace. */ | ||
350 | if (ls->ls_device.name) | ||
351 | return 0; | ||
352 | |||
347 | error = -ENOMEM; | 353 | error = -ENOMEM; |
348 | len = strlen(name) + strlen(name_prefix) + 2; | 354 | len = strlen(name) + strlen(name_prefix) + 2; |
349 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 355 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); |
@@ -363,6 +369,22 @@ fail: | |||
363 | return error; | 369 | return error; |
364 | } | 370 | } |
365 | 371 | ||
372 | int dlm_device_deregister(struct dlm_ls *ls) | ||
373 | { | ||
374 | int error; | ||
375 | |||
376 | /* The device is not registered. This happens when the lockspace | ||
377 | was never used from userspace, or when device_create_lockspace() | ||
378 | calls dlm_release_lockspace() after the register fails. */ | ||
379 | if (!ls->ls_device.name) | ||
380 | return 0; | ||
381 | |||
382 | error = misc_deregister(&ls->ls_device); | ||
383 | if (!error) | ||
384 | kfree(ls->ls_device.name); | ||
385 | return error; | ||
386 | } | ||
387 | |||
366 | static int device_user_purge(struct dlm_user_proc *proc, | 388 | static int device_user_purge(struct dlm_user_proc *proc, |
367 | struct dlm_purge_params *params) | 389 | struct dlm_purge_params *params) |
368 | { | 390 | { |
@@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
397 | if (!ls) | 419 | if (!ls) |
398 | return -ENOENT; | 420 | return -ENOENT; |
399 | 421 | ||
400 | error = create_misc_device(ls, params->name); | 422 | error = dlm_device_register(ls, params->name); |
401 | dlm_put_lockspace(ls); | 423 | dlm_put_lockspace(ls); |
402 | 424 | ||
403 | if (error) | 425 | if (error) |
@@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
421 | if (!ls) | 443 | if (!ls) |
422 | return -ENOENT; | 444 | return -ENOENT; |
423 | 445 | ||
424 | /* Deregister the misc device first, so we don't have | ||
425 | * a device that's not attached to a lockspace. If | ||
426 | * dlm_release_lockspace fails then we can recreate it | ||
427 | */ | ||
428 | error = misc_deregister(&ls->ls_device); | ||
429 | if (error) { | ||
430 | dlm_put_lockspace(ls); | ||
431 | goto out; | ||
432 | } | ||
433 | kfree(ls->ls_device.name); | ||
434 | |||
435 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) | 446 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) |
436 | force = 2; | 447 | force = 2; |
437 | 448 | ||
438 | lockspace = ls->ls_local_handle; | 449 | lockspace = ls->ls_local_handle; |
450 | dlm_put_lockspace(ls); | ||
439 | 451 | ||
440 | /* dlm_release_lockspace waits for references to go to zero, | 452 | /* The final dlm_release_lockspace waits for references to go to |
441 | so all processes will need to close their device for the ls | 453 | zero, so all processes will need to close their device for the |
442 | before the release will procede */ | 454 | ls before the release will proceed. release also calls the |
455 | device_deregister above. Converting a positive return value | ||
456 | from release to zero means that userspace won't know when its | ||
457 | release was the final one, but it shouldn't need to know. */ | ||
443 | 458 | ||
444 | dlm_put_lockspace(ls); | ||
445 | error = dlm_release_lockspace(lockspace, force); | 459 | error = dlm_release_lockspace(lockspace, force); |
446 | if (error) | 460 | if (error > 0) |
447 | create_misc_device(ls, ls->ls_name); | 461 | error = 0; |
448 | out: | ||
449 | return error; | 462 | return error; |
450 | } | 463 | } |
451 | 464 | ||
@@ -623,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file) | |||
623 | struct dlm_user_proc *proc; | 636 | struct dlm_user_proc *proc; |
624 | struct dlm_ls *ls; | 637 | struct dlm_ls *ls; |
625 | 638 | ||
626 | lock_kernel(); | ||
627 | ls = dlm_find_lockspace_device(iminor(inode)); | 639 | ls = dlm_find_lockspace_device(iminor(inode)); |
628 | if (!ls) { | 640 | if (!ls) |
629 | unlock_kernel(); | ||
630 | return -ENOENT; | 641 | return -ENOENT; |
631 | } | ||
632 | 642 | ||
633 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 643 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
634 | if (!proc) { | 644 | if (!proc) { |
635 | dlm_put_lockspace(ls); | 645 | dlm_put_lockspace(ls); |
636 | unlock_kernel(); | ||
637 | return -ENOMEM; | 646 | return -ENOMEM; |
638 | } | 647 | } |
639 | 648 | ||
@@ -645,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file) | |||
645 | spin_lock_init(&proc->locks_spin); | 654 | spin_lock_init(&proc->locks_spin); |
646 | init_waitqueue_head(&proc->wait); | 655 | init_waitqueue_head(&proc->wait); |
647 | file->private_data = proc; | 656 | file->private_data = proc; |
648 | unlock_kernel(); | ||
649 | 657 | ||
650 | return 0; | 658 | return 0; |
651 | } | 659 | } |
@@ -878,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
878 | return 0; | 886 | return 0; |
879 | } | 887 | } |
880 | 888 | ||
889 | int dlm_user_daemon_available(void) | ||
890 | { | ||
891 | /* dlm_controld hasn't started (or, has started, but not | ||
892 | properly populated configfs) */ | ||
893 | |||
894 | if (!dlm_our_nodeid()) | ||
895 | return 0; | ||
896 | |||
897 | /* This is to deal with versions of dlm_controld that don't | ||
898 | know about the monitor device. We assume that if the | ||
899 | dlm_controld was started (above), but the monitor device | ||
900 | was never opened, that it's an old version. dlm_controld | ||
901 | should open the monitor device before populating configfs. */ | ||
902 | |||
903 | if (dlm_monitor_unused) | ||
904 | return 1; | ||
905 | |||
906 | return atomic_read(&dlm_monitor_opened) ? 1 : 0; | ||
907 | } | ||
908 | |||
881 | static int ctl_device_open(struct inode *inode, struct file *file) | 909 | static int ctl_device_open(struct inode *inode, struct file *file) |
882 | { | 910 | { |
883 | cycle_kernel_lock(); | ||
884 | file->private_data = NULL; | 911 | file->private_data = NULL; |
885 | return 0; | 912 | return 0; |
886 | } | 913 | } |
@@ -890,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file) | |||
890 | return 0; | 917 | return 0; |
891 | } | 918 | } |
892 | 919 | ||
920 | static int monitor_device_open(struct inode *inode, struct file *file) | ||
921 | { | ||
922 | atomic_inc(&dlm_monitor_opened); | ||
923 | dlm_monitor_unused = 0; | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static int monitor_device_close(struct inode *inode, struct file *file) | ||
928 | { | ||
929 | if (atomic_dec_and_test(&dlm_monitor_opened)) | ||
930 | dlm_stop_lockspaces(); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
893 | static const struct file_operations device_fops = { | 934 | static const struct file_operations device_fops = { |
894 | .open = device_open, | 935 | .open = device_open, |
895 | .release = device_close, | 936 | .release = device_close, |
@@ -913,19 +954,42 @@ static struct miscdevice ctl_device = { | |||
913 | .minor = MISC_DYNAMIC_MINOR, | 954 | .minor = MISC_DYNAMIC_MINOR, |
914 | }; | 955 | }; |
915 | 956 | ||
957 | static const struct file_operations monitor_device_fops = { | ||
958 | .open = monitor_device_open, | ||
959 | .release = monitor_device_close, | ||
960 | .owner = THIS_MODULE, | ||
961 | }; | ||
962 | |||
963 | static struct miscdevice monitor_device = { | ||
964 | .name = "dlm-monitor", | ||
965 | .fops = &monitor_device_fops, | ||
966 | .minor = MISC_DYNAMIC_MINOR, | ||
967 | }; | ||
968 | |||
916 | int __init dlm_user_init(void) | 969 | int __init dlm_user_init(void) |
917 | { | 970 | { |
918 | int error; | 971 | int error; |
919 | 972 | ||
973 | atomic_set(&dlm_monitor_opened, 0); | ||
974 | |||
920 | error = misc_register(&ctl_device); | 975 | error = misc_register(&ctl_device); |
921 | if (error) | 976 | if (error) { |
922 | log_print("misc_register failed for control device"); | 977 | log_print("misc_register failed for control device"); |
978 | goto out; | ||
979 | } | ||
923 | 980 | ||
981 | error = misc_register(&monitor_device); | ||
982 | if (error) { | ||
983 | log_print("misc_register failed for monitor device"); | ||
984 | misc_deregister(&ctl_device); | ||
985 | } | ||
986 | out: | ||
924 | return error; | 987 | return error; |
925 | } | 988 | } |
926 | 989 | ||
927 | void dlm_user_exit(void) | 990 | void dlm_user_exit(void) |
928 | { | 991 | { |
929 | misc_deregister(&ctl_device); | 992 | misc_deregister(&ctl_device); |
993 | misc_deregister(&monitor_device); | ||
930 | } | 994 | } |
931 | 995 | ||
diff --git a/fs/dlm/user.h b/fs/dlm/user.h index d38e9f3e4151..35eb6a13d616 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -12,5 +12,7 @@ | |||
12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); | 12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); |
13 | int dlm_user_init(void); | 13 | int dlm_user_init(void); |
14 | void dlm_user_exit(void); | 14 | void dlm_user_exit(void); |
15 | int dlm_device_deregister(struct dlm_ls *ls); | ||
16 | int dlm_user_daemon_available(void); | ||
15 | 17 | ||
16 | #endif | 18 | #endif |
diff --git a/fs/dquot.c b/fs/dquot.c index 8ec4d6cc7633..da30a27f2242 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -9,8 +9,6 @@ | |||
9 | * implementation is based on one of the several variants of the LINUX | 9 | * implementation is based on one of the several variants of the LINUX |
10 | * inode-subsystem with added complexity of the diskquota system. | 10 | * inode-subsystem with added complexity of the diskquota system. |
11 | * | 11 | * |
12 | * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $ | ||
13 | * | ||
14 | * Author: Marco van Wieringen <mvw@planets.elm.net> | 12 | * Author: Marco van Wieringen <mvw@planets.elm.net> |
15 | * | 13 | * |
16 | * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 | 14 | * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 |
@@ -895,10 +893,9 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
895 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) | 893 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) |
896 | return; | 894 | return; |
897 | 895 | ||
898 | mutex_lock(&tty_mutex); | ||
899 | tty = get_current_tty(); | 896 | tty = get_current_tty(); |
900 | if (!tty) | 897 | if (!tty) |
901 | goto out_lock; | 898 | return; |
902 | tty_write_message(tty, dquot->dq_sb->s_id); | 899 | tty_write_message(tty, dquot->dq_sb->s_id); |
903 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) | 900 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) |
904 | tty_write_message(tty, ": warning, "); | 901 | tty_write_message(tty, ": warning, "); |
@@ -926,8 +923,7 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
926 | break; | 923 | break; |
927 | } | 924 | } |
928 | tty_write_message(tty, msg); | 925 | tty_write_message(tty, msg); |
929 | out_lock: | 926 | tty_kref_put(tty); |
930 | mutex_unlock(&tty_mutex); | ||
931 | } | 927 | } |
932 | #endif | 928 | #endif |
933 | 929 | ||
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index b4755a85996e..2cc9ee4ad2eb 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o | 5 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o |
6 | 6 | ||
7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o | 7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b73fb752c5f8..3504cf9df358 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -79,11 +79,6 @@ | |||
79 | #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 | 79 | #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 |
80 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 | 80 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 |
81 | #define ECRYPTFS_MAX_NUM_USERS 32768 | 81 | #define ECRYPTFS_MAX_NUM_USERS 32768 |
82 | #define ECRYPTFS_TRANSPORT_NETLINK 0 | ||
83 | #define ECRYPTFS_TRANSPORT_CONNECTOR 1 | ||
84 | #define ECRYPTFS_TRANSPORT_RELAYFS 2 | ||
85 | #define ECRYPTFS_TRANSPORT_MISCDEV 3 | ||
86 | #define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV | ||
87 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" | 82 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" |
88 | 83 | ||
89 | #define RFC2440_CIPHER_DES3_EDE 0x02 | 84 | #define RFC2440_CIPHER_DES3_EDE 0x02 |
@@ -400,8 +395,6 @@ struct ecryptfs_msg_ctx { | |||
400 | struct mutex mux; | 395 | struct mutex mux; |
401 | }; | 396 | }; |
402 | 397 | ||
403 | extern unsigned int ecryptfs_transport; | ||
404 | |||
405 | struct ecryptfs_daemon; | 398 | struct ecryptfs_daemon; |
406 | 399 | ||
407 | struct ecryptfs_daemon { | 400 | struct ecryptfs_daemon { |
@@ -627,31 +620,20 @@ int | |||
627 | ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 620 | ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
628 | size_t size, int flags); | 621 | size_t size, int flags); |
629 | int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); | 622 | int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); |
630 | int ecryptfs_process_helo(unsigned int transport, uid_t euid, | 623 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, |
631 | struct user_namespace *user_ns, struct pid *pid); | 624 | struct pid *pid); |
632 | int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, | 625 | int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, |
633 | struct pid *pid); | 626 | struct pid *pid); |
634 | int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | 627 | int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, |
635 | struct user_namespace *user_ns, struct pid *pid, | 628 | struct user_namespace *user_ns, struct pid *pid, |
636 | u32 seq); | 629 | u32 seq); |
637 | int ecryptfs_send_message(unsigned int transport, char *data, int data_len, | 630 | int ecryptfs_send_message(char *data, int data_len, |
638 | struct ecryptfs_msg_ctx **msg_ctx); | 631 | struct ecryptfs_msg_ctx **msg_ctx); |
639 | int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, | 632 | int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, |
640 | struct ecryptfs_message **emsg); | 633 | struct ecryptfs_message **emsg); |
641 | int ecryptfs_init_messaging(unsigned int transport); | 634 | int ecryptfs_init_messaging(void); |
642 | void ecryptfs_release_messaging(unsigned int transport); | 635 | void ecryptfs_release_messaging(void); |
643 | 636 | ||
644 | int ecryptfs_send_netlink(char *data, int data_len, | ||
645 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
646 | u16 msg_flags, struct pid *daemon_pid); | ||
647 | int ecryptfs_init_netlink(void); | ||
648 | void ecryptfs_release_netlink(void); | ||
649 | |||
650 | int ecryptfs_send_connector(char *data, int data_len, | ||
651 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
652 | u16 msg_flags, struct pid *daemon_pid); | ||
653 | int ecryptfs_init_connector(void); | ||
654 | void ecryptfs_release_connector(void); | ||
655 | void | 637 | void |
656 | ecryptfs_write_header_metadata(char *virt, | 638 | ecryptfs_write_header_metadata(char *virt, |
657 | struct ecryptfs_crypt_stat *crypt_stat, | 639 | struct ecryptfs_crypt_stat *crypt_stat, |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9244d653743e..eb3dc4c7ac06 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -71,12 +71,11 @@ struct ecryptfs_getdents_callback { | |||
71 | void *dirent; | 71 | void *dirent; |
72 | struct dentry *dentry; | 72 | struct dentry *dentry; |
73 | filldir_t filldir; | 73 | filldir_t filldir; |
74 | int err; | ||
75 | int filldir_called; | 74 | int filldir_called; |
76 | int entries_written; | 75 | int entries_written; |
77 | }; | 76 | }; |
78 | 77 | ||
79 | /* Inspired by generic filldir in fs/readir.c */ | 78 | /* Inspired by generic filldir in fs/readdir.c */ |
80 | static int | 79 | static int |
81 | ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, | 80 | ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, |
82 | u64 ino, unsigned int d_type) | 81 | u64 ino, unsigned int d_type) |
@@ -125,18 +124,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
125 | buf.dirent = dirent; | 124 | buf.dirent = dirent; |
126 | buf.dentry = file->f_path.dentry; | 125 | buf.dentry = file->f_path.dentry; |
127 | buf.filldir = filldir; | 126 | buf.filldir = filldir; |
128 | retry: | ||
129 | buf.filldir_called = 0; | 127 | buf.filldir_called = 0; |
130 | buf.entries_written = 0; | 128 | buf.entries_written = 0; |
131 | buf.err = 0; | ||
132 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); | 129 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); |
133 | if (buf.err) | ||
134 | rc = buf.err; | ||
135 | if (buf.filldir_called && !buf.entries_written) | ||
136 | goto retry; | ||
137 | file->f_pos = lower_file->f_pos; | 130 | file->f_pos = lower_file->f_pos; |
131 | if (rc < 0) | ||
132 | goto out; | ||
133 | if (buf.filldir_called && !buf.entries_written) | ||
134 | goto out; | ||
138 | if (rc >= 0) | 135 | if (rc >= 0) |
139 | fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode); | 136 | fsstack_copy_attr_atime(inode, |
137 | lower_file->f_path.dentry->d_inode); | ||
138 | out: | ||
140 | return rc; | 139 | return rc; |
141 | } | 140 | } |
142 | 141 | ||
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index f5b76a331b9c..e22bc3961345 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -234,8 +234,8 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, | |||
234 | } | 234 | } |
235 | i += data_len; | 235 | i += data_len; |
236 | if (message_len < (i + m_size)) { | 236 | if (message_len < (i + m_size)) { |
237 | ecryptfs_printk(KERN_ERR, "The received netlink message is " | 237 | ecryptfs_printk(KERN_ERR, "The message received from ecryptfsd " |
238 | "shorter than expected\n"); | 238 | "is shorter than expected\n"); |
239 | rc = -EIO; | 239 | rc = -EIO; |
240 | goto out; | 240 | goto out; |
241 | } | 241 | } |
@@ -438,8 +438,8 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
438 | struct ecryptfs_msg_ctx *msg_ctx; | 438 | struct ecryptfs_msg_ctx *msg_ctx; |
439 | struct ecryptfs_message *msg = NULL; | 439 | struct ecryptfs_message *msg = NULL; |
440 | char *auth_tok_sig; | 440 | char *auth_tok_sig; |
441 | char *netlink_message; | 441 | char *payload; |
442 | size_t netlink_message_length; | 442 | size_t payload_len; |
443 | int rc; | 443 | int rc; |
444 | 444 | ||
445 | rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); | 445 | rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); |
@@ -449,15 +449,15 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
449 | goto out; | 449 | goto out; |
450 | } | 450 | } |
451 | rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), | 451 | rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), |
452 | &netlink_message, &netlink_message_length); | 452 | &payload, &payload_len); |
453 | if (rc) { | 453 | if (rc) { |
454 | ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); | 454 | ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); |
455 | goto out; | 455 | goto out; |
456 | } | 456 | } |
457 | rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, | 457 | rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); |
458 | netlink_message_length, &msg_ctx); | ||
459 | if (rc) { | 458 | if (rc) { |
460 | ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); | 459 | ecryptfs_printk(KERN_ERR, "Error sending message to " |
460 | "ecryptfsd\n"); | ||
461 | goto out; | 461 | goto out; |
462 | } | 462 | } |
463 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); | 463 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); |
@@ -1333,23 +1333,22 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
1333 | struct ecryptfs_key_record *key_rec) | 1333 | struct ecryptfs_key_record *key_rec) |
1334 | { | 1334 | { |
1335 | struct ecryptfs_msg_ctx *msg_ctx = NULL; | 1335 | struct ecryptfs_msg_ctx *msg_ctx = NULL; |
1336 | char *netlink_payload; | 1336 | char *payload = NULL; |
1337 | size_t netlink_payload_length; | 1337 | size_t payload_len; |
1338 | struct ecryptfs_message *msg; | 1338 | struct ecryptfs_message *msg; |
1339 | int rc; | 1339 | int rc; |
1340 | 1340 | ||
1341 | rc = write_tag_66_packet(auth_tok->token.private_key.signature, | 1341 | rc = write_tag_66_packet(auth_tok->token.private_key.signature, |
1342 | ecryptfs_code_for_cipher_string(crypt_stat), | 1342 | ecryptfs_code_for_cipher_string(crypt_stat), |
1343 | crypt_stat, &netlink_payload, | 1343 | crypt_stat, &payload, &payload_len); |
1344 | &netlink_payload_length); | ||
1345 | if (rc) { | 1344 | if (rc) { |
1346 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); | 1345 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); |
1347 | goto out; | 1346 | goto out; |
1348 | } | 1347 | } |
1349 | rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload, | 1348 | rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); |
1350 | netlink_payload_length, &msg_ctx); | ||
1351 | if (rc) { | 1349 | if (rc) { |
1352 | ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); | 1350 | ecryptfs_printk(KERN_ERR, "Error sending message to " |
1351 | "ecryptfsd\n"); | ||
1353 | goto out; | 1352 | goto out; |
1354 | } | 1353 | } |
1355 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); | 1354 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); |
@@ -1364,8 +1363,7 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
1364 | ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); | 1363 | ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); |
1365 | kfree(msg); | 1364 | kfree(msg); |
1366 | out: | 1365 | out: |
1367 | if (netlink_payload) | 1366 | kfree(payload); |
1368 | kfree(netlink_payload); | ||
1369 | return rc; | 1367 | return rc; |
1370 | } | 1368 | } |
1371 | /** | 1369 | /** |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 448dfd597b5f..046e027a4cb1 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/namei.h> | 30 | #include <linux/namei.h> |
31 | #include <linux/skbuff.h> | 31 | #include <linux/skbuff.h> |
32 | #include <linux/crypto.h> | 32 | #include <linux/crypto.h> |
33 | #include <linux/netlink.h> | ||
34 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
35 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
36 | #include <linux/key.h> | 35 | #include <linux/key.h> |
@@ -49,8 +48,7 @@ MODULE_PARM_DESC(ecryptfs_verbosity, | |||
49 | "0, which is Quiet)"); | 48 | "0, which is Quiet)"); |
50 | 49 | ||
51 | /** | 50 | /** |
52 | * Module parameter that defines the number of netlink message buffer | 51 | * Module parameter that defines the number of message buffer elements |
53 | * elements | ||
54 | */ | 52 | */ |
55 | unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; | 53 | unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; |
56 | 54 | ||
@@ -60,9 +58,9 @@ MODULE_PARM_DESC(ecryptfs_message_buf_len, | |||
60 | 58 | ||
61 | /** | 59 | /** |
62 | * Module parameter that defines the maximum guaranteed amount of time to wait | 60 | * Module parameter that defines the maximum guaranteed amount of time to wait |
63 | * for a response through netlink. The actual sleep time will be, more than | 61 | * for a response from ecryptfsd. The actual sleep time will be, more than |
64 | * likely, a small amount greater than this specified value, but only less if | 62 | * likely, a small amount greater than this specified value, but only less if |
65 | * the netlink message successfully arrives. | 63 | * the message successfully arrives. |
66 | */ | 64 | */ |
67 | signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; | 65 | signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; |
68 | 66 | ||
@@ -83,8 +81,6 @@ module_param(ecryptfs_number_of_users, uint, 0); | |||
83 | MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " | 81 | MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " |
84 | "concurrent users of eCryptfs"); | 82 | "concurrent users of eCryptfs"); |
85 | 83 | ||
86 | unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT; | ||
87 | |||
88 | void __ecryptfs_printk(const char *fmt, ...) | 84 | void __ecryptfs_printk(const char *fmt, ...) |
89 | { | 85 | { |
90 | va_list args; | 86 | va_list args; |
@@ -211,7 +207,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
211 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, | 207 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, |
212 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; | 208 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; |
213 | 209 | ||
214 | static match_table_t tokens = { | 210 | static const match_table_t tokens = { |
215 | {ecryptfs_opt_sig, "sig=%s"}, | 211 | {ecryptfs_opt_sig, "sig=%s"}, |
216 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, | 212 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, |
217 | {ecryptfs_opt_cipher, "cipher=%s"}, | 213 | {ecryptfs_opt_cipher, "cipher=%s"}, |
@@ -779,10 +775,11 @@ static int __init ecryptfs_init(void) | |||
779 | "rc = [%d]\n", __func__, rc); | 775 | "rc = [%d]\n", __func__, rc); |
780 | goto out_do_sysfs_unregistration; | 776 | goto out_do_sysfs_unregistration; |
781 | } | 777 | } |
782 | rc = ecryptfs_init_messaging(ecryptfs_transport); | 778 | rc = ecryptfs_init_messaging(); |
783 | if (rc) { | 779 | if (rc) { |
784 | printk(KERN_ERR "Failure occured while attempting to " | 780 | printk(KERN_ERR "Failure occured while attempting to " |
785 | "initialize the eCryptfs netlink socket\n"); | 781 | "initialize the communications channel to " |
782 | "ecryptfsd\n"); | ||
786 | goto out_destroy_kthread; | 783 | goto out_destroy_kthread; |
787 | } | 784 | } |
788 | rc = ecryptfs_init_crypto(); | 785 | rc = ecryptfs_init_crypto(); |
@@ -797,7 +794,7 @@ static int __init ecryptfs_init(void) | |||
797 | 794 | ||
798 | goto out; | 795 | goto out; |
799 | out_release_messaging: | 796 | out_release_messaging: |
800 | ecryptfs_release_messaging(ecryptfs_transport); | 797 | ecryptfs_release_messaging(); |
801 | out_destroy_kthread: | 798 | out_destroy_kthread: |
802 | ecryptfs_destroy_kthread(); | 799 | ecryptfs_destroy_kthread(); |
803 | out_do_sysfs_unregistration: | 800 | out_do_sysfs_unregistration: |
@@ -818,7 +815,7 @@ static void __exit ecryptfs_exit(void) | |||
818 | if (rc) | 815 | if (rc) |
819 | printk(KERN_ERR "Failure whilst attempting to destroy crypto; " | 816 | printk(KERN_ERR "Failure whilst attempting to destroy crypto; " |
820 | "rc = [%d]\n", rc); | 817 | "rc = [%d]\n", rc); |
821 | ecryptfs_release_messaging(ecryptfs_transport); | 818 | ecryptfs_release_messaging(); |
822 | ecryptfs_destroy_kthread(); | 819 | ecryptfs_destroy_kthread(); |
823 | do_sysfs_unregistration(); | 820 | do_sysfs_unregistration(); |
824 | unregister_filesystem(&ecryptfs_fs_type); | 821 | unregister_filesystem(&ecryptfs_fs_type); |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 1b5c20058acb..c6983978a31e 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -134,12 +134,11 @@ out: | |||
134 | } | 134 | } |
135 | 135 | ||
136 | static int | 136 | static int |
137 | ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | 137 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, |
138 | u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); | 138 | struct ecryptfs_msg_ctx **msg_ctx); |
139 | 139 | ||
140 | /** | 140 | /** |
141 | * ecryptfs_send_raw_message | 141 | * ecryptfs_send_raw_message |
142 | * @transport: Transport type | ||
143 | * @msg_type: Message type | 142 | * @msg_type: Message type |
144 | * @daemon: Daemon struct for recipient of message | 143 | * @daemon: Daemon struct for recipient of message |
145 | * | 144 | * |
@@ -150,38 +149,25 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | |||
150 | * | 149 | * |
151 | * Returns zero on success; non-zero otherwise | 150 | * Returns zero on success; non-zero otherwise |
152 | */ | 151 | */ |
153 | static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, | 152 | static int ecryptfs_send_raw_message(u8 msg_type, |
154 | struct ecryptfs_daemon *daemon) | 153 | struct ecryptfs_daemon *daemon) |
155 | { | 154 | { |
156 | struct ecryptfs_msg_ctx *msg_ctx; | 155 | struct ecryptfs_msg_ctx *msg_ctx; |
157 | int rc; | 156 | int rc; |
158 | 157 | ||
159 | switch(transport) { | 158 | rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx); |
160 | case ECRYPTFS_TRANSPORT_NETLINK: | 159 | if (rc) { |
161 | rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, | 160 | printk(KERN_ERR "%s: Error whilst attempting to send " |
162 | daemon->pid); | 161 | "message to ecryptfsd; rc = [%d]\n", __func__, rc); |
163 | break; | 162 | goto out; |
164 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
165 | rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, | ||
166 | &msg_ctx); | ||
167 | if (rc) { | ||
168 | printk(KERN_ERR "%s: Error whilst attempting to send " | ||
169 | "message via procfs; rc = [%d]\n", __func__, rc); | ||
170 | goto out; | ||
171 | } | ||
172 | /* Raw messages are logically context-free (e.g., no | ||
173 | * reply is expected), so we set the state of the | ||
174 | * ecryptfs_msg_ctx object to indicate that it should | ||
175 | * be freed as soon as the transport sends out the message. */ | ||
176 | mutex_lock(&msg_ctx->mux); | ||
177 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
178 | mutex_unlock(&msg_ctx->mux); | ||
179 | break; | ||
180 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
181 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
182 | default: | ||
183 | rc = -ENOSYS; | ||
184 | } | 163 | } |
164 | /* Raw messages are logically context-free (e.g., no | ||
165 | * reply is expected), so we set the state of the | ||
166 | * ecryptfs_msg_ctx object to indicate that it should | ||
167 | * be freed as soon as the message is sent. */ | ||
168 | mutex_lock(&msg_ctx->mux); | ||
169 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
170 | mutex_unlock(&msg_ctx->mux); | ||
185 | out: | 171 | out: |
186 | return rc; | 172 | return rc; |
187 | } | 173 | } |
@@ -227,7 +213,6 @@ out: | |||
227 | 213 | ||
228 | /** | 214 | /** |
229 | * ecryptfs_process_helo | 215 | * ecryptfs_process_helo |
230 | * @transport: The underlying transport (netlink, etc.) | ||
231 | * @euid: The user ID owner of the message | 216 | * @euid: The user ID owner of the message |
232 | * @user_ns: The namespace in which @euid applies | 217 | * @user_ns: The namespace in which @euid applies |
233 | * @pid: The process ID for the userspace program that sent the | 218 | * @pid: The process ID for the userspace program that sent the |
@@ -239,8 +224,8 @@ out: | |||
239 | * Returns zero after adding a new daemon to the hash list; | 224 | * Returns zero after adding a new daemon to the hash list; |
240 | * non-zero otherwise. | 225 | * non-zero otherwise. |
241 | */ | 226 | */ |
242 | int ecryptfs_process_helo(unsigned int transport, uid_t euid, | 227 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, |
243 | struct user_namespace *user_ns, struct pid *pid) | 228 | struct pid *pid) |
244 | { | 229 | { |
245 | struct ecryptfs_daemon *new_daemon; | 230 | struct ecryptfs_daemon *new_daemon; |
246 | struct ecryptfs_daemon *old_daemon; | 231 | struct ecryptfs_daemon *old_daemon; |
@@ -252,8 +237,7 @@ int ecryptfs_process_helo(unsigned int transport, uid_t euid, | |||
252 | printk(KERN_WARNING "Received request from user [%d] " | 237 | printk(KERN_WARNING "Received request from user [%d] " |
253 | "to register daemon [0x%p]; unregistering daemon " | 238 | "to register daemon [0x%p]; unregistering daemon " |
254 | "[0x%p]\n", euid, pid, old_daemon->pid); | 239 | "[0x%p]\n", euid, pid, old_daemon->pid); |
255 | rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, | 240 | rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon); |
256 | old_daemon); | ||
257 | if (rc) | 241 | if (rc) |
258 | printk(KERN_WARNING "Failed to send QUIT " | 242 | printk(KERN_WARNING "Failed to send QUIT " |
259 | "message to daemon [0x%p]; rc = [%d]\n", | 243 | "message to daemon [0x%p]; rc = [%d]\n", |
@@ -467,8 +451,6 @@ out: | |||
467 | 451 | ||
468 | /** | 452 | /** |
469 | * ecryptfs_send_message_locked | 453 | * ecryptfs_send_message_locked |
470 | * @transport: The transport over which to send the message (i.e., | ||
471 | * netlink) | ||
472 | * @data: The data to send | 454 | * @data: The data to send |
473 | * @data_len: The length of data | 455 | * @data_len: The length of data |
474 | * @msg_ctx: The message context allocated for the send | 456 | * @msg_ctx: The message context allocated for the send |
@@ -478,8 +460,8 @@ out: | |||
478 | * Returns zero on success; non-zero otherwise | 460 | * Returns zero on success; non-zero otherwise |
479 | */ | 461 | */ |
480 | static int | 462 | static int |
481 | ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | 463 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, |
482 | u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) | 464 | struct ecryptfs_msg_ctx **msg_ctx) |
483 | { | 465 | { |
484 | struct ecryptfs_daemon *daemon; | 466 | struct ecryptfs_daemon *daemon; |
485 | int rc; | 467 | int rc; |
@@ -503,20 +485,8 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | |||
503 | ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); | 485 | ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); |
504 | mutex_unlock(&(*msg_ctx)->mux); | 486 | mutex_unlock(&(*msg_ctx)->mux); |
505 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); | 487 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); |
506 | switch (transport) { | 488 | rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, 0, |
507 | case ECRYPTFS_TRANSPORT_NETLINK: | 489 | daemon); |
508 | rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, | ||
509 | 0, daemon->pid); | ||
510 | break; | ||
511 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
512 | rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, | ||
513 | 0, daemon); | ||
514 | break; | ||
515 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
516 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
517 | default: | ||
518 | rc = -ENOSYS; | ||
519 | } | ||
520 | if (rc) | 490 | if (rc) |
521 | printk(KERN_ERR "%s: Error attempting to send message to " | 491 | printk(KERN_ERR "%s: Error attempting to send message to " |
522 | "userspace daemon; rc = [%d]\n", __func__, rc); | 492 | "userspace daemon; rc = [%d]\n", __func__, rc); |
@@ -526,8 +496,6 @@ out: | |||
526 | 496 | ||
527 | /** | 497 | /** |
528 | * ecryptfs_send_message | 498 | * ecryptfs_send_message |
529 | * @transport: The transport over which to send the message (i.e., | ||
530 | * netlink) | ||
531 | * @data: The data to send | 499 | * @data: The data to send |
532 | * @data_len: The length of data | 500 | * @data_len: The length of data |
533 | * @msg_ctx: The message context allocated for the send | 501 | * @msg_ctx: The message context allocated for the send |
@@ -536,14 +504,14 @@ out: | |||
536 | * | 504 | * |
537 | * Returns zero on success; non-zero otherwise | 505 | * Returns zero on success; non-zero otherwise |
538 | */ | 506 | */ |
539 | int ecryptfs_send_message(unsigned int transport, char *data, int data_len, | 507 | int ecryptfs_send_message(char *data, int data_len, |
540 | struct ecryptfs_msg_ctx **msg_ctx) | 508 | struct ecryptfs_msg_ctx **msg_ctx) |
541 | { | 509 | { |
542 | int rc; | 510 | int rc; |
543 | 511 | ||
544 | mutex_lock(&ecryptfs_daemon_hash_mux); | 512 | mutex_lock(&ecryptfs_daemon_hash_mux); |
545 | rc = ecryptfs_send_message_locked(transport, data, data_len, | 513 | rc = ecryptfs_send_message_locked(data, data_len, ECRYPTFS_MSG_REQUEST, |
546 | ECRYPTFS_MSG_REQUEST, msg_ctx); | 514 | msg_ctx); |
547 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 515 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
548 | return rc; | 516 | return rc; |
549 | } | 517 | } |
@@ -586,7 +554,7 @@ sleep: | |||
586 | return rc; | 554 | return rc; |
587 | } | 555 | } |
588 | 556 | ||
589 | int ecryptfs_init_messaging(unsigned int transport) | 557 | int ecryptfs_init_messaging(void) |
590 | { | 558 | { |
591 | int i; | 559 | int i; |
592 | int rc = 0; | 560 | int rc = 0; |
@@ -639,27 +607,14 @@ int ecryptfs_init_messaging(unsigned int transport) | |||
639 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); | 607 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); |
640 | } | 608 | } |
641 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); | 609 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); |
642 | switch(transport) { | 610 | rc = ecryptfs_init_ecryptfs_miscdev(); |
643 | case ECRYPTFS_TRANSPORT_NETLINK: | 611 | if (rc) |
644 | rc = ecryptfs_init_netlink(); | 612 | ecryptfs_release_messaging(); |
645 | if (rc) | ||
646 | ecryptfs_release_messaging(transport); | ||
647 | break; | ||
648 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
649 | rc = ecryptfs_init_ecryptfs_miscdev(); | ||
650 | if (rc) | ||
651 | ecryptfs_release_messaging(transport); | ||
652 | break; | ||
653 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
654 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
655 | default: | ||
656 | rc = -ENOSYS; | ||
657 | } | ||
658 | out: | 613 | out: |
659 | return rc; | 614 | return rc; |
660 | } | 615 | } |
661 | 616 | ||
662 | void ecryptfs_release_messaging(unsigned int transport) | 617 | void ecryptfs_release_messaging(void) |
663 | { | 618 | { |
664 | if (ecryptfs_msg_ctx_arr) { | 619 | if (ecryptfs_msg_ctx_arr) { |
665 | int i; | 620 | int i; |
@@ -698,17 +653,6 @@ void ecryptfs_release_messaging(unsigned int transport) | |||
698 | kfree(ecryptfs_daemon_hash); | 653 | kfree(ecryptfs_daemon_hash); |
699 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 654 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
700 | } | 655 | } |
701 | switch(transport) { | 656 | ecryptfs_destroy_ecryptfs_miscdev(); |
702 | case ECRYPTFS_TRANSPORT_NETLINK: | ||
703 | ecryptfs_release_netlink(); | ||
704 | break; | ||
705 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
706 | ecryptfs_destroy_ecryptfs_miscdev(); | ||
707 | break; | ||
708 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
709 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
710 | default: | ||
711 | break; | ||
712 | } | ||
713 | return; | 657 | return; |
714 | } | 658 | } |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 245c2dc02d5c..04d7b3fa1ac6 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -265,22 +265,34 @@ out: | |||
265 | } | 265 | } |
266 | 266 | ||
267 | /** | 267 | /** |
268 | * ecryptfs_prepare_write | 268 | * ecryptfs_write_begin |
269 | * @file: The eCryptfs file | 269 | * @file: The eCryptfs file |
270 | * @page: The eCryptfs page | 270 | * @mapping: The eCryptfs object |
271 | * @from: The start byte from which we will write | 271 | * @pos: The file offset at which to start writing |
272 | * @to: The end byte to which we will write | 272 | * @len: Length of the write |
273 | * @flags: Various flags | ||
274 | * @pagep: Pointer to return the page | ||
275 | * @fsdata: Pointer to return fs data (unused) | ||
273 | * | 276 | * |
274 | * This function must zero any hole we create | 277 | * This function must zero any hole we create |
275 | * | 278 | * |
276 | * Returns zero on success; non-zero otherwise | 279 | * Returns zero on success; non-zero otherwise |
277 | */ | 280 | */ |
278 | static int ecryptfs_prepare_write(struct file *file, struct page *page, | 281 | static int ecryptfs_write_begin(struct file *file, |
279 | unsigned from, unsigned to) | 282 | struct address_space *mapping, |
283 | loff_t pos, unsigned len, unsigned flags, | ||
284 | struct page **pagep, void **fsdata) | ||
280 | { | 285 | { |
286 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
287 | struct page *page; | ||
281 | loff_t prev_page_end_size; | 288 | loff_t prev_page_end_size; |
282 | int rc = 0; | 289 | int rc = 0; |
283 | 290 | ||
291 | page = __grab_cache_page(mapping, index); | ||
292 | if (!page) | ||
293 | return -ENOMEM; | ||
294 | *pagep = page; | ||
295 | |||
284 | if (!PageUptodate(page)) { | 296 | if (!PageUptodate(page)) { |
285 | struct ecryptfs_crypt_stat *crypt_stat = | 297 | struct ecryptfs_crypt_stat *crypt_stat = |
286 | &ecryptfs_inode_to_private( | 298 | &ecryptfs_inode_to_private( |
@@ -289,8 +301,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
289 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) | 301 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
290 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { | 302 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { |
291 | rc = ecryptfs_read_lower_page_segment( | 303 | rc = ecryptfs_read_lower_page_segment( |
292 | page, page->index, 0, PAGE_CACHE_SIZE, | 304 | page, index, 0, PAGE_CACHE_SIZE, mapping->host); |
293 | page->mapping->host); | ||
294 | if (rc) { | 305 | if (rc) { |
295 | printk(KERN_ERR "%s: Error attemping to read " | 306 | printk(KERN_ERR "%s: Error attemping to read " |
296 | "lower page segment; rc = [%d]\n", | 307 | "lower page segment; rc = [%d]\n", |
@@ -316,8 +327,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
316 | SetPageUptodate(page); | 327 | SetPageUptodate(page); |
317 | } else { | 328 | } else { |
318 | rc = ecryptfs_read_lower_page_segment( | 329 | rc = ecryptfs_read_lower_page_segment( |
319 | page, page->index, 0, PAGE_CACHE_SIZE, | 330 | page, index, 0, PAGE_CACHE_SIZE, |
320 | page->mapping->host); | 331 | mapping->host); |
321 | if (rc) { | 332 | if (rc) { |
322 | printk(KERN_ERR "%s: Error reading " | 333 | printk(KERN_ERR "%s: Error reading " |
323 | "page; rc = [%d]\n", | 334 | "page; rc = [%d]\n", |
@@ -339,10 +350,10 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
339 | SetPageUptodate(page); | 350 | SetPageUptodate(page); |
340 | } | 351 | } |
341 | } | 352 | } |
342 | prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); | 353 | prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); |
343 | /* If creating a page or more of holes, zero them out via truncate. | 354 | /* If creating a page or more of holes, zero them out via truncate. |
344 | * Note, this will increase i_size. */ | 355 | * Note, this will increase i_size. */ |
345 | if (page->index != 0) { | 356 | if (index != 0) { |
346 | if (prev_page_end_size > i_size_read(page->mapping->host)) { | 357 | if (prev_page_end_size > i_size_read(page->mapping->host)) { |
347 | rc = ecryptfs_truncate(file->f_path.dentry, | 358 | rc = ecryptfs_truncate(file->f_path.dentry, |
348 | prev_page_end_size); | 359 | prev_page_end_size); |
@@ -357,8 +368,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
357 | } | 368 | } |
358 | /* Writing to a new page, and creating a small hole from start | 369 | /* Writing to a new page, and creating a small hole from start |
359 | * of page? Zero it out. */ | 370 | * of page? Zero it out. */ |
360 | if ((i_size_read(page->mapping->host) == prev_page_end_size) | 371 | if ((i_size_read(mapping->host) == prev_page_end_size) |
361 | && (from != 0)) | 372 | && (pos != 0)) |
362 | zero_user(page, 0, PAGE_CACHE_SIZE); | 373 | zero_user(page, 0, PAGE_CACHE_SIZE); |
363 | out: | 374 | out: |
364 | return rc; | 375 | return rc; |
@@ -445,21 +456,28 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) | |||
445 | } | 456 | } |
446 | 457 | ||
447 | /** | 458 | /** |
448 | * ecryptfs_commit_write | 459 | * ecryptfs_write_end |
449 | * @file: The eCryptfs file object | 460 | * @file: The eCryptfs file object |
461 | * @mapping: The eCryptfs object | ||
462 | * @pos: The file position | ||
463 | * @len: The length of the data (unused) | ||
464 | * @copied: The amount of data copied | ||
450 | * @page: The eCryptfs page | 465 | * @page: The eCryptfs page |
451 | * @from: Ignored (we rotate the page IV on each write) | 466 | * @fsdata: The fsdata (unused) |
452 | * @to: Ignored | ||
453 | * | 467 | * |
454 | * This is where we encrypt the data and pass the encrypted data to | 468 | * This is where we encrypt the data and pass the encrypted data to |
455 | * the lower filesystem. In OpenPGP-compatible mode, we operate on | 469 | * the lower filesystem. In OpenPGP-compatible mode, we operate on |
456 | * entire underlying packets. | 470 | * entire underlying packets. |
457 | */ | 471 | */ |
458 | static int ecryptfs_commit_write(struct file *file, struct page *page, | 472 | static int ecryptfs_write_end(struct file *file, |
459 | unsigned from, unsigned to) | 473 | struct address_space *mapping, |
474 | loff_t pos, unsigned len, unsigned copied, | ||
475 | struct page *page, void *fsdata) | ||
460 | { | 476 | { |
461 | loff_t pos; | 477 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
462 | struct inode *ecryptfs_inode = page->mapping->host; | 478 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
479 | unsigned to = from + copied; | ||
480 | struct inode *ecryptfs_inode = mapping->host; | ||
463 | struct ecryptfs_crypt_stat *crypt_stat = | 481 | struct ecryptfs_crypt_stat *crypt_stat = |
464 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 482 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; |
465 | int rc; | 483 | int rc; |
@@ -471,25 +489,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, | |||
471 | } else | 489 | } else |
472 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | 490 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); |
473 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | 491 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" |
474 | "(page w/ index = [0x%.16x], to = [%d])\n", page->index, | 492 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); |
475 | to); | ||
476 | /* Fills in zeros if 'to' goes beyond inode size */ | 493 | /* Fills in zeros if 'to' goes beyond inode size */ |
477 | rc = fill_zeros_to_end_of_page(page, to); | 494 | rc = fill_zeros_to_end_of_page(page, to); |
478 | if (rc) { | 495 | if (rc) { |
479 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " | 496 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " |
480 | "zeros in page with index = [0x%.16x]\n", | 497 | "zeros in page with index = [0x%.16x]\n", index); |
481 | page->index); | ||
482 | goto out; | 498 | goto out; |
483 | } | 499 | } |
484 | rc = ecryptfs_encrypt_page(page); | 500 | rc = ecryptfs_encrypt_page(page); |
485 | if (rc) { | 501 | if (rc) { |
486 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " | 502 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " |
487 | "index [0x%.16x])\n", page->index); | 503 | "index [0x%.16x])\n", index); |
488 | goto out; | 504 | goto out; |
489 | } | 505 | } |
490 | pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to; | 506 | if (pos + copied > i_size_read(ecryptfs_inode)) { |
491 | if (pos > i_size_read(ecryptfs_inode)) { | 507 | i_size_write(ecryptfs_inode, pos + copied); |
492 | i_size_write(ecryptfs_inode, pos); | ||
493 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " | 508 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " |
494 | "[0x%.16x]\n", i_size_read(ecryptfs_inode)); | 509 | "[0x%.16x]\n", i_size_read(ecryptfs_inode)); |
495 | } | 510 | } |
@@ -497,7 +512,11 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, | |||
497 | if (rc) | 512 | if (rc) |
498 | printk(KERN_ERR "Error writing inode size to metadata; " | 513 | printk(KERN_ERR "Error writing inode size to metadata; " |
499 | "rc = [%d]\n", rc); | 514 | "rc = [%d]\n", rc); |
515 | else | ||
516 | rc = copied; | ||
500 | out: | 517 | out: |
518 | unlock_page(page); | ||
519 | page_cache_release(page); | ||
501 | return rc; | 520 | return rc; |
502 | } | 521 | } |
503 | 522 | ||
@@ -518,7 +537,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) | |||
518 | struct address_space_operations ecryptfs_aops = { | 537 | struct address_space_operations ecryptfs_aops = { |
519 | .writepage = ecryptfs_writepage, | 538 | .writepage = ecryptfs_writepage, |
520 | .readpage = ecryptfs_readpage, | 539 | .readpage = ecryptfs_readpage, |
521 | .prepare_write = ecryptfs_prepare_write, | 540 | .write_begin = ecryptfs_write_begin, |
522 | .commit_write = ecryptfs_commit_write, | 541 | .write_end = ecryptfs_write_end, |
523 | .bmap = ecryptfs_bmap, | 542 | .bmap = ecryptfs_bmap, |
524 | }; | 543 | }; |
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c deleted file mode 100644 index e0abad62b395..000000000000 --- a/fs/ecryptfs/netlink.c +++ /dev/null | |||
@@ -1,249 +0,0 @@ | |||
1 | /** | ||
2 | * eCryptfs: Linux filesystem encryption layer | ||
3 | * | ||
4 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
5 | * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> | ||
6 | * Tyler Hicks <tyhicks@ou.edu> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version | ||
10 | * 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
20 | * 02111-1307, USA. | ||
21 | */ | ||
22 | |||
23 | #include <net/sock.h> | ||
24 | #include <linux/hash.h> | ||
25 | #include <linux/random.h> | ||
26 | #include "ecryptfs_kernel.h" | ||
27 | |||
28 | static struct sock *ecryptfs_nl_sock; | ||
29 | |||
30 | /** | ||
31 | * ecryptfs_send_netlink | ||
32 | * @data: The data to include as the payload | ||
33 | * @data_len: The byte count of the data | ||
34 | * @msg_ctx: The netlink context that will be used to handle the | ||
35 | * response message | ||
36 | * @msg_type: The type of netlink message to send | ||
37 | * @msg_flags: The flags to include in the netlink header | ||
38 | * @daemon_pid: The process id of the daemon to send the message to | ||
39 | * | ||
40 | * Sends the data to the specified daemon pid and uses the netlink | ||
41 | * context element to store the data needed for validation upon | ||
42 | * receiving the response. The data and the netlink context can be | ||
43 | * null if just sending a netlink header is sufficient. Returns zero | ||
44 | * upon sending the message; non-zero upon error. | ||
45 | */ | ||
46 | int ecryptfs_send_netlink(char *data, int data_len, | ||
47 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
48 | u16 msg_flags, struct pid *daemon_pid) | ||
49 | { | ||
50 | struct sk_buff *skb; | ||
51 | struct nlmsghdr *nlh; | ||
52 | struct ecryptfs_message *msg; | ||
53 | size_t payload_len; | ||
54 | int rc; | ||
55 | |||
56 | payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0); | ||
57 | skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL); | ||
58 | if (!skb) { | ||
59 | rc = -ENOMEM; | ||
60 | ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); | ||
61 | goto out; | ||
62 | } | ||
63 | nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0, | ||
64 | msg_type, payload_len); | ||
65 | nlh->nlmsg_flags = msg_flags; | ||
66 | if (msg_ctx && payload_len) { | ||
67 | msg = (struct ecryptfs_message *)NLMSG_DATA(nlh); | ||
68 | msg->index = msg_ctx->index; | ||
69 | msg->data_len = data_len; | ||
70 | memcpy(msg->data, data, data_len); | ||
71 | } | ||
72 | rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); | ||
73 | if (rc < 0) { | ||
74 | ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " | ||
75 | "message; rc = [%d]\n", rc); | ||
76 | goto out; | ||
77 | } | ||
78 | rc = 0; | ||
79 | goto out; | ||
80 | nlmsg_failure: | ||
81 | rc = -EMSGSIZE; | ||
82 | kfree_skb(skb); | ||
83 | out: | ||
84 | return rc; | ||
85 | } | ||
86 | |||
87 | /** | ||
88 | * ecryptfs_process_nl_reponse | ||
89 | * @skb: The socket buffer containing the netlink message of state | ||
90 | * RESPONSE | ||
91 | * | ||
92 | * Processes a response message after sending a operation request to | ||
93 | * userspace. Attempts to assign the msg to a netlink context element | ||
94 | * at the index specified in the msg. The sk_buff and nlmsghdr must | ||
95 | * be validated before this function. Returns zero upon delivery to | ||
96 | * desired context element; non-zero upon delivery failure or error. | ||
97 | */ | ||
98 | static int ecryptfs_process_nl_response(struct sk_buff *skb) | ||
99 | { | ||
100 | struct nlmsghdr *nlh = nlmsg_hdr(skb); | ||
101 | struct ecryptfs_message *msg = NLMSG_DATA(nlh); | ||
102 | struct pid *pid; | ||
103 | int rc; | ||
104 | |||
105 | if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { | ||
106 | rc = -EINVAL; | ||
107 | ecryptfs_printk(KERN_ERR, "Received netlink message with " | ||
108 | "incorrectly specified data length\n"); | ||
109 | goto out; | ||
110 | } | ||
111 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
112 | rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, | ||
113 | pid, nlh->nlmsg_seq); | ||
114 | put_pid(pid); | ||
115 | if (rc) | ||
116 | printk(KERN_ERR | ||
117 | "Error processing response message; rc = [%d]\n", rc); | ||
118 | out: | ||
119 | return rc; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * ecryptfs_process_nl_helo | ||
124 | * @skb: The socket buffer containing the nlmsghdr in HELO state | ||
125 | * | ||
126 | * Gets uid and pid of the skb and adds the values to the daemon id | ||
127 | * hash. Returns zero after adding a new daemon id to the hash list; | ||
128 | * non-zero otherwise. | ||
129 | */ | ||
130 | static int ecryptfs_process_nl_helo(struct sk_buff *skb) | ||
131 | { | ||
132 | struct pid *pid; | ||
133 | int rc; | ||
134 | |||
135 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
136 | rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, | ||
137 | NETLINK_CREDS(skb)->uid, NULL, pid); | ||
138 | put_pid(pid); | ||
139 | if (rc) | ||
140 | printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); | ||
141 | return rc; | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * ecryptfs_process_nl_quit | ||
146 | * @skb: The socket buffer containing the nlmsghdr in QUIT state | ||
147 | * | ||
148 | * Gets uid and pid of the skb and deletes the corresponding daemon | ||
149 | * id, if it is the registered that is requesting the | ||
150 | * deletion. Returns zero after deleting the desired daemon id; | ||
151 | * non-zero otherwise. | ||
152 | */ | ||
153 | static int ecryptfs_process_nl_quit(struct sk_buff *skb) | ||
154 | { | ||
155 | struct pid *pid; | ||
156 | int rc; | ||
157 | |||
158 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
159 | rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); | ||
160 | put_pid(pid); | ||
161 | if (rc) | ||
162 | printk(KERN_WARNING | ||
163 | "Error processing QUIT message; rc = [%d]\n", rc); | ||
164 | return rc; | ||
165 | } | ||
166 | |||
167 | /** | ||
168 | * ecryptfs_receive_nl_message | ||
169 | * | ||
170 | * Callback function called by netlink system when a message arrives. | ||
171 | * If the message looks to be valid, then an attempt is made to assign | ||
172 | * it to its desired netlink context element and wake up the process | ||
173 | * that is waiting for a response. | ||
174 | */ | ||
175 | static void ecryptfs_receive_nl_message(struct sk_buff *skb) | ||
176 | { | ||
177 | struct nlmsghdr *nlh; | ||
178 | |||
179 | nlh = nlmsg_hdr(skb); | ||
180 | if (!NLMSG_OK(nlh, skb->len)) { | ||
181 | ecryptfs_printk(KERN_ERR, "Received corrupt netlink " | ||
182 | "message\n"); | ||
183 | goto free; | ||
184 | } | ||
185 | switch (nlh->nlmsg_type) { | ||
186 | case ECRYPTFS_MSG_RESPONSE: | ||
187 | if (ecryptfs_process_nl_response(skb)) { | ||
188 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
189 | "deliver netlink response to " | ||
190 | "requesting operation\n"); | ||
191 | } | ||
192 | break; | ||
193 | case ECRYPTFS_MSG_HELO: | ||
194 | if (ecryptfs_process_nl_helo(skb)) { | ||
195 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
196 | "fulfill HELO request\n"); | ||
197 | } | ||
198 | break; | ||
199 | case ECRYPTFS_MSG_QUIT: | ||
200 | if (ecryptfs_process_nl_quit(skb)) { | ||
201 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
202 | "fulfill QUIT request\n"); | ||
203 | } | ||
204 | break; | ||
205 | default: | ||
206 | ecryptfs_printk(KERN_WARNING, "Dropping netlink " | ||
207 | "message of unrecognized type [%d]\n", | ||
208 | nlh->nlmsg_type); | ||
209 | break; | ||
210 | } | ||
211 | free: | ||
212 | kfree_skb(skb); | ||
213 | } | ||
214 | |||
215 | /** | ||
216 | * ecryptfs_init_netlink | ||
217 | * | ||
218 | * Initializes the daemon id hash list, netlink context array, and | ||
219 | * necessary locks. Returns zero upon success; non-zero upon error. | ||
220 | */ | ||
221 | int ecryptfs_init_netlink(void) | ||
222 | { | ||
223 | int rc; | ||
224 | |||
225 | ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0, | ||
226 | ecryptfs_receive_nl_message, | ||
227 | NULL, THIS_MODULE); | ||
228 | if (!ecryptfs_nl_sock) { | ||
229 | rc = -EIO; | ||
230 | ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n"); | ||
231 | goto out; | ||
232 | } | ||
233 | ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT; | ||
234 | rc = 0; | ||
235 | out: | ||
236 | return rc; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * ecryptfs_release_netlink | ||
241 | * | ||
242 | * Frees all memory used by the netlink context array and releases the | ||
243 | * netlink socket. | ||
244 | */ | ||
245 | void ecryptfs_release_netlink(void) | ||
246 | { | ||
247 | netlink_kernel_release(ecryptfs_nl_sock); | ||
248 | ecryptfs_nl_sock = NULL; | ||
249 | } | ||
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
74 | } | 74 | } |
75 | unlock_kernel(); | 75 | unlock_kernel(); |
76 | 76 | ||
77 | d_add(dentry, inode); | 77 | return d_splice_alias(inode, dentry); |
78 | return NULL; | ||
79 | } | 78 | } |
80 | 79 | ||
81 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 567b134fa1f1..73b19cfc91fc 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -341,8 +341,6 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { | |||
341 | sb->inode_blocks * | 341 | sb->inode_blocks * |
342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); | 342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); |
343 | buf->f_ffree = sb->inode_free; /* free inodes */ | 343 | buf->f_ffree = sb->inode_free; /* free inodes */ |
344 | buf->f_fsid.val[0] = (sb->fs_magic >> 16) & 0xffff; /* fs ID */ | ||
345 | buf->f_fsid.val[1] = sb->fs_magic & 0xffff; /* fs ID */ | ||
346 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ | 344 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ |
347 | 345 | ||
348 | return 0; | 346 | return 0; |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7cc0eb756b55..99368bda0261 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -927,14 +927,11 @@ errxit: | |||
927 | /* | 927 | /* |
928 | * During the time we spent in the loop above, some other events | 928 | * During the time we spent in the loop above, some other events |
929 | * might have been queued by the poll callback. We re-insert them | 929 | * might have been queued by the poll callback. We re-insert them |
930 | * here (in case they are not already queued, or they're one-shot). | 930 | * inside the main ready-list here. |
931 | */ | 931 | */ |
932 | for (nepi = ep->ovflist; (epi = nepi) != NULL; | 932 | for (nepi = ep->ovflist; (epi = nepi) != NULL; |
933 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { | 933 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) |
934 | if (!ep_is_linked(&epi->rdllink) && | 934 | list_add_tail(&epi->rdllink, &ep->rdllist); |
935 | (epi->event.events & ~EP_PRIVATE_BITS)) | ||
936 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
937 | } | ||
938 | /* | 935 | /* |
939 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | 936 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after |
940 | * releasing the lock, events will be queued in the normal way inside | 937 | * releasing the lock, events will be queued in the normal way inside |
@@ -50,15 +50,12 @@ | |||
50 | #include <linux/cn_proc.h> | 50 | #include <linux/cn_proc.h> |
51 | #include <linux/audit.h> | 51 | #include <linux/audit.h> |
52 | #include <linux/tracehook.h> | 52 | #include <linux/tracehook.h> |
53 | #include <linux/kmod.h> | ||
53 | 54 | ||
54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
55 | #include <asm/mmu_context.h> | 56 | #include <asm/mmu_context.h> |
56 | #include <asm/tlb.h> | 57 | #include <asm/tlb.h> |
57 | 58 | ||
58 | #ifdef CONFIG_KMOD | ||
59 | #include <linux/kmod.h> | ||
60 | #endif | ||
61 | |||
62 | #ifdef __alpha__ | 59 | #ifdef __alpha__ |
63 | /* for /sbin/loader handling in search_binary_handler() */ | 60 | /* for /sbin/loader handling in search_binary_handler() */ |
64 | #include <linux/a.out.h> | 61 | #include <linux/a.out.h> |
@@ -391,7 +388,7 @@ static int count(char __user * __user * argv, int max) | |||
391 | if (!p) | 388 | if (!p) |
392 | break; | 389 | break; |
393 | argv++; | 390 | argv++; |
394 | if(++i > max) | 391 | if (i++ >= max) |
395 | return -E2BIG; | 392 | return -E2BIG; |
396 | cond_resched(); | 393 | cond_resched(); |
397 | } | 394 | } |
@@ -752,11 +749,11 @@ static int exec_mmap(struct mm_struct *mm) | |||
752 | tsk->active_mm = mm; | 749 | tsk->active_mm = mm; |
753 | activate_mm(active_mm, mm); | 750 | activate_mm(active_mm, mm); |
754 | task_unlock(tsk); | 751 | task_unlock(tsk); |
755 | mm_update_next_owner(old_mm); | ||
756 | arch_pick_mmap_layout(mm); | 752 | arch_pick_mmap_layout(mm); |
757 | if (old_mm) { | 753 | if (old_mm) { |
758 | up_read(&old_mm->mmap_sem); | 754 | up_read(&old_mm->mmap_sem); |
759 | BUG_ON(active_mm != old_mm); | 755 | BUG_ON(active_mm != old_mm); |
756 | mm_update_next_owner(old_mm); | ||
760 | mmput(old_mm); | 757 | mmput(old_mm); |
761 | return 0; | 758 | return 0; |
762 | } | 759 | } |
@@ -825,8 +822,6 @@ static int de_thread(struct task_struct *tsk) | |||
825 | schedule(); | 822 | schedule(); |
826 | } | 823 | } |
827 | 824 | ||
828 | if (unlikely(task_child_reaper(tsk) == leader)) | ||
829 | task_active_pid_ns(tsk)->child_reaper = tsk; | ||
830 | /* | 825 | /* |
831 | * The only record we have of the real-time age of a | 826 | * The only record we have of the real-time age of a |
832 | * process, regardless of execs it's done, is start_time. | 827 | * process, regardless of execs it's done, is start_time. |
@@ -1189,7 +1184,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1189 | return retval; | 1184 | return retval; |
1190 | 1185 | ||
1191 | /* Remember if the application is TASO. */ | 1186 | /* Remember if the application is TASO. */ |
1192 | bprm->sh_bang = eh->ah.entry < 0x100000000UL; | 1187 | bprm->taso = eh->ah.entry < 0x100000000UL; |
1193 | 1188 | ||
1194 | bprm->file = file; | 1189 | bprm->file = file; |
1195 | bprm->loader = loader; | 1190 | bprm->loader = loader; |
@@ -1247,8 +1242,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1247 | read_unlock(&binfmt_lock); | 1242 | read_unlock(&binfmt_lock); |
1248 | if (retval != -ENOEXEC || bprm->mm == NULL) { | 1243 | if (retval != -ENOEXEC || bprm->mm == NULL) { |
1249 | break; | 1244 | break; |
1250 | #ifdef CONFIG_KMOD | 1245 | #ifdef CONFIG_MODULES |
1251 | }else{ | 1246 | } else { |
1252 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) | 1247 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) |
1253 | if (printable(bprm->buf[0]) && | 1248 | if (printable(bprm->buf[0]) && |
1254 | printable(bprm->buf[1]) && | 1249 | printable(bprm->buf[1]) && |
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 10bb02c3f25c..6dac7ba2d22d 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c | |||
@@ -1295,6 +1295,7 @@ retry_alloc: | |||
1295 | * turn off reservation for this allocation | 1295 | * turn off reservation for this allocation |
1296 | */ | 1296 | */ |
1297 | if (my_rsv && (free_blocks < windowsz) | 1297 | if (my_rsv && (free_blocks < windowsz) |
1298 | && (free_blocks > 0) | ||
1298 | && (rsv_is_empty(&my_rsv->rsv_window))) | 1299 | && (rsv_is_empty(&my_rsv->rsv_window))) |
1299 | my_rsv = NULL; | 1300 | my_rsv = NULL; |
1300 | 1301 | ||
@@ -1332,7 +1333,7 @@ retry_alloc: | |||
1332 | * free blocks is less than half of the reservation | 1333 | * free blocks is less than half of the reservation |
1333 | * window size. | 1334 | * window size. |
1334 | */ | 1335 | */ |
1335 | if (free_blocks <= (windowsz/2)) | 1336 | if (my_rsv && (free_blocks <= (windowsz/2))) |
1336 | continue; | 1337 | continue; |
1337 | 1338 | ||
1338 | brelse(bitmap_bh); | 1339 | brelse(bitmap_bh); |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a78c6b4af060..11a49ce84392 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -103,7 +103,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) | |||
103 | return err; | 103 | return err; |
104 | } | 104 | } |
105 | 105 | ||
106 | static void ext2_check_page(struct page *page) | 106 | static void ext2_check_page(struct page *page, int quiet) |
107 | { | 107 | { |
108 | struct inode *dir = page->mapping->host; | 108 | struct inode *dir = page->mapping->host; |
109 | struct super_block *sb = dir->i_sb; | 109 | struct super_block *sb = dir->i_sb; |
@@ -146,10 +146,10 @@ out: | |||
146 | /* Too bad, we had an error */ | 146 | /* Too bad, we had an error */ |
147 | 147 | ||
148 | Ebadsize: | 148 | Ebadsize: |
149 | ext2_error(sb, "ext2_check_page", | 149 | if (!quiet) |
150 | "size of directory #%lu is not a multiple of chunk size", | 150 | ext2_error(sb, __func__, |
151 | dir->i_ino | 151 | "size of directory #%lu is not a multiple " |
152 | ); | 152 | "of chunk size", dir->i_ino); |
153 | goto fail; | 153 | goto fail; |
154 | Eshort: | 154 | Eshort: |
155 | error = "rec_len is smaller than minimal"; | 155 | error = "rec_len is smaller than minimal"; |
@@ -166,32 +166,36 @@ Espan: | |||
166 | Einumber: | 166 | Einumber: |
167 | error = "inode out of bounds"; | 167 | error = "inode out of bounds"; |
168 | bad_entry: | 168 | bad_entry: |
169 | ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " | 169 | if (!quiet) |
170 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 170 | ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " |
171 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | 171 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
172 | (unsigned long) le32_to_cpu(p->inode), | 172 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, |
173 | rec_len, p->name_len); | 173 | (unsigned long) le32_to_cpu(p->inode), |
174 | rec_len, p->name_len); | ||
174 | goto fail; | 175 | goto fail; |
175 | Eend: | 176 | Eend: |
176 | p = (ext2_dirent *)(kaddr + offs); | 177 | if (!quiet) { |
177 | ext2_error (sb, "ext2_check_page", | 178 | p = (ext2_dirent *)(kaddr + offs); |
178 | "entry in directory #%lu spans the page boundary" | 179 | ext2_error(sb, "ext2_check_page", |
179 | "offset=%lu, inode=%lu", | 180 | "entry in directory #%lu spans the page boundary" |
180 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | 181 | "offset=%lu, inode=%lu", |
181 | (unsigned long) le32_to_cpu(p->inode)); | 182 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, |
183 | (unsigned long) le32_to_cpu(p->inode)); | ||
184 | } | ||
182 | fail: | 185 | fail: |
183 | SetPageChecked(page); | 186 | SetPageChecked(page); |
184 | SetPageError(page); | 187 | SetPageError(page); |
185 | } | 188 | } |
186 | 189 | ||
187 | static struct page * ext2_get_page(struct inode *dir, unsigned long n) | 190 | static struct page * ext2_get_page(struct inode *dir, unsigned long n, |
191 | int quiet) | ||
188 | { | 192 | { |
189 | struct address_space *mapping = dir->i_mapping; | 193 | struct address_space *mapping = dir->i_mapping; |
190 | struct page *page = read_mapping_page(mapping, n, NULL); | 194 | struct page *page = read_mapping_page(mapping, n, NULL); |
191 | if (!IS_ERR(page)) { | 195 | if (!IS_ERR(page)) { |
192 | kmap(page); | 196 | kmap(page); |
193 | if (!PageChecked(page)) | 197 | if (!PageChecked(page)) |
194 | ext2_check_page(page); | 198 | ext2_check_page(page, quiet); |
195 | if (PageError(page)) | 199 | if (PageError(page)) |
196 | goto fail; | 200 | goto fail; |
197 | } | 201 | } |
@@ -292,7 +296,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
292 | for ( ; n < npages; n++, offset = 0) { | 296 | for ( ; n < npages; n++, offset = 0) { |
293 | char *kaddr, *limit; | 297 | char *kaddr, *limit; |
294 | ext2_dirent *de; | 298 | ext2_dirent *de; |
295 | struct page *page = ext2_get_page(inode, n); | 299 | struct page *page = ext2_get_page(inode, n, 0); |
296 | 300 | ||
297 | if (IS_ERR(page)) { | 301 | if (IS_ERR(page)) { |
298 | ext2_error(sb, __func__, | 302 | ext2_error(sb, __func__, |
@@ -361,6 +365,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
361 | struct page *page = NULL; | 365 | struct page *page = NULL; |
362 | struct ext2_inode_info *ei = EXT2_I(dir); | 366 | struct ext2_inode_info *ei = EXT2_I(dir); |
363 | ext2_dirent * de; | 367 | ext2_dirent * de; |
368 | int dir_has_error = 0; | ||
364 | 369 | ||
365 | if (npages == 0) | 370 | if (npages == 0) |
366 | goto out; | 371 | goto out; |
@@ -374,7 +379,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
374 | n = start; | 379 | n = start; |
375 | do { | 380 | do { |
376 | char *kaddr; | 381 | char *kaddr; |
377 | page = ext2_get_page(dir, n); | 382 | page = ext2_get_page(dir, n, dir_has_error); |
378 | if (!IS_ERR(page)) { | 383 | if (!IS_ERR(page)) { |
379 | kaddr = page_address(page); | 384 | kaddr = page_address(page); |
380 | de = (ext2_dirent *) kaddr; | 385 | de = (ext2_dirent *) kaddr; |
@@ -391,7 +396,9 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
391 | de = ext2_next_entry(de); | 396 | de = ext2_next_entry(de); |
392 | } | 397 | } |
393 | ext2_put_page(page); | 398 | ext2_put_page(page); |
394 | } | 399 | } else |
400 | dir_has_error = 1; | ||
401 | |||
395 | if (++n >= npages) | 402 | if (++n >= npages) |
396 | n = 0; | 403 | n = 0; |
397 | /* next page is past the blocks we've got */ | 404 | /* next page is past the blocks we've got */ |
@@ -414,7 +421,7 @@ found: | |||
414 | 421 | ||
415 | struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) | 422 | struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) |
416 | { | 423 | { |
417 | struct page *page = ext2_get_page(dir, 0); | 424 | struct page *page = ext2_get_page(dir, 0, 0); |
418 | ext2_dirent *de = NULL; | 425 | ext2_dirent *de = NULL; |
419 | 426 | ||
420 | if (!IS_ERR(page)) { | 427 | if (!IS_ERR(page)) { |
@@ -487,7 +494,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
487 | for (n = 0; n <= npages; n++) { | 494 | for (n = 0; n <= npages; n++) { |
488 | char *dir_end; | 495 | char *dir_end; |
489 | 496 | ||
490 | page = ext2_get_page(dir, n); | 497 | page = ext2_get_page(dir, n, 0); |
491 | err = PTR_ERR(page); | 498 | err = PTR_ERR(page); |
492 | if (IS_ERR(page)) | 499 | if (IS_ERR(page)) |
493 | goto out; | 500 | goto out; |
@@ -655,14 +662,17 @@ int ext2_empty_dir (struct inode * inode) | |||
655 | { | 662 | { |
656 | struct page *page = NULL; | 663 | struct page *page = NULL; |
657 | unsigned long i, npages = dir_pages(inode); | 664 | unsigned long i, npages = dir_pages(inode); |
665 | int dir_has_error = 0; | ||
658 | 666 | ||
659 | for (i = 0; i < npages; i++) { | 667 | for (i = 0; i < npages; i++) { |
660 | char *kaddr; | 668 | char *kaddr; |
661 | ext2_dirent * de; | 669 | ext2_dirent * de; |
662 | page = ext2_get_page(inode, i); | 670 | page = ext2_get_page(inode, i, dir_has_error); |
663 | 671 | ||
664 | if (IS_ERR(page)) | 672 | if (IS_ERR(page)) { |
673 | dir_has_error = 1; | ||
665 | continue; | 674 | continue; |
675 | } | ||
666 | 676 | ||
667 | kaddr = page_address(page); | 677 | kaddr = page_address(page); |
668 | de = (ext2_dirent *)kaddr; | 678 | de = (ext2_dirent *)kaddr; |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33b..bae998c1e44e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); | |||
133 | extern int ext2_setattr (struct dentry *, struct iattr *); | 133 | extern int ext2_setattr (struct dentry *, struct iattr *); |
134 | extern void ext2_set_inode_flags(struct inode *inode); | 134 | extern void ext2_set_inode_flags(struct inode *inode); |
135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
136 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
137 | u64 start, u64 len); | ||
136 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | 138 | int __ext2_write_begin(struct file *file, struct address_space *mapping, |
137 | loff_t pos, unsigned len, unsigned flags, | 139 | loff_t pos, unsigned len, unsigned flags, |
138 | struct page **pagep, void **fsdata); | 140 | struct page **pagep, void **fsdata); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c36293..45ed07122182 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { | |||
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .permission = ext2_permission, |
89 | .fiemap = ext2_fiemap, | ||
89 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51f..7658b33e2653 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
33 | #include <linux/mpage.h> | 33 | #include <linux/mpage.h> |
34 | #include <linux/fiemap.h> | ||
34 | #include "ext2.h" | 35 | #include "ext2.h" |
35 | #include "acl.h" | 36 | #include "acl.h" |
36 | #include "xip.h" | 37 | #include "xip.h" |
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ | |||
704 | 705 | ||
705 | } | 706 | } |
706 | 707 | ||
708 | int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
709 | u64 start, u64 len) | ||
710 | { | ||
711 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
712 | ext2_get_block); | ||
713 | } | ||
714 | |||
707 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | 715 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) |
708 | { | 716 | { |
709 | return block_write_full_page(page, ext2_get_block, wbc); | 717 | return block_write_full_page(page, ext2_get_block, wbc); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fd88c7b43e66..647cd888ac87 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -393,7 +393,7 @@ enum { | |||
393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation | 393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation |
394 | }; | 394 | }; |
395 | 395 | ||
396 | static match_table_t tokens = { | 396 | static const match_table_t tokens = { |
397 | {Opt_bsd_df, "bsddf"}, | 397 | {Opt_bsd_df, "bsddf"}, |
398 | {Opt_minix_df, "minixdf"}, | 398 | {Opt_minix_df, "minixdf"}, |
399 | {Opt_grpid, "grpid"}, | 399 | {Opt_grpid, "grpid"}, |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d3019..3be1e0689c9a 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { | |||
134 | .removexattr = generic_removexattr, | 134 | .removexattr = generic_removexattr, |
135 | #endif | 135 | #endif |
136 | .permission = ext3_permission, | 136 | .permission = ext3_permission, |
137 | .fiemap = ext3_fiemap, | ||
137 | }; | 138 | }; |
138 | 139 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b111..ebfec4d0148e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | ||
39 | #include "xattr.h" | 40 | #include "xattr.h" |
40 | #include "acl.h" | 41 | #include "acl.h" |
41 | 42 | ||
@@ -981,6 +982,13 @@ out: | |||
981 | return ret; | 982 | return ret; |
982 | } | 983 | } |
983 | 984 | ||
985 | int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
986 | u64 start, u64 len) | ||
987 | { | ||
988 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
989 | ext3_get_block); | ||
990 | } | ||
991 | |||
984 | /* | 992 | /* |
985 | * `handle' can be NULL if create is zero | 993 | * `handle' can be NULL if create is zero |
986 | */ | 994 | */ |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f38a5afc39a1..399a96a6c556 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -760,7 +760,7 @@ enum { | |||
760 | Opt_grpquota | 760 | Opt_grpquota |
761 | }; | 761 | }; |
762 | 762 | ||
763 | static match_table_t tokens = { | 763 | static const match_table_t tokens = { |
764 | {Opt_bsd_df, "bsddf"}, | 764 | {Opt_bsd_df, "bsddf"}, |
765 | {Opt_minix_df, "minixdf"}, | 765 | {Opt_minix_df, "minixdf"}, |
766 | {Opt_grpid, "grpid"}, | 766 | {Opt_grpid, "grpid"}, |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2f..a8ff003a00f7 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -2,12 +2,12 @@ | |||
2 | # Makefile for the linux ext4-filesystem routines. | 2 | # Makefile for the linux ext4-filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
10 | 10 | ||
11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
13 | ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o | 13 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d6..cb45257a246e 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl | 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl |
57 | if the ACL has not been cached */ | 57 | if the ACL has not been cached */ |
58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) | 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) |
59 | 59 | ||
60 | /* acl.c */ | 60 | /* acl.c */ |
61 | extern int ext4_permission (struct inode *, int); | 61 | extern int ext4_permission(struct inode *, int); |
62 | extern int ext4_acl_chmod (struct inode *); | 62 | extern int ext4_acl_chmod(struct inode *); |
63 | extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
64 | 64 | ||
65 | #else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
66 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
67 | #define ext4_permission NULL | 67 | #define ext4_permission NULL |
68 | 68 | ||
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
77 | { | 77 | { |
78 | return 0; | 78 | return 0; |
79 | } | 79 | } |
80 | #endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 80 | #endif /* CONFIG_EXT4_FS_POSIX_ACL */ |
81 | 81 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..b9821be709bd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, | |||
83 | } | 83 | } |
84 | return used_blocks; | 84 | return used_blocks; |
85 | } | 85 | } |
86 | |||
86 | /* Initializes an uninitialized block bitmap if given, and returns the | 87 | /* Initializes an uninitialized block bitmap if given, and returns the |
87 | * number of blocks free in the group. */ | 88 | * number of blocks free in the group. */ |
88 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 89 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
132 | */ | 133 | */ |
133 | group_blocks = ext4_blocks_count(sbi->s_es) - | 134 | group_blocks = ext4_blocks_count(sbi->s_es) - |
134 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 135 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
135 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); | 136 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); |
136 | } else { | 137 | } else { |
137 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 138 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
138 | } | 139 | } |
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
200 | * @bh: pointer to the buffer head to store the block | 201 | * @bh: pointer to the buffer head to store the block |
201 | * group descriptor | 202 | * group descriptor |
202 | */ | 203 | */ |
203 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 204 | struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, |
204 | ext4_group_t block_group, | 205 | ext4_group_t block_group, |
205 | struct buffer_head ** bh) | 206 | struct buffer_head **bh) |
206 | { | 207 | { |
207 | unsigned long group_desc; | 208 | unsigned long group_desc; |
208 | unsigned long offset; | 209 | unsigned long offset; |
209 | struct ext4_group_desc * desc; | 210 | struct ext4_group_desc *desc; |
210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 211 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
211 | 212 | ||
212 | if (block_group >= sbi->s_groups_count) { | 213 | if (block_group >= sbi->s_groups_count) { |
213 | ext4_error (sb, "ext4_get_group_desc", | 214 | ext4_error(sb, "ext4_get_group_desc", |
214 | "block_group >= groups_count - " | 215 | "block_group >= groups_count - " |
215 | "block_group = %lu, groups_count = %lu", | 216 | "block_group = %lu, groups_count = %lu", |
216 | block_group, sbi->s_groups_count); | 217 | block_group, sbi->s_groups_count); |
217 | 218 | ||
218 | return NULL; | 219 | return NULL; |
219 | } | 220 | } |
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
222 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 223 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
223 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 224 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
224 | if (!sbi->s_group_desc[group_desc]) { | 225 | if (!sbi->s_group_desc[group_desc]) { |
225 | ext4_error (sb, "ext4_get_group_desc", | 226 | ext4_error(sb, "ext4_get_group_desc", |
226 | "Group descriptor not loaded - " | 227 | "Group descriptor not loaded - " |
227 | "block_group = %lu, group_desc = %lu, desc = %lu", | 228 | "block_group = %lu, group_desc = %lu, desc = %lu", |
228 | block_group, group_desc, offset); | 229 | block_group, group_desc, offset); |
229 | return NULL; | 230 | return NULL; |
230 | } | 231 | } |
231 | 232 | ||
@@ -302,8 +303,8 @@ err_out: | |||
302 | struct buffer_head * | 303 | struct buffer_head * |
303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 304 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
304 | { | 305 | { |
305 | struct ext4_group_desc * desc; | 306 | struct ext4_group_desc *desc; |
306 | struct buffer_head * bh = NULL; | 307 | struct buffer_head *bh = NULL; |
307 | ext4_fsblk_t bitmap_blk; | 308 | ext4_fsblk_t bitmap_blk; |
308 | 309 | ||
309 | desc = ext4_get_group_desc(sb, block_group, NULL); | 310 | desc = ext4_get_group_desc(sb, block_group, NULL); |
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
318 | block_group, bitmap_blk); | 319 | block_group, bitmap_blk); |
319 | return NULL; | 320 | return NULL; |
320 | } | 321 | } |
321 | if (bh_uptodate_or_lock(bh)) | 322 | if (buffer_uptodate(bh) && |
323 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
322 | return bh; | 324 | return bh; |
323 | 325 | ||
326 | lock_buffer(bh); | ||
324 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 327 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
325 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 328 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
326 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 329 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
345 | */ | 348 | */ |
346 | return bh; | 349 | return bh; |
347 | } | 350 | } |
348 | /* | ||
349 | * The reservation window structure operations | ||
350 | * -------------------------------------------- | ||
351 | * Operations include: | ||
352 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | ||
353 | * | ||
354 | * We use a red-black tree to represent per-filesystem reservation | ||
355 | * windows. | ||
356 | * | ||
357 | */ | ||
358 | |||
359 | /** | ||
360 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
361 | * @rb_root: root of per-filesystem reservation rb tree | ||
362 | * @verbose: verbose mode | ||
363 | * @fn: function which wishes to dump the reservation map | ||
364 | * | ||
365 | * If verbose is turned on, it will print the whole block reservation | ||
366 | * windows(start, end). Otherwise, it will only print out the "bad" windows, | ||
367 | * those windows that overlap with their immediate neighbors. | ||
368 | */ | ||
369 | #if 1 | ||
370 | static void __rsv_window_dump(struct rb_root *root, int verbose, | ||
371 | const char *fn) | ||
372 | { | ||
373 | struct rb_node *n; | ||
374 | struct ext4_reserve_window_node *rsv, *prev; | ||
375 | int bad; | ||
376 | |||
377 | restart: | ||
378 | n = rb_first(root); | ||
379 | bad = 0; | ||
380 | prev = NULL; | ||
381 | |||
382 | printk("Block Allocation Reservation Windows Map (%s):\n", fn); | ||
383 | while (n) { | ||
384 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
385 | if (verbose) | ||
386 | printk("reservation window 0x%p " | ||
387 | "start: %llu, end: %llu\n", | ||
388 | rsv, rsv->rsv_start, rsv->rsv_end); | ||
389 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | ||
390 | printk("Bad reservation %p (start >= end)\n", | ||
391 | rsv); | ||
392 | bad = 1; | ||
393 | } | ||
394 | if (prev && prev->rsv_end >= rsv->rsv_start) { | ||
395 | printk("Bad reservation %p (prev->end >= start)\n", | ||
396 | rsv); | ||
397 | bad = 1; | ||
398 | } | ||
399 | if (bad) { | ||
400 | if (!verbose) { | ||
401 | printk("Restarting reservation walk in verbose mode\n"); | ||
402 | verbose = 1; | ||
403 | goto restart; | ||
404 | } | ||
405 | } | ||
406 | n = rb_next(n); | ||
407 | prev = rsv; | ||
408 | } | ||
409 | printk("Window map complete.\n"); | ||
410 | BUG_ON(bad); | ||
411 | } | ||
412 | #define rsv_window_dump(root, verbose) \ | ||
413 | __rsv_window_dump((root), (verbose), __func__) | ||
414 | #else | ||
415 | #define rsv_window_dump(root, verbose) do {} while (0) | ||
416 | #endif | ||
417 | |||
418 | /** | ||
419 | * goal_in_my_reservation() | ||
420 | * @rsv: inode's reservation window | ||
421 | * @grp_goal: given goal block relative to the allocation block group | ||
422 | * @group: the current allocation block group | ||
423 | * @sb: filesystem super block | ||
424 | * | ||
425 | * Test if the given goal block (group relative) is within the file's | ||
426 | * own block reservation window range. | ||
427 | * | ||
428 | * If the reservation window is outside the goal allocation group, return 0; | ||
429 | * grp_goal (given goal block) could be -1, which means no specific | ||
430 | * goal block. In this case, always return 1. | ||
431 | * If the goal block is within the reservation window, return 1; | ||
432 | * otherwise, return 0; | ||
433 | */ | ||
434 | static int | ||
435 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | ||
436 | ext4_group_t group, struct super_block *sb) | ||
437 | { | ||
438 | ext4_fsblk_t group_first_block, group_last_block; | ||
439 | |||
440 | group_first_block = ext4_group_first_block_no(sb, group); | ||
441 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
442 | |||
443 | if ((rsv->_rsv_start > group_last_block) || | ||
444 | (rsv->_rsv_end < group_first_block)) | ||
445 | return 0; | ||
446 | if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) | ||
447 | || (grp_goal + group_first_block > rsv->_rsv_end))) | ||
448 | return 0; | ||
449 | return 1; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * search_reserve_window() | ||
454 | * @rb_root: root of reservation tree | ||
455 | * @goal: target allocation block | ||
456 | * | ||
457 | * Find the reserved window which includes the goal, or the previous one | ||
458 | * if the goal is not in any window. | ||
459 | * Returns NULL if there are no windows or if all windows start after the goal. | ||
460 | */ | ||
461 | static struct ext4_reserve_window_node * | ||
462 | search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) | ||
463 | { | ||
464 | struct rb_node *n = root->rb_node; | ||
465 | struct ext4_reserve_window_node *rsv; | ||
466 | |||
467 | if (!n) | ||
468 | return NULL; | ||
469 | |||
470 | do { | ||
471 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
472 | |||
473 | if (goal < rsv->rsv_start) | ||
474 | n = n->rb_left; | ||
475 | else if (goal > rsv->rsv_end) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return rsv; | ||
479 | } while (n); | ||
480 | /* | ||
481 | * We've fallen off the end of the tree: the goal wasn't inside | ||
482 | * any particular node. OK, the previous node must be to one | ||
483 | * side of the interval containing the goal. If it's the RHS, | ||
484 | * we need to back up one. | ||
485 | */ | ||
486 | if (rsv->rsv_start > goal) { | ||
487 | n = rb_prev(&rsv->rsv_node); | ||
488 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
489 | } | ||
490 | return rsv; | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
495 | * @sb: super block | ||
496 | * @rsv: reservation window to add | ||
497 | * | ||
498 | * Must be called with rsv_lock hold. | ||
499 | */ | ||
500 | void ext4_rsv_window_add(struct super_block *sb, | ||
501 | struct ext4_reserve_window_node *rsv) | ||
502 | { | ||
503 | struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; | ||
504 | struct rb_node *node = &rsv->rsv_node; | ||
505 | ext4_fsblk_t start = rsv->rsv_start; | ||
506 | |||
507 | struct rb_node ** p = &root->rb_node; | ||
508 | struct rb_node * parent = NULL; | ||
509 | struct ext4_reserve_window_node *this; | ||
510 | |||
511 | while (*p) | ||
512 | { | ||
513 | parent = *p; | ||
514 | this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); | ||
515 | |||
516 | if (start < this->rsv_start) | ||
517 | p = &(*p)->rb_left; | ||
518 | else if (start > this->rsv_end) | ||
519 | p = &(*p)->rb_right; | ||
520 | else { | ||
521 | rsv_window_dump(root, 1); | ||
522 | BUG(); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | rb_link_node(node, parent, p); | ||
527 | rb_insert_color(node, root); | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
532 | * @sb: super block | ||
533 | * @rsv: reservation window to remove | ||
534 | * | ||
535 | * Mark the block reservation window as not allocated, and unlink it | ||
536 | * from the filesystem reservation window rb tree. Must be called with | ||
537 | * rsv_lock hold. | ||
538 | */ | ||
539 | static void rsv_window_remove(struct super_block *sb, | ||
540 | struct ext4_reserve_window_node *rsv) | ||
541 | { | ||
542 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
543 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
544 | rsv->rsv_alloc_hit = 0; | ||
545 | rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); | ||
546 | } | ||
547 | |||
548 | /* | ||
549 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
550 | * @rsv: given reservation window to check | ||
551 | * | ||
552 | * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. | ||
553 | */ | ||
554 | static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | ||
555 | { | ||
556 | /* a valid reservation end block could not be 0 */ | ||
557 | return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
558 | } | ||
559 | |||
560 | /** | ||
561 | * ext4_init_block_alloc_info() | ||
562 | * @inode: file inode structure | ||
563 | * | ||
564 | * Allocate and initialize the reservation window structure, and | ||
565 | * link the window to the ext4 inode structure at last | ||
566 | * | ||
567 | * The reservation window structure is only dynamically allocated | ||
568 | * and linked to ext4 inode the first time the open file | ||
569 | * needs a new block. So, before every ext4_new_block(s) call, for | ||
570 | * regular files, we should check whether the reservation window | ||
571 | * structure exists or not. In the latter case, this function is called. | ||
572 | * Fail to do so will result in block reservation being turned off for that | ||
573 | * open file. | ||
574 | * | ||
575 | * This function is called from ext4_get_blocks_handle(), also called | ||
576 | * when setting the reservation window size through ioctl before the file | ||
577 | * is open for write (needs block allocation). | ||
578 | * | ||
579 | * Needs down_write(i_data_sem) protection prior to call this function. | ||
580 | */ | ||
581 | void ext4_init_block_alloc_info(struct inode *inode) | ||
582 | { | ||
583 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
584 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
585 | struct super_block *sb = inode->i_sb; | ||
586 | |||
587 | block_i = kmalloc(sizeof(*block_i), GFP_NOFS); | ||
588 | if (block_i) { | ||
589 | struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; | ||
590 | |||
591 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
592 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
593 | |||
594 | /* | ||
595 | * if filesystem is mounted with NORESERVATION, the goal | ||
596 | * reservation window size is set to zero to indicate | ||
597 | * block reservation is off | ||
598 | */ | ||
599 | if (!test_opt(sb, RESERVATION)) | ||
600 | rsv->rsv_goal_size = 0; | ||
601 | else | ||
602 | rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; | ||
603 | rsv->rsv_alloc_hit = 0; | ||
604 | block_i->last_alloc_logical_block = 0; | ||
605 | block_i->last_alloc_physical_block = 0; | ||
606 | } | ||
607 | ei->i_block_alloc_info = block_i; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * ext4_discard_reservation() | ||
612 | * @inode: inode | ||
613 | * | ||
614 | * Discard(free) block reservation window on last file close, or truncate | ||
615 | * or at last iput(). | ||
616 | * | ||
617 | * It is being called in three cases: | ||
618 | * ext4_release_file(): last writer close the file | ||
619 | * ext4_clear_inode(): last iput(), when nobody link to this file. | ||
620 | * ext4_truncate(): when the block indirect map is about to change. | ||
621 | * | ||
622 | */ | ||
623 | void ext4_discard_reservation(struct inode *inode) | ||
624 | { | ||
625 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
626 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
627 | struct ext4_reserve_window_node *rsv; | ||
628 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||
629 | |||
630 | ext4_mb_discard_inode_preallocations(inode); | ||
631 | |||
632 | if (!block_i) | ||
633 | return; | ||
634 | |||
635 | rsv = &block_i->rsv_window_node; | ||
636 | if (!rsv_is_empty(&rsv->rsv_window)) { | ||
637 | spin_lock(rsv_lock); | ||
638 | if (!rsv_is_empty(&rsv->rsv_window)) | ||
639 | rsv_window_remove(inode->i_sb, rsv); | ||
640 | spin_unlock(rsv_lock); | ||
641 | } | ||
642 | } | ||
643 | 351 | ||
644 | /** | 352 | /** |
645 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 353 | * ext4_free_blocks_sb() -- Free given blocks and update quota |
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) | |||
648 | * @block: start physcial block to free | 356 | * @block: start physcial block to free |
649 | * @count: number of blocks to free | 357 | * @count: number of blocks to free |
650 | * @pdquot_freed_blocks: pointer to quota | 358 | * @pdquot_freed_blocks: pointer to quota |
359 | * | ||
360 | * XXX This function is only used by the on-line resizing code, which | ||
361 | * should probably be fixed up to call the mballoc variant. There | ||
362 | * this needs to be cleaned up later; in fact, I'm not convinced this | ||
363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
365 | * with the mballoc code. | ||
651 | */ | 366 | */ |
652 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
653 | ext4_fsblk_t block, unsigned long count, | 368 | ext4_fsblk_t block, unsigned long count, |
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
659 | ext4_grpblk_t bit; | 374 | ext4_grpblk_t bit; |
660 | unsigned long i; | 375 | unsigned long i; |
661 | unsigned long overflow; | 376 | unsigned long overflow; |
662 | struct ext4_group_desc * desc; | 377 | struct ext4_group_desc *desc; |
663 | struct ext4_super_block * es; | 378 | struct ext4_super_block *es; |
664 | struct ext4_sb_info *sbi; | 379 | struct ext4_sb_info *sbi; |
665 | int err = 0, ret; | 380 | int err = 0, ret; |
666 | ext4_grpblk_t group_freed; | 381 | ext4_grpblk_t group_freed; |
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
671 | if (block < le32_to_cpu(es->s_first_data_block) || | 386 | if (block < le32_to_cpu(es->s_first_data_block) || |
672 | block + count < block || | 387 | block + count < block || |
673 | block + count > ext4_blocks_count(es)) { | 388 | block + count > ext4_blocks_count(es)) { |
674 | ext4_error (sb, "ext4_free_blocks", | 389 | ext4_error(sb, "ext4_free_blocks", |
675 | "Freeing blocks not in datazone - " | 390 | "Freeing blocks not in datazone - " |
676 | "block = %llu, count = %lu", block, count); | 391 | "block = %llu, count = %lu", block, count); |
677 | goto error_return; | 392 | goto error_return; |
678 | } | 393 | } |
679 | 394 | ||
680 | ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); | 395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); |
681 | 396 | ||
682 | do_more: | 397 | do_more: |
683 | overflow = 0; | 398 | overflow = 0; |
@@ -694,7 +409,7 @@ do_more: | |||
694 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
695 | if (!bitmap_bh) | 410 | if (!bitmap_bh) |
696 | goto error_return; | 411 | goto error_return; |
697 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 412 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
698 | if (!desc) | 413 | if (!desc) |
699 | goto error_return; | 414 | goto error_return; |
700 | 415 | ||
@@ -703,10 +418,10 @@ do_more: | |||
703 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
704 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 419 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
705 | sbi->s_itb_per_group)) { | 420 | sbi->s_itb_per_group)) { |
706 | ext4_error (sb, "ext4_free_blocks", | 421 | ext4_error(sb, "ext4_free_blocks", |
707 | "Freeing blocks in system zones - " | 422 | "Freeing blocks in system zones - " |
708 | "Block = %llu, count = %lu", | 423 | "Block = %llu, count = %lu", |
709 | block, count); | 424 | block, count); |
710 | goto error_return; | 425 | goto error_return; |
711 | } | 426 | } |
712 | 427 | ||
@@ -848,759 +563,71 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
848 | ext4_fsblk_t block, unsigned long count, | 563 | ext4_fsblk_t block, unsigned long count, |
849 | int metadata) | 564 | int metadata) |
850 | { | 565 | { |
851 | struct super_block * sb; | 566 | struct super_block *sb; |
852 | unsigned long dquot_freed_blocks; | 567 | unsigned long dquot_freed_blocks; |
853 | 568 | ||
854 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
855 | * inode.c/extents.c knows better, but for safety ... */ | 570 | * inode.c/extents.c knows better, but for safety ... */ |
856 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | 571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
857 | ext4_should_journal_data(inode)) | 572 | metadata = 1; |
573 | |||
574 | /* We need to make sure we don't reuse | ||
575 | * block released untill the transaction commit. | ||
576 | * writeback mode have weak data consistency so | ||
577 | * don't force data as metadata when freeing block | ||
578 | * for writeback mode. | ||
579 | */ | ||
580 | if (metadata == 0 && !ext4_should_writeback_data(inode)) | ||
858 | metadata = 1; | 581 | metadata = 1; |
859 | 582 | ||
860 | sb = inode->i_sb; | 583 | sb = inode->i_sb; |
861 | 584 | ||
862 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | 585 | ext4_mb_free_blocks(handle, inode, block, count, |
863 | ext4_free_blocks_sb(handle, sb, block, count, | 586 | metadata, &dquot_freed_blocks); |
864 | &dquot_freed_blocks); | ||
865 | else | ||
866 | ext4_mb_free_blocks(handle, inode, block, count, | ||
867 | metadata, &dquot_freed_blocks); | ||
868 | if (dquot_freed_blocks) | 587 | if (dquot_freed_blocks) |
869 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 588 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
870 | return; | 589 | return; |
871 | } | 590 | } |
872 | 591 | ||
873 | /** | 592 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
874 | * ext4_test_allocatable() | 593 | s64 nblocks) |
875 | * @nr: given allocation block group | ||
876 | * @bh: bufferhead contains the bitmap of the given block group | ||
877 | * | ||
878 | * For ext4 allocations, we must not reuse any blocks which are | ||
879 | * allocated in the bitmap buffer's "last committed data" copy. This | ||
880 | * prevents deletes from freeing up the page for reuse until we have | ||
881 | * committed the delete transaction. | ||
882 | * | ||
883 | * If we didn't do this, then deleting something and reallocating it as | ||
884 | * data would allow the old block to be overwritten before the | ||
885 | * transaction committed (because we force data to disk before commit). | ||
886 | * This would lead to corruption if we crashed between overwriting the | ||
887 | * data and committing the delete. | ||
888 | * | ||
889 | * @@@ We may want to make this allocation behaviour conditional on | ||
890 | * data-writes at some point, and disable it for metadata allocations or | ||
891 | * sync-data inodes. | ||
892 | */ | ||
893 | static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) | ||
894 | { | 594 | { |
895 | int ret; | 595 | s64 free_blocks, dirty_blocks; |
896 | struct journal_head *jh = bh2jh(bh); | 596 | s64 root_blocks = 0; |
897 | 597 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | |
898 | if (ext4_test_bit(nr, bh->b_data)) | 598 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; |
899 | return 0; | ||
900 | |||
901 | jbd_lock_bh_state(bh); | ||
902 | if (!jh->b_committed_data) | ||
903 | ret = 1; | ||
904 | else | ||
905 | ret = !ext4_test_bit(nr, jh->b_committed_data); | ||
906 | jbd_unlock_bh_state(bh); | ||
907 | return ret; | ||
908 | } | ||
909 | 599 | ||
910 | /** | 600 | free_blocks = percpu_counter_read_positive(fbc); |
911 | * bitmap_search_next_usable_block() | 601 | dirty_blocks = percpu_counter_read_positive(dbc); |
912 | * @start: the starting block (group relative) of the search | ||
913 | * @bh: bufferhead contains the block group bitmap | ||
914 | * @maxblocks: the ending block (group relative) of the reservation | ||
915 | * | ||
916 | * The bitmap search --- search forward alternately through the actual | ||
917 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
918 | * bit free in both bitmaps. | ||
919 | */ | ||
920 | static ext4_grpblk_t | ||
921 | bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
922 | ext4_grpblk_t maxblocks) | ||
923 | { | ||
924 | ext4_grpblk_t next; | ||
925 | struct journal_head *jh = bh2jh(bh); | ||
926 | |||
927 | while (start < maxblocks) { | ||
928 | next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); | ||
929 | if (next >= maxblocks) | ||
930 | return -1; | ||
931 | if (ext4_test_allocatable(next, bh)) | ||
932 | return next; | ||
933 | jbd_lock_bh_state(bh); | ||
934 | if (jh->b_committed_data) | ||
935 | start = ext4_find_next_zero_bit(jh->b_committed_data, | ||
936 | maxblocks, next); | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | ||
939 | return -1; | ||
940 | } | ||
941 | 602 | ||
942 | /** | 603 | if (!capable(CAP_SYS_RESOURCE) && |
943 | * find_next_usable_block() | 604 | sbi->s_resuid != current->fsuid && |
944 | * @start: the starting block (group relative) to find next | 605 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
945 | * allocatable block in bitmap. | 606 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
946 | * @bh: bufferhead contains the block group bitmap | ||
947 | * @maxblocks: the ending block (group relative) for the search | ||
948 | * | ||
949 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
950 | * its last-committed copy (if that exists), and perform the "most | ||
951 | * appropriate allocation" algorithm of looking for a free block near | ||
952 | * the initial goal; then for a free byte somewhere in the bitmap; then | ||
953 | * for any free bit in the bitmap. | ||
954 | */ | ||
955 | static ext4_grpblk_t | ||
956 | find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
957 | ext4_grpblk_t maxblocks) | ||
958 | { | ||
959 | ext4_grpblk_t here, next; | ||
960 | char *p, *r; | ||
961 | |||
962 | if (start > 0) { | ||
963 | /* | ||
964 | * The goal was occupied; search forward for a free | ||
965 | * block within the next XX blocks. | ||
966 | * | ||
967 | * end_goal is more or less random, but it has to be | ||
968 | * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the | ||
969 | * next 64-bit boundary is simple.. | ||
970 | */ | ||
971 | ext4_grpblk_t end_goal = (start + 63) & ~63; | ||
972 | if (end_goal > maxblocks) | ||
973 | end_goal = maxblocks; | ||
974 | here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); | ||
975 | if (here < end_goal && ext4_test_allocatable(here, bh)) | ||
976 | return here; | ||
977 | ext4_debug("Bit not found near goal\n"); | ||
978 | } | ||
979 | |||
980 | here = start; | ||
981 | if (here < 0) | ||
982 | here = 0; | ||
983 | |||
984 | p = ((char *)bh->b_data) + (here >> 3); | ||
985 | r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); | ||
986 | next = (r - ((char *)bh->b_data)) << 3; | ||
987 | |||
988 | if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) | ||
989 | return next; | ||
990 | |||
991 | /* | ||
992 | * The bitmap search --- search forward alternately through the actual | ||
993 | * bitmap and the last-committed copy until we find a bit free in | ||
994 | * both | ||
995 | */ | ||
996 | here = bitmap_search_next_usable_block(here, bh, maxblocks); | ||
997 | return here; | ||
998 | } | ||
999 | |||
1000 | /** | ||
1001 | * claim_block() | ||
1002 | * @block: the free block (group relative) to allocate | ||
1003 | * @bh: the bufferhead containts the block group bitmap | ||
1004 | * | ||
1005 | * We think we can allocate this block in this bitmap. Try to set the bit. | ||
1006 | * If that succeeds then check that nobody has allocated and then freed the | ||
1007 | * block since we saw that is was not marked in b_committed_data. If it _was_ | ||
1008 | * allocated and freed then clear the bit in the bitmap again and return | ||
1009 | * zero (failure). | ||
1010 | */ | ||
1011 | static inline int | ||
1012 | claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | ||
1013 | { | ||
1014 | struct journal_head *jh = bh2jh(bh); | ||
1015 | int ret; | ||
1016 | |||
1017 | if (ext4_set_bit_atomic(lock, block, bh->b_data)) | ||
1018 | return 0; | ||
1019 | jbd_lock_bh_state(bh); | ||
1020 | if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { | ||
1021 | ext4_clear_bit_atomic(lock, block, bh->b_data); | ||
1022 | ret = 0; | ||
1023 | } else { | ||
1024 | ret = 1; | ||
1025 | } | ||
1026 | jbd_unlock_bh_state(bh); | ||
1027 | return ret; | ||
1028 | } | ||
1029 | |||
1030 | /** | ||
1031 | * ext4_try_to_allocate() | ||
1032 | * @sb: superblock | ||
1033 | * @handle: handle to this transaction | ||
1034 | * @group: given allocation block group | ||
1035 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1036 | * @grp_goal: given target block within the group | ||
1037 | * @count: target number of blocks to allocate | ||
1038 | * @my_rsv: reservation window | ||
1039 | * | ||
1040 | * Attempt to allocate blocks within a give range. Set the range of allocation | ||
1041 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
1042 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
1043 | * | ||
1044 | * To set the range of this allocation: | ||
1045 | * if there is a reservation window, only try to allocate block(s) from the | ||
1046 | * file's own reservation window; | ||
1047 | * Otherwise, the allocation range starts from the give goal block, ends at | ||
1048 | * the block group's last block. | ||
1049 | * | ||
1050 | * If we failed to allocate the desired block then we may end up crossing to a | ||
1051 | * new bitmap. In that case we must release write access to the old one via | ||
1052 | * ext4_journal_release_buffer(), else we'll run out of credits. | ||
1053 | */ | ||
1054 | static ext4_grpblk_t | ||
1055 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, | ||
1056 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1057 | ext4_grpblk_t grp_goal, unsigned long *count, | ||
1058 | struct ext4_reserve_window *my_rsv) | ||
1059 | { | ||
1060 | ext4_fsblk_t group_first_block; | ||
1061 | ext4_grpblk_t start, end; | ||
1062 | unsigned long num = 0; | ||
1063 | |||
1064 | /* we do allocation within the reservation window if we have a window */ | ||
1065 | if (my_rsv) { | ||
1066 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1067 | if (my_rsv->_rsv_start >= group_first_block) | ||
1068 | start = my_rsv->_rsv_start - group_first_block; | ||
1069 | else | ||
1070 | /* reservation window cross group boundary */ | ||
1071 | start = 0; | ||
1072 | end = my_rsv->_rsv_end - group_first_block + 1; | ||
1073 | if (end > EXT4_BLOCKS_PER_GROUP(sb)) | ||
1074 | /* reservation window crosses group boundary */ | ||
1075 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1076 | if ((start <= grp_goal) && (grp_goal < end)) | ||
1077 | start = grp_goal; | ||
1078 | else | ||
1079 | grp_goal = -1; | ||
1080 | } else { | ||
1081 | if (grp_goal > 0) | ||
1082 | start = grp_goal; | ||
1083 | else | ||
1084 | start = 0; | ||
1085 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
1086 | } | ||
1087 | |||
1088 | BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); | ||
1089 | |||
1090 | repeat: | ||
1091 | if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { | ||
1092 | grp_goal = find_next_usable_block(start, bitmap_bh, end); | ||
1093 | if (grp_goal < 0) | ||
1094 | goto fail_access; | ||
1095 | if (!my_rsv) { | ||
1096 | int i; | ||
1097 | |||
1098 | for (i = 0; i < 7 && grp_goal > start && | ||
1099 | ext4_test_allocatable(grp_goal - 1, | ||
1100 | bitmap_bh); | ||
1101 | i++, grp_goal--) | ||
1102 | ; | ||
1103 | } | ||
1104 | } | ||
1105 | start = grp_goal; | ||
1106 | |||
1107 | if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1108 | grp_goal, bitmap_bh)) { | ||
1109 | /* | ||
1110 | * The block was allocated by another thread, or it was | ||
1111 | * allocated and then freed by another thread | ||
1112 | */ | ||
1113 | start++; | ||
1114 | grp_goal++; | ||
1115 | if (start >= end) | ||
1116 | goto fail_access; | ||
1117 | goto repeat; | ||
1118 | } | ||
1119 | num++; | ||
1120 | grp_goal++; | ||
1121 | while (num < *count && grp_goal < end | ||
1122 | && ext4_test_allocatable(grp_goal, bitmap_bh) | ||
1123 | && claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
1124 | grp_goal, bitmap_bh)) { | ||
1125 | num++; | ||
1126 | grp_goal++; | ||
1127 | } | ||
1128 | *count = num; | ||
1129 | return grp_goal - num; | ||
1130 | fail_access: | ||
1131 | *count = num; | ||
1132 | return -1; | ||
1133 | } | ||
1134 | |||
1135 | /** | ||
1136 | * find_next_reservable_window(): | ||
1137 | * find a reservable space within the given range. | ||
1138 | * It does not allocate the reservation window for now: | ||
1139 | * alloc_new_reservation() will do the work later. | ||
1140 | * | ||
1141 | * @search_head: the head of the searching list; | ||
1142 | * This is not necessarily the list head of the whole filesystem | ||
1143 | * | ||
1144 | * We have both head and start_block to assist the search | ||
1145 | * for the reservable space. The list starts from head, | ||
1146 | * but we will shift to the place where start_block is, | ||
1147 | * then start from there, when looking for a reservable space. | ||
1148 | * | ||
1149 | * @size: the target new reservation window size | ||
1150 | * | ||
1151 | * @group_first_block: the first block we consider to start | ||
1152 | * the real search from | ||
1153 | * | ||
1154 | * @last_block: | ||
1155 | * the maximum block number that our goal reservable space | ||
1156 | * could start from. This is normally the last block in this | ||
1157 | * group. The search will end when we found the start of next | ||
1158 | * possible reservable space is out of this boundary. | ||
1159 | * This could handle the cross boundary reservation window | ||
1160 | * request. | ||
1161 | * | ||
1162 | * basically we search from the given range, rather than the whole | ||
1163 | * reservation double linked list, (start_block, last_block) | ||
1164 | * to find a free region that is of my size and has not | ||
1165 | * been reserved. | ||
1166 | * | ||
1167 | */ | ||
1168 | static int find_next_reservable_window( | ||
1169 | struct ext4_reserve_window_node *search_head, | ||
1170 | struct ext4_reserve_window_node *my_rsv, | ||
1171 | struct super_block * sb, | ||
1172 | ext4_fsblk_t start_block, | ||
1173 | ext4_fsblk_t last_block) | ||
1174 | { | ||
1175 | struct rb_node *next; | ||
1176 | struct ext4_reserve_window_node *rsv, *prev; | ||
1177 | ext4_fsblk_t cur; | ||
1178 | int size = my_rsv->rsv_goal_size; | ||
1179 | |||
1180 | /* TODO: make the start of the reservation window byte-aligned */ | ||
1181 | /* cur = *start_block & ~7;*/ | ||
1182 | cur = start_block; | ||
1183 | rsv = search_head; | ||
1184 | if (!rsv) | ||
1185 | return -1; | ||
1186 | |||
1187 | while (1) { | ||
1188 | if (cur <= rsv->rsv_end) | ||
1189 | cur = rsv->rsv_end + 1; | ||
1190 | |||
1191 | /* TODO? | ||
1192 | * in the case we could not find a reservable space | ||
1193 | * that is what is expected, during the re-search, we could | ||
1194 | * remember what's the largest reservable space we could have | ||
1195 | * and return that one. | ||
1196 | * | ||
1197 | * For now it will fail if we could not find the reservable | ||
1198 | * space with expected-size (or more)... | ||
1199 | */ | ||
1200 | if (cur > last_block) | ||
1201 | return -1; /* fail */ | ||
1202 | |||
1203 | prev = rsv; | ||
1204 | next = rb_next(&rsv->rsv_node); | ||
1205 | rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); | ||
1206 | 607 | ||
1207 | /* | 608 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1208 | * Reached the last reservation, we can just append to the | 609 | EXT4_FREEBLOCKS_WATERMARK) { |
1209 | * previous one. | 610 | free_blocks = percpu_counter_sum(fbc); |
1210 | */ | 611 | dirty_blocks = percpu_counter_sum(dbc); |
1211 | if (!next) | 612 | if (dirty_blocks < 0) { |
1212 | break; | 613 | printk(KERN_CRIT "Dirty block accounting " |
1213 | 614 | "went wrong %lld\n", | |
1214 | if (cur + size <= rsv->rsv_start) { | 615 | dirty_blocks); |
1215 | /* | ||
1216 | * Found a reserveable space big enough. We could | ||
1217 | * have a reservation across the group boundary here | ||
1218 | */ | ||
1219 | break; | ||
1220 | } | 616 | } |
1221 | } | 617 | } |
1222 | /* | 618 | /* Check whether we have space after |
1223 | * we come here either : | 619 | * accounting for current dirty blocks |
1224 | * when we reach the end of the whole list, | ||
1225 | * and there is empty reservable space after last entry in the list. | ||
1226 | * append it to the end of the list. | ||
1227 | * | ||
1228 | * or we found one reservable space in the middle of the list, | ||
1229 | * return the reservation window that we could append to. | ||
1230 | * succeed. | ||
1231 | */ | 620 | */ |
621 | if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) | ||
622 | /* we don't have free space */ | ||
623 | return -ENOSPC; | ||
1232 | 624 | ||
1233 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) | 625 | /* Add the blocks to nblocks */ |
1234 | rsv_window_remove(sb, my_rsv); | 626 | percpu_counter_add(dbc, nblocks); |
1235 | |||
1236 | /* | ||
1237 | * Let's book the whole avaliable window for now. We will check the | ||
1238 | * disk bitmap later and then, if there are free blocks then we adjust | ||
1239 | * the window size if it's larger than requested. | ||
1240 | * Otherwise, we will remove this node from the tree next time | ||
1241 | * call find_next_reservable_window. | ||
1242 | */ | ||
1243 | my_rsv->rsv_start = cur; | ||
1244 | my_rsv->rsv_end = cur + size - 1; | ||
1245 | my_rsv->rsv_alloc_hit = 0; | ||
1246 | |||
1247 | if (prev != my_rsv) | ||
1248 | ext4_rsv_window_add(sb, my_rsv); | ||
1249 | |||
1250 | return 0; | 627 | return 0; |
1251 | } | 628 | } |
1252 | 629 | ||
1253 | /** | 630 | /** |
1254 | * alloc_new_reservation()--allocate a new reservation window | ||
1255 | * | ||
1256 | * To make a new reservation, we search part of the filesystem | ||
1257 | * reservation list (the list that inside the group). We try to | ||
1258 | * allocate a new reservation window near the allocation goal, | ||
1259 | * or the beginning of the group, if there is no goal. | ||
1260 | * | ||
1261 | * We first find a reservable space after the goal, then from | ||
1262 | * there, we check the bitmap for the first free block after | ||
1263 | * it. If there is no free block until the end of group, then the | ||
1264 | * whole group is full, we failed. Otherwise, check if the free | ||
1265 | * block is inside the expected reservable space, if so, we | ||
1266 | * succeed. | ||
1267 | * If the first free block is outside the reservable space, then | ||
1268 | * start from the first free block, we search for next available | ||
1269 | * space, and go on. | ||
1270 | * | ||
1271 | * on succeed, a new reservation will be found and inserted into the list | ||
1272 | * It contains at least one free block, and it does not overlap with other | ||
1273 | * reservation windows. | ||
1274 | * | ||
1275 | * failed: we failed to find a reservation window in this group | ||
1276 | * | ||
1277 | * @rsv: the reservation | ||
1278 | * | ||
1279 | * @grp_goal: The goal (group-relative). It is where the search for a | ||
1280 | * free reservable space should start from. | ||
1281 | * if we have a grp_goal(grp_goal >0 ), then start from there, | ||
1282 | * no grp_goal(grp_goal = -1), we start from the first block | ||
1283 | * of the group. | ||
1284 | * | ||
1285 | * @sb: the super block | ||
1286 | * @group: the group we are trying to allocate in | ||
1287 | * @bitmap_bh: the block group block bitmap | ||
1288 | * | ||
1289 | */ | ||
1290 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1291 | ext4_grpblk_t grp_goal, struct super_block *sb, | ||
1292 | ext4_group_t group, struct buffer_head *bitmap_bh) | ||
1293 | { | ||
1294 | struct ext4_reserve_window_node *search_head; | ||
1295 | ext4_fsblk_t group_first_block, group_end_block, start_block; | ||
1296 | ext4_grpblk_t first_free_block; | ||
1297 | struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; | ||
1298 | unsigned long size; | ||
1299 | int ret; | ||
1300 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1301 | |||
1302 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1303 | group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1304 | |||
1305 | if (grp_goal < 0) | ||
1306 | start_block = group_first_block; | ||
1307 | else | ||
1308 | start_block = grp_goal + group_first_block; | ||
1309 | |||
1310 | size = my_rsv->rsv_goal_size; | ||
1311 | |||
1312 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | ||
1313 | /* | ||
1314 | * if the old reservation is cross group boundary | ||
1315 | * and if the goal is inside the old reservation window, | ||
1316 | * we will come here when we just failed to allocate from | ||
1317 | * the first part of the window. We still have another part | ||
1318 | * that belongs to the next group. In this case, there is no | ||
1319 | * point to discard our window and try to allocate a new one | ||
1320 | * in this group(which will fail). we should | ||
1321 | * keep the reservation window, just simply move on. | ||
1322 | * | ||
1323 | * Maybe we could shift the start block of the reservation | ||
1324 | * window to the first block of next group. | ||
1325 | */ | ||
1326 | |||
1327 | if ((my_rsv->rsv_start <= group_end_block) && | ||
1328 | (my_rsv->rsv_end > group_end_block) && | ||
1329 | (start_block >= my_rsv->rsv_start)) | ||
1330 | return -1; | ||
1331 | |||
1332 | if ((my_rsv->rsv_alloc_hit > | ||
1333 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | ||
1334 | /* | ||
1335 | * if the previously allocation hit ratio is | ||
1336 | * greater than 1/2, then we double the size of | ||
1337 | * the reservation window the next time, | ||
1338 | * otherwise we keep the same size window | ||
1339 | */ | ||
1340 | size = size * 2; | ||
1341 | if (size > EXT4_MAX_RESERVE_BLOCKS) | ||
1342 | size = EXT4_MAX_RESERVE_BLOCKS; | ||
1343 | my_rsv->rsv_goal_size= size; | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | spin_lock(rsv_lock); | ||
1348 | /* | ||
1349 | * shift the search start to the window near the goal block | ||
1350 | */ | ||
1351 | search_head = search_reserve_window(fs_rsv_root, start_block); | ||
1352 | |||
1353 | /* | ||
1354 | * find_next_reservable_window() simply finds a reservable window | ||
1355 | * inside the given range(start_block, group_end_block). | ||
1356 | * | ||
1357 | * To make sure the reservation window has a free bit inside it, we | ||
1358 | * need to check the bitmap after we found a reservable window. | ||
1359 | */ | ||
1360 | retry: | ||
1361 | ret = find_next_reservable_window(search_head, my_rsv, sb, | ||
1362 | start_block, group_end_block); | ||
1363 | |||
1364 | if (ret == -1) { | ||
1365 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1366 | rsv_window_remove(sb, my_rsv); | ||
1367 | spin_unlock(rsv_lock); | ||
1368 | return -1; | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * On success, find_next_reservable_window() returns the | ||
1373 | * reservation window where there is a reservable space after it. | ||
1374 | * Before we reserve this reservable space, we need | ||
1375 | * to make sure there is at least a free block inside this region. | ||
1376 | * | ||
1377 | * searching the first free bit on the block bitmap and copy of | ||
1378 | * last committed bitmap alternatively, until we found a allocatable | ||
1379 | * block. Search start from the start block of the reservable space | ||
1380 | * we just found. | ||
1381 | */ | ||
1382 | spin_unlock(rsv_lock); | ||
1383 | first_free_block = bitmap_search_next_usable_block( | ||
1384 | my_rsv->rsv_start - group_first_block, | ||
1385 | bitmap_bh, group_end_block - group_first_block + 1); | ||
1386 | |||
1387 | if (first_free_block < 0) { | ||
1388 | /* | ||
1389 | * no free block left on the bitmap, no point | ||
1390 | * to reserve the space. return failed. | ||
1391 | */ | ||
1392 | spin_lock(rsv_lock); | ||
1393 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
1394 | rsv_window_remove(sb, my_rsv); | ||
1395 | spin_unlock(rsv_lock); | ||
1396 | return -1; /* failed */ | ||
1397 | } | ||
1398 | |||
1399 | start_block = first_free_block + group_first_block; | ||
1400 | /* | ||
1401 | * check if the first free block is within the | ||
1402 | * free space we just reserved | ||
1403 | */ | ||
1404 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | ||
1405 | return 0; /* success */ | ||
1406 | /* | ||
1407 | * if the first free bit we found is out of the reservable space | ||
1408 | * continue search for next reservable space, | ||
1409 | * start from where the free block is, | ||
1410 | * we also shift the list head to where we stopped last time | ||
1411 | */ | ||
1412 | search_head = my_rsv; | ||
1413 | spin_lock(rsv_lock); | ||
1414 | goto retry; | ||
1415 | } | ||
1416 | |||
1417 | /** | ||
1418 | * try_to_extend_reservation() | ||
1419 | * @my_rsv: given reservation window | ||
1420 | * @sb: super block | ||
1421 | * @size: the delta to extend | ||
1422 | * | ||
1423 | * Attempt to expand the reservation window large enough to have | ||
1424 | * required number of free blocks | ||
1425 | * | ||
1426 | * Since ext4_try_to_allocate() will always allocate blocks within | ||
1427 | * the reservation window range, if the window size is too small, | ||
1428 | * multiple blocks allocation has to stop at the end of the reservation | ||
1429 | * window. To make this more efficient, given the total number of | ||
1430 | * blocks needed and the current size of the window, we try to | ||
1431 | * expand the reservation window size if necessary on a best-effort | ||
1432 | * basis before ext4_new_blocks() tries to allocate blocks, | ||
1433 | */ | ||
1434 | static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | ||
1435 | struct super_block *sb, int size) | ||
1436 | { | ||
1437 | struct ext4_reserve_window_node *next_rsv; | ||
1438 | struct rb_node *next; | ||
1439 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
1440 | |||
1441 | if (!spin_trylock(rsv_lock)) | ||
1442 | return; | ||
1443 | |||
1444 | next = rb_next(&my_rsv->rsv_node); | ||
1445 | |||
1446 | if (!next) | ||
1447 | my_rsv->rsv_end += size; | ||
1448 | else { | ||
1449 | next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); | ||
1450 | |||
1451 | if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) | ||
1452 | my_rsv->rsv_end += size; | ||
1453 | else | ||
1454 | my_rsv->rsv_end = next_rsv->rsv_start - 1; | ||
1455 | } | ||
1456 | spin_unlock(rsv_lock); | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * ext4_try_to_allocate_with_rsv() | ||
1461 | * @sb: superblock | ||
1462 | * @handle: handle to this transaction | ||
1463 | * @group: given allocation block group | ||
1464 | * @bitmap_bh: bufferhead holds the block bitmap | ||
1465 | * @grp_goal: given target block within the group | ||
1466 | * @count: target number of blocks to allocate | ||
1467 | * @my_rsv: reservation window | ||
1468 | * @errp: pointer to store the error code | ||
1469 | * | ||
1470 | * This is the main function used to allocate a new block and its reservation | ||
1471 | * window. | ||
1472 | * | ||
1473 | * Each time when a new block allocation is need, first try to allocate from | ||
1474 | * its own reservation. If it does not have a reservation window, instead of | ||
1475 | * looking for a free bit on bitmap first, then look up the reservation list to | ||
1476 | * see if it is inside somebody else's reservation window, we try to allocate a | ||
1477 | * reservation window for it starting from the goal first. Then do the block | ||
1478 | * allocation within the reservation window. | ||
1479 | * | ||
1480 | * This will avoid keeping on searching the reservation list again and | ||
1481 | * again when somebody is looking for a free block (without | ||
1482 | * reservation), and there are lots of free blocks, but they are all | ||
1483 | * being reserved. | ||
1484 | * | ||
1485 | * We use a red-black tree for the per-filesystem reservation list. | ||
1486 | * | ||
1487 | */ | ||
1488 | static ext4_grpblk_t | ||
1489 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | ||
1490 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
1491 | ext4_grpblk_t grp_goal, | ||
1492 | struct ext4_reserve_window_node * my_rsv, | ||
1493 | unsigned long *count, int *errp) | ||
1494 | { | ||
1495 | ext4_fsblk_t group_first_block, group_last_block; | ||
1496 | ext4_grpblk_t ret = 0; | ||
1497 | int fatal; | ||
1498 | unsigned long num = *count; | ||
1499 | |||
1500 | *errp = 0; | ||
1501 | |||
1502 | /* | ||
1503 | * Make sure we use undo access for the bitmap, because it is critical | ||
1504 | * that we do the frozen_data COW on bitmap buffers in all cases even | ||
1505 | * if the buffer is in BJ_Forget state in the committing transaction. | ||
1506 | */ | ||
1507 | BUFFER_TRACE(bitmap_bh, "get undo access for new block"); | ||
1508 | fatal = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
1509 | if (fatal) { | ||
1510 | *errp = fatal; | ||
1511 | return -1; | ||
1512 | } | ||
1513 | |||
1514 | /* | ||
1515 | * we don't deal with reservation when | ||
1516 | * filesystem is mounted without reservation | ||
1517 | * or the file is not a regular file | ||
1518 | * or last attempt to allocate a block with reservation turned on failed | ||
1519 | */ | ||
1520 | if (my_rsv == NULL ) { | ||
1521 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1522 | grp_goal, count, NULL); | ||
1523 | goto out; | ||
1524 | } | ||
1525 | /* | ||
1526 | * grp_goal is a group relative block number (if there is a goal) | ||
1527 | * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) | ||
1528 | * first block is a filesystem wide block number | ||
1529 | * first block is the block number of the first block in this group | ||
1530 | */ | ||
1531 | group_first_block = ext4_group_first_block_no(sb, group); | ||
1532 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
1533 | |||
1534 | /* | ||
1535 | * Basically we will allocate a new block from inode's reservation | ||
1536 | * window. | ||
1537 | * | ||
1538 | * We need to allocate a new reservation window, if: | ||
1539 | * a) inode does not have a reservation window; or | ||
1540 | * b) last attempt to allocate a block from existing reservation | ||
1541 | * failed; or | ||
1542 | * c) we come here with a goal and with a reservation window | ||
1543 | * | ||
1544 | * We do not need to allocate a new reservation window if we come here | ||
1545 | * at the beginning with a goal and the goal is inside the window, or | ||
1546 | * we don't have a goal but already have a reservation window. | ||
1547 | * then we could go to allocate from the reservation window directly. | ||
1548 | */ | ||
1549 | while (1) { | ||
1550 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | ||
1551 | !goal_in_my_reservation(&my_rsv->rsv_window, | ||
1552 | grp_goal, group, sb)) { | ||
1553 | if (my_rsv->rsv_goal_size < *count) | ||
1554 | my_rsv->rsv_goal_size = *count; | ||
1555 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | ||
1556 | group, bitmap_bh); | ||
1557 | if (ret < 0) | ||
1558 | break; /* failed */ | ||
1559 | |||
1560 | if (!goal_in_my_reservation(&my_rsv->rsv_window, | ||
1561 | grp_goal, group, sb)) | ||
1562 | grp_goal = -1; | ||
1563 | } else if (grp_goal >= 0) { | ||
1564 | int curr = my_rsv->rsv_end - | ||
1565 | (grp_goal + group_first_block) + 1; | ||
1566 | |||
1567 | if (curr < *count) | ||
1568 | try_to_extend_reservation(my_rsv, sb, | ||
1569 | *count - curr); | ||
1570 | } | ||
1571 | |||
1572 | if ((my_rsv->rsv_start > group_last_block) || | ||
1573 | (my_rsv->rsv_end < group_first_block)) { | ||
1574 | rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); | ||
1575 | BUG(); | ||
1576 | } | ||
1577 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
1578 | grp_goal, &num, &my_rsv->rsv_window); | ||
1579 | if (ret >= 0) { | ||
1580 | my_rsv->rsv_alloc_hit += num; | ||
1581 | *count = num; | ||
1582 | break; /* succeed */ | ||
1583 | } | ||
1584 | num = *count; | ||
1585 | } | ||
1586 | out: | ||
1587 | if (ret >= 0) { | ||
1588 | BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " | ||
1589 | "bitmap block"); | ||
1590 | fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
1591 | if (fatal) { | ||
1592 | *errp = fatal; | ||
1593 | return -1; | ||
1594 | } | ||
1595 | return ret; | ||
1596 | } | ||
1597 | |||
1598 | BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); | ||
1599 | ext4_journal_release_buffer(handle, bitmap_bh); | ||
1600 | return ret; | ||
1601 | } | ||
1602 | |||
1603 | /** | ||
1604 | * ext4_has_free_blocks() | 631 | * ext4_has_free_blocks() |
1605 | * @sbi: in-core super block structure. | 632 | * @sbi: in-core super block structure. |
1606 | * @nblocks: number of neeed blocks | 633 | * @nblocks: number of neeed blocks |
@@ -1610,26 +637,34 @@ out: | |||
1610 | * On success, return nblocks | 637 | * On success, return nblocks |
1611 | */ | 638 | */ |
1612 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 639 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
1613 | ext4_fsblk_t nblocks) | 640 | s64 nblocks) |
1614 | { | 641 | { |
1615 | ext4_fsblk_t free_blocks; | 642 | s64 free_blocks, dirty_blocks; |
1616 | ext4_fsblk_t root_blocks = 0; | 643 | s64 root_blocks = 0; |
644 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | ||
645 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; | ||
1617 | 646 | ||
1618 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 647 | free_blocks = percpu_counter_read_positive(fbc); |
648 | dirty_blocks = percpu_counter_read_positive(dbc); | ||
1619 | 649 | ||
1620 | if (!capable(CAP_SYS_RESOURCE) && | 650 | if (!capable(CAP_SYS_RESOURCE) && |
1621 | sbi->s_resuid != current->fsuid && | 651 | sbi->s_resuid != current->fsuid && |
1622 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) | 652 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
1623 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 653 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
1624 | #ifdef CONFIG_SMP | 654 | |
1625 | if (free_blocks - root_blocks < FBC_BATCH) | 655 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
1626 | free_blocks = | 656 | EXT4_FREEBLOCKS_WATERMARK) { |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 657 | free_blocks = percpu_counter_sum(fbc); |
1628 | #endif | 658 | dirty_blocks = percpu_counter_sum(dbc); |
1629 | if (free_blocks - root_blocks < nblocks) | 659 | } |
1630 | return free_blocks - root_blocks; | 660 | if (free_blocks <= (root_blocks + dirty_blocks)) |
661 | /* we don't have free space */ | ||
662 | return 0; | ||
663 | |||
664 | if (free_blocks - (root_blocks + dirty_blocks) < nblocks) | ||
665 | return free_blocks - (root_blocks + dirty_blocks); | ||
1631 | return nblocks; | 666 | return nblocks; |
1632 | } | 667 | } |
1633 | 668 | ||
1634 | 669 | ||
1635 | /** | 670 | /** |
@@ -1654,303 +689,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1654 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 689 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
1655 | } | 690 | } |
1656 | 691 | ||
1657 | /** | ||
1658 | * ext4_old_new_blocks() -- core block bitmap based block allocation function | ||
1659 | * | ||
1660 | * @handle: handle to this transaction | ||
1661 | * @inode: file inode | ||
1662 | * @goal: given target block(filesystem wide) | ||
1663 | * @count: target number of blocks to allocate | ||
1664 | * @errp: error code | ||
1665 | * | ||
1666 | * ext4_old_new_blocks uses a goal block to assist allocation and look up | ||
1667 | * the block bitmap directly to do block allocation. It tries to | ||
1668 | * allocate block(s) from the block group contains the goal block first. If | ||
1669 | * that fails, it will try to allocate block(s) from other block groups | ||
1670 | * without any specific goal block. | ||
1671 | * | ||
1672 | * This function is called when -o nomballoc mount option is enabled | ||
1673 | * | ||
1674 | */ | ||
1675 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
1676 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
1677 | { | ||
1678 | struct buffer_head *bitmap_bh = NULL; | ||
1679 | struct buffer_head *gdp_bh; | ||
1680 | ext4_group_t group_no; | ||
1681 | ext4_group_t goal_group; | ||
1682 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | ||
1683 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | ||
1684 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ | ||
1685 | ext4_group_t bgi; /* blockgroup iteration index */ | ||
1686 | int fatal = 0, err; | ||
1687 | int performed_allocation = 0; | ||
1688 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | ||
1689 | struct super_block *sb; | ||
1690 | struct ext4_group_desc *gdp; | ||
1691 | struct ext4_super_block *es; | ||
1692 | struct ext4_sb_info *sbi; | ||
1693 | struct ext4_reserve_window_node *my_rsv = NULL; | ||
1694 | struct ext4_block_alloc_info *block_i; | ||
1695 | unsigned short windowsz = 0; | ||
1696 | ext4_group_t ngroups; | ||
1697 | unsigned long num = *count; | ||
1698 | |||
1699 | sb = inode->i_sb; | ||
1700 | if (!sb) { | ||
1701 | *errp = -ENODEV; | ||
1702 | printk("ext4_new_block: nonexistent device"); | ||
1703 | return 0; | ||
1704 | } | ||
1705 | |||
1706 | sbi = EXT4_SB(sb); | ||
1707 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
1708 | /* | ||
1709 | * With delalloc we already reserved the blocks | ||
1710 | */ | ||
1711 | *count = ext4_has_free_blocks(sbi, *count); | ||
1712 | } | ||
1713 | if (*count == 0) { | ||
1714 | *errp = -ENOSPC; | ||
1715 | return 0; /*return with ENOSPC error */ | ||
1716 | } | ||
1717 | num = *count; | ||
1718 | |||
1719 | /* | ||
1720 | * Check quota for allocation of this block. | ||
1721 | */ | ||
1722 | if (DQUOT_ALLOC_BLOCK(inode, num)) { | ||
1723 | *errp = -EDQUOT; | ||
1724 | return 0; | ||
1725 | } | ||
1726 | |||
1727 | sbi = EXT4_SB(sb); | ||
1728 | es = EXT4_SB(sb)->s_es; | ||
1729 | ext4_debug("goal=%llu.\n", goal); | ||
1730 | /* | ||
1731 | * Allocate a block from reservation only when | ||
1732 | * filesystem is mounted with reservation(default,-o reservation), and | ||
1733 | * it's a regular file, and | ||
1734 | * the desired window size is greater than 0 (One could use ioctl | ||
1735 | * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off | ||
1736 | * reservation on that particular file) | ||
1737 | */ | ||
1738 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
1739 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | ||
1740 | my_rsv = &block_i->rsv_window_node; | ||
1741 | |||
1742 | /* | ||
1743 | * First, test whether the goal block is free. | ||
1744 | */ | ||
1745 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
1746 | goal >= ext4_blocks_count(es)) | ||
1747 | goal = le32_to_cpu(es->s_first_data_block); | ||
1748 | ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); | ||
1749 | goal_group = group_no; | ||
1750 | retry_alloc: | ||
1751 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1752 | if (!gdp) | ||
1753 | goto io_error; | ||
1754 | |||
1755 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1756 | /* | ||
1757 | * if there is not enough free blocks to make a new resevation | ||
1758 | * turn off reservation for this allocation | ||
1759 | */ | ||
1760 | if (my_rsv && (free_blocks < windowsz) | ||
1761 | && (rsv_is_empty(&my_rsv->rsv_window))) | ||
1762 | my_rsv = NULL; | ||
1763 | |||
1764 | if (free_blocks > 0) { | ||
1765 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1766 | if (!bitmap_bh) | ||
1767 | goto io_error; | ||
1768 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1769 | group_no, bitmap_bh, grp_target_blk, | ||
1770 | my_rsv, &num, &fatal); | ||
1771 | if (fatal) | ||
1772 | goto out; | ||
1773 | if (grp_alloc_blk >= 0) | ||
1774 | goto allocated; | ||
1775 | } | ||
1776 | |||
1777 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
1778 | smp_rmb(); | ||
1779 | |||
1780 | /* | ||
1781 | * Now search the rest of the groups. We assume that | ||
1782 | * group_no and gdp correctly point to the last group visited. | ||
1783 | */ | ||
1784 | for (bgi = 0; bgi < ngroups; bgi++) { | ||
1785 | group_no++; | ||
1786 | if (group_no >= ngroups) | ||
1787 | group_no = 0; | ||
1788 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
1789 | if (!gdp) | ||
1790 | goto io_error; | ||
1791 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
1792 | /* | ||
1793 | * skip this group if the number of | ||
1794 | * free blocks is less than half of the reservation | ||
1795 | * window size. | ||
1796 | */ | ||
1797 | if (free_blocks <= (windowsz/2)) | ||
1798 | continue; | ||
1799 | |||
1800 | brelse(bitmap_bh); | ||
1801 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
1802 | if (!bitmap_bh) | ||
1803 | goto io_error; | ||
1804 | /* | ||
1805 | * try to allocate block(s) from this group, without a goal(-1). | ||
1806 | */ | ||
1807 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
1808 | group_no, bitmap_bh, -1, my_rsv, | ||
1809 | &num, &fatal); | ||
1810 | if (fatal) | ||
1811 | goto out; | ||
1812 | if (grp_alloc_blk >= 0) | ||
1813 | goto allocated; | ||
1814 | } | ||
1815 | /* | ||
1816 | * We may end up a bogus ealier ENOSPC error due to | ||
1817 | * filesystem is "full" of reservations, but | ||
1818 | * there maybe indeed free blocks avaliable on disk | ||
1819 | * In this case, we just forget about the reservations | ||
1820 | * just do block allocation as without reservations. | ||
1821 | */ | ||
1822 | if (my_rsv) { | ||
1823 | my_rsv = NULL; | ||
1824 | windowsz = 0; | ||
1825 | group_no = goal_group; | ||
1826 | goto retry_alloc; | ||
1827 | } | ||
1828 | /* No space left on the device */ | ||
1829 | *errp = -ENOSPC; | ||
1830 | goto out; | ||
1831 | |||
1832 | allocated: | ||
1833 | |||
1834 | ext4_debug("using block group %lu(%d)\n", | ||
1835 | group_no, gdp->bg_free_blocks_count); | ||
1836 | |||
1837 | BUFFER_TRACE(gdp_bh, "get_write_access"); | ||
1838 | fatal = ext4_journal_get_write_access(handle, gdp_bh); | ||
1839 | if (fatal) | ||
1840 | goto out; | ||
1841 | |||
1842 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | ||
1843 | |||
1844 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | ||
1845 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || | ||
1846 | in_range(ret_block, ext4_inode_table(sb, gdp), | ||
1847 | EXT4_SB(sb)->s_itb_per_group) || | ||
1848 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | ||
1849 | EXT4_SB(sb)->s_itb_per_group)) { | ||
1850 | ext4_error(sb, "ext4_new_block", | ||
1851 | "Allocating block in system zone - " | ||
1852 | "blocks from %llu, length %lu", | ||
1853 | ret_block, num); | ||
1854 | /* | ||
1855 | * claim_block marked the blocks we allocated | ||
1856 | * as in use. So we may want to selectively | ||
1857 | * mark some of the blocks as free | ||
1858 | */ | ||
1859 | goto retry_alloc; | ||
1860 | } | ||
1861 | |||
1862 | performed_allocation = 1; | ||
1863 | |||
1864 | #ifdef CONFIG_JBD2_DEBUG | ||
1865 | { | ||
1866 | struct buffer_head *debug_bh; | ||
1867 | |||
1868 | /* Record bitmap buffer state in the newly allocated block */ | ||
1869 | debug_bh = sb_find_get_block(sb, ret_block); | ||
1870 | if (debug_bh) { | ||
1871 | BUFFER_TRACE(debug_bh, "state when allocated"); | ||
1872 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); | ||
1873 | brelse(debug_bh); | ||
1874 | } | ||
1875 | } | ||
1876 | jbd_lock_bh_state(bitmap_bh); | ||
1877 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1878 | if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { | ||
1879 | int i; | ||
1880 | |||
1881 | for (i = 0; i < num; i++) { | ||
1882 | if (ext4_test_bit(grp_alloc_blk+i, | ||
1883 | bh2jh(bitmap_bh)->b_committed_data)) { | ||
1884 | printk("%s: block was unexpectedly set in " | ||
1885 | "b_committed_data\n", __func__); | ||
1886 | } | ||
1887 | } | ||
1888 | } | ||
1889 | ext4_debug("found bit %d\n", grp_alloc_blk); | ||
1890 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1891 | jbd_unlock_bh_state(bitmap_bh); | ||
1892 | #endif | ||
1893 | |||
1894 | if (ret_block + num - 1 >= ext4_blocks_count(es)) { | ||
1895 | ext4_error(sb, "ext4_new_block", | ||
1896 | "block(%llu) >= blocks count(%llu) - " | ||
1897 | "block_group = %lu, es == %p ", ret_block, | ||
1898 | ext4_blocks_count(es), group_no, es); | ||
1899 | goto out; | ||
1900 | } | ||
1901 | |||
1902 | /* | ||
1903 | * It is up to the caller to add the new buffer to a journal | ||
1904 | * list of some description. We don't know in advance whether | ||
1905 | * the caller wants to use it as metadata or data. | ||
1906 | */ | ||
1907 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
1908 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1909 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
1910 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | ||
1911 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | ||
1912 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
1913 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) | ||
1914 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
1915 | |||
1916 | if (sbi->s_log_groups_per_flex) { | ||
1917 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
1918 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
1919 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
1920 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
1921 | } | ||
1922 | |||
1923 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | ||
1924 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
1925 | if (!fatal) | ||
1926 | fatal = err; | ||
1927 | |||
1928 | sb->s_dirt = 1; | ||
1929 | if (fatal) | ||
1930 | goto out; | ||
1931 | |||
1932 | *errp = 0; | ||
1933 | brelse(bitmap_bh); | ||
1934 | DQUOT_FREE_BLOCK(inode, *count-num); | ||
1935 | *count = num; | ||
1936 | return ret_block; | ||
1937 | |||
1938 | io_error: | ||
1939 | *errp = -EIO; | ||
1940 | out: | ||
1941 | if (fatal) { | ||
1942 | *errp = fatal; | ||
1943 | ext4_std_error(sb, fatal); | ||
1944 | } | ||
1945 | /* | ||
1946 | * Undo the block allocation | ||
1947 | */ | ||
1948 | if (!performed_allocation) | ||
1949 | DQUOT_FREE_BLOCK(inode, *count); | ||
1950 | brelse(bitmap_bh); | ||
1951 | return 0; | ||
1952 | } | ||
1953 | |||
1954 | #define EXT4_META_BLOCK 0x1 | 692 | #define EXT4_META_BLOCK 0x1 |
1955 | 693 | ||
1956 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | 694 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, |
@@ -1960,10 +698,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | |||
1960 | struct ext4_allocation_request ar; | 698 | struct ext4_allocation_request ar; |
1961 | ext4_fsblk_t ret; | 699 | ext4_fsblk_t ret; |
1962 | 700 | ||
1963 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
1964 | return ext4_old_new_blocks(handle, inode, goal, count, errp); | ||
1965 | } | ||
1966 | |||
1967 | memset(&ar, 0, sizeof(ar)); | 701 | memset(&ar, 0, sizeof(ar)); |
1968 | /* Fill with neighbour allocated blocks */ | 702 | /* Fill with neighbour allocated blocks */ |
1969 | 703 | ||
@@ -2005,7 +739,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
2005 | /* | 739 | /* |
2006 | * Account for the allocated meta blocks | 740 | * Account for the allocated meta blocks |
2007 | */ | 741 | */ |
2008 | if (!(*errp)) { | 742 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
2009 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 743 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2010 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | 744 | EXT4_I(inode)->i_allocated_meta_blocks += *count; |
2011 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 745 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2090,10 +824,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
2090 | bitmap_count += x; | 824 | bitmap_count += x; |
2091 | } | 825 | } |
2092 | brelse(bitmap_bh); | 826 | brelse(bitmap_bh); |
2093 | printk("ext4_count_free_blocks: stored = %llu" | 827 | printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" |
2094 | ", computed = %llu, %llu\n", | 828 | ", computed = %llu, %llu\n", ext4_free_blocks_count(es), |
2095 | ext4_free_blocks_count(es), | 829 | desc_count, bitmap_count); |
2096 | desc_count, bitmap_count); | ||
2097 | return bitmap_count; | 830 | return bitmap_count; |
2098 | #else | 831 | #else |
2099 | desc_count = 0; | 832 | desc_count = 0; |
@@ -2180,8 +913,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
2180 | 913 | ||
2181 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || | 914 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || |
2182 | metagroup < first_meta_bg) | 915 | metagroup < first_meta_bg) |
2183 | return ext4_bg_num_gdb_nometa(sb,group); | 916 | return ext4_bg_num_gdb_nometa(sb, group); |
2184 | 917 | ||
2185 | return ext4_bg_num_gdb_meta(sb,group); | 918 | return ext4_bg_num_gdb_meta(sb,group); |
2186 | 919 | ||
2187 | } | 920 | } |
921 | |||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea6750454..0a7a6663c190 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -15,17 +15,17 @@ | |||
15 | 15 | ||
16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; |
17 | 17 | ||
18 | unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) | 18 | unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) |
19 | { | 19 | { |
20 | unsigned int i; | 20 | unsigned int i; |
21 | unsigned long sum = 0; | 21 | unsigned long sum = 0; |
22 | 22 | ||
23 | if (!map) | 23 | if (!map) |
24 | return (0); | 24 | return 0; |
25 | for (i = 0; i < numchars; i++) | 25 | for (i = 0; i < numchars; i++) |
26 | sum += nibblemap[map->b_data[i] & 0xf] + | 26 | sum += nibblemap[map->b_data[i] & 0xf] + |
27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; |
28 | return (sum); | 28 | return sum; |
29 | } | 29 | } |
30 | 30 | ||
31 | #endif /* EXT4FS_DEBUG */ | 31 | #endif /* EXT4FS_DEBUG */ |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..3ca6a2b7632d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { | |||
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int ext4_readdir(struct file *, void *, filldir_t); | 35 | static int ext4_readdir(struct file *, void *, filldir_t); |
36 | static int ext4_dx_readdir(struct file * filp, | 36 | static int ext4_dx_readdir(struct file *filp, |
37 | void * dirent, filldir_t filldir); | 37 | void *dirent, filldir_t filldir); |
38 | static int ext4_release_dir (struct inode * inode, | 38 | static int ext4_release_dir(struct inode *inode, |
39 | struct file * filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry (const char * function, struct inode * dir, | 64 | int ext4_check_dir_entry(const char *function, struct inode *dir, |
65 | struct ext4_dir_entry_2 * de, | 65 | struct ext4_dir_entry_2 *de, |
66 | struct buffer_head * bh, | 66 | struct buffer_head *bh, |
67 | unsigned long offset) | 67 | unsigned long offset) |
68 | { | 68 | { |
69 | const char * error_msg = NULL; | 69 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
71 | 71 | ||
72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
82 | error_msg = "inode out of bounds"; | 82 | error_msg = "inode out of bounds"; |
83 | 83 | ||
84 | if (error_msg != NULL) | 84 | if (error_msg != NULL) |
85 | ext4_error (dir->i_sb, function, | 85 | ext4_error(dir->i_sb, function, |
86 | "bad entry in directory #%lu: %s - " | 86 | "bad entry in directory #%lu: %s - " |
87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
88 | dir->i_ino, error_msg, offset, | 88 | dir->i_ino, error_msg, offset, |
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
91 | return error_msg == NULL ? 1 : 0; | 91 | return error_msg == NULL ? 1 : 0; |
92 | } | 92 | } |
93 | 93 | ||
94 | static int ext4_readdir(struct file * filp, | 94 | static int ext4_readdir(struct file *filp, |
95 | void * dirent, filldir_t filldir) | 95 | void *dirent, filldir_t filldir) |
96 | { | 96 | { |
97 | int error = 0; | 97 | int error = 0; |
98 | unsigned long offset; | 98 | unsigned long offset; |
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, | |||
102 | int err; | 102 | int err; |
103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
104 | int ret = 0; | 104 | int ret = 0; |
105 | int dir_has_error = 0; | ||
105 | 106 | ||
106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
107 | 108 | ||
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, | |||
148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
149 | */ | 150 | */ |
150 | if (!bh) { | 151 | if (!bh) { |
151 | ext4_error (sb, "ext4_readdir", | 152 | if (!dir_has_error) { |
152 | "directory #%lu contains a hole at offset %lu", | 153 | ext4_error(sb, __func__, "directory #%lu " |
153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %Lu", |
155 | inode->i_ino, | ||
156 | (unsigned long long) filp->f_pos); | ||
157 | dir_has_error = 1; | ||
158 | } | ||
154 | /* corrupt size? Maybe no more blocks to read */ | 159 | /* corrupt size? Maybe no more blocks to read */ |
155 | if (filp->f_pos > inode->i_blocks << 9) | 160 | if (filp->f_pos > inode->i_blocks << 9) |
156 | break; | 161 | break; |
@@ -187,14 +192,14 @@ revalidate: | |||
187 | while (!error && filp->f_pos < inode->i_size | 192 | while (!error && filp->f_pos < inode->i_size |
188 | && offset < sb->s_blocksize) { | 193 | && offset < sb->s_blocksize) { |
189 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 194 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
190 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | 195 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, |
191 | bh, offset)) { | 196 | bh, offset)) { |
192 | /* | 197 | /* |
193 | * On error, skip the f_pos to the next block | 198 | * On error, skip the f_pos to the next block |
194 | */ | 199 | */ |
195 | filp->f_pos = (filp->f_pos | | 200 | filp->f_pos = (filp->f_pos | |
196 | (sb->s_blocksize - 1)) + 1; | 201 | (sb->s_blocksize - 1)) + 1; |
197 | brelse (bh); | 202 | brelse(bh); |
198 | ret = stored; | 203 | ret = stored; |
199 | goto out; | 204 | goto out; |
200 | } | 205 | } |
@@ -218,12 +223,12 @@ revalidate: | |||
218 | break; | 223 | break; |
219 | if (version != filp->f_version) | 224 | if (version != filp->f_version) |
220 | goto revalidate; | 225 | goto revalidate; |
221 | stored ++; | 226 | stored++; |
222 | } | 227 | } |
223 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); | 228 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
224 | } | 229 | } |
225 | offset = 0; | 230 | offset = 0; |
226 | brelse (bh); | 231 | brelse(bh); |
227 | } | 232 | } |
228 | out: | 233 | out: |
229 | return ret; | 234 | return ret; |
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
290 | parent = rb_parent(n); | 295 | parent = rb_parent(n); |
291 | fname = rb_entry(n, struct fname, rb_hash); | 296 | fname = rb_entry(n, struct fname, rb_hash); |
292 | while (fname) { | 297 | while (fname) { |
293 | struct fname * old = fname; | 298 | struct fname *old = fname; |
294 | fname = fname->next; | 299 | fname = fname->next; |
295 | kfree (old); | 300 | kfree(old); |
296 | } | 301 | } |
297 | if (!parent) | 302 | if (!parent) |
298 | root->rb_node = NULL; | 303 | root->rb_node = NULL; |
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
331 | struct ext4_dir_entry_2 *dirent) | 336 | struct ext4_dir_entry_2 *dirent) |
332 | { | 337 | { |
333 | struct rb_node **p, *parent = NULL; | 338 | struct rb_node **p, *parent = NULL; |
334 | struct fname * fname, *new_fn; | 339 | struct fname *fname, *new_fn; |
335 | struct dir_private_info *info; | 340 | struct dir_private_info *info; |
336 | int len; | 341 | int len; |
337 | 342 | ||
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
388 | * for all entres on the fname linked list. (Normally there is only | 393 | * for all entres on the fname linked list. (Normally there is only |
389 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 394 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
390 | */ | 395 | */ |
391 | static int call_filldir(struct file * filp, void * dirent, | 396 | static int call_filldir(struct file *filp, void *dirent, |
392 | filldir_t filldir, struct fname *fname) | 397 | filldir_t filldir, struct fname *fname) |
393 | { | 398 | { |
394 | struct dir_private_info *info = filp->private_data; | 399 | struct dir_private_info *info = filp->private_data; |
395 | loff_t curr_pos; | 400 | loff_t curr_pos; |
396 | struct inode *inode = filp->f_path.dentry->d_inode; | 401 | struct inode *inode = filp->f_path.dentry->d_inode; |
397 | struct super_block * sb; | 402 | struct super_block *sb; |
398 | int error; | 403 | int error; |
399 | 404 | ||
400 | sb = inode->i_sb; | 405 | sb = inode->i_sb; |
401 | 406 | ||
402 | if (!fname) { | 407 | if (!fname) { |
403 | printk("call_filldir: called with null fname?!?\n"); | 408 | printk(KERN_ERR "ext4: call_filldir: called with " |
409 | "null fname?!?\n"); | ||
404 | return 0; | 410 | return 0; |
405 | } | 411 | } |
406 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
@@ -411,7 +417,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
411 | get_dtype(sb, fname->file_type)); | 417 | get_dtype(sb, fname->file_type)); |
412 | if (error) { | 418 | if (error) { |
413 | filp->f_pos = curr_pos; | 419 | filp->f_pos = curr_pos; |
414 | info->extra_fname = fname->next; | 420 | info->extra_fname = fname; |
415 | return error; | 421 | return error; |
416 | } | 422 | } |
417 | fname = fname->next; | 423 | fname = fname->next; |
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, | |||
419 | return 0; | 425 | return 0; |
420 | } | 426 | } |
421 | 427 | ||
422 | static int ext4_dx_readdir(struct file * filp, | 428 | static int ext4_dx_readdir(struct file *filp, |
423 | void * dirent, filldir_t filldir) | 429 | void *dirent, filldir_t filldir) |
424 | { | 430 | { |
425 | struct dir_private_info *info = filp->private_data; | 431 | struct dir_private_info *info = filp->private_data; |
426 | struct inode *inode = filp->f_path.dentry->d_inode; | 432 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -450,11 +456,21 @@ static int ext4_dx_readdir(struct file * filp, | |||
450 | * If there are any leftover names on the hash collision | 456 | * If there are any leftover names on the hash collision |
451 | * chain, return them first. | 457 | * chain, return them first. |
452 | */ | 458 | */ |
453 | if (info->extra_fname && | 459 | if (info->extra_fname) { |
454 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 460 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
455 | goto finished; | 461 | goto finished; |
456 | 462 | ||
457 | if (!info->curr_node) | 463 | info->extra_fname = NULL; |
464 | info->curr_node = rb_next(info->curr_node); | ||
465 | if (!info->curr_node) { | ||
466 | if (info->next_hash == ~0) { | ||
467 | filp->f_pos = EXT4_HTREE_EOF; | ||
468 | goto finished; | ||
469 | } | ||
470 | info->curr_hash = info->next_hash; | ||
471 | info->curr_minor_hash = 0; | ||
472 | } | ||
473 | } else if (!info->curr_node) | ||
458 | info->curr_node = rb_first(&info->root); | 474 | info->curr_node = rb_first(&info->root); |
459 | 475 | ||
460 | while (1) { | 476 | while (1) { |
@@ -501,7 +517,7 @@ finished: | |||
501 | return 0; | 517 | return 0; |
502 | } | 518 | } |
503 | 519 | ||
504 | static int ext4_release_dir (struct inode * inode, struct file * filp) | 520 | static int ext4_release_dir(struct inode *inode, struct file *filp) |
505 | { | 521 | { |
506 | if (filp->private_data) | 522 | if (filp->private_data) |
507 | ext4_htree_free_dir_info(filp->private_data); | 523 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..4880cc3e6727 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -44,9 +44,9 @@ | |||
44 | #ifdef EXT4FS_DEBUG | 44 | #ifdef EXT4FS_DEBUG |
45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
46 | do { \ | 46 | do { \ |
47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
48 | __FILE__, __LINE__, __func__); \ | 48 | __FILE__, __LINE__, __func__); \ |
49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk(KERN_DEBUG f, ## a); \ |
50 | } while (0) | 50 | } while (0) |
51 | #else | 51 | #else |
52 | #define ext4_debug(f, a...) do {} while (0) | 52 | #define ext4_debug(f, a...) do {} while (0) |
@@ -128,7 +128,7 @@ struct ext4_allocation_request { | |||
128 | #else | 128 | #else |
129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
130 | #endif | 130 | #endif |
131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) | 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
134 | #else | 134 | #else |
@@ -245,7 +245,7 @@ struct flex_groups { | |||
245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
246 | 246 | ||
247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
248 | #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 248 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Inode dynamic state flags | 251 | * Inode dynamic state flags |
@@ -291,8 +291,6 @@ struct ext4_new_group_data { | |||
291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
294 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | ||
295 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) | ||
296 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 294 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
297 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 295 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
298 | #ifdef CONFIG_JBD2_DEBUG | 296 | #ifdef CONFIG_JBD2_DEBUG |
@@ -300,7 +298,10 @@ struct ext4_new_group_data { | |||
300 | #endif | 298 | #endif |
301 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 299 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
302 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 300 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
303 | #define EXT4_IOC_MIGRATE _IO('f', 7) | 301 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
302 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | ||
303 | #define EXT4_IOC_MIGRATE _IO('f', 9) | ||
304 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | ||
304 | 305 | ||
305 | /* | 306 | /* |
306 | * ioctl commands in 32 bit emulation | 307 | * ioctl commands in 32 bit emulation |
@@ -510,7 +511,6 @@ do { \ | |||
510 | /* | 511 | /* |
511 | * Mount flags | 512 | * Mount flags |
512 | */ | 513 | */ |
513 | #define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ | ||
514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ | 514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ |
515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ | 515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ |
516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ | 516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ |
@@ -538,8 +538,9 @@ do { \ | |||
538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | ||
542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 541 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
542 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | ||
543 | |||
543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 544 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
544 | #ifndef _LINUX_EXT2_FS_H | 545 | #ifndef _LINUX_EXT2_FS_H |
545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 546 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
@@ -667,7 +668,7 @@ struct ext4_super_block { | |||
667 | }; | 668 | }; |
668 | 669 | ||
669 | #ifdef __KERNEL__ | 670 | #ifdef __KERNEL__ |
670 | static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) | 671 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
671 | { | 672 | { |
672 | return sb->s_fs_info; | 673 | return sb->s_fs_info; |
673 | } | 674 | } |
@@ -725,11 +726,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
725 | */ | 726 | */ |
726 | 727 | ||
727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 728 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
728 | ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) | 729 | (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) |
729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 730 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
730 | ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) | 731 | (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) |
731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 732 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
732 | ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) | 733 | (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) |
733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 734 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 735 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 736 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
@@ -789,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
789 | #define EXT4_DEF_RESUID 0 | 790 | #define EXT4_DEF_RESUID 0 |
790 | #define EXT4_DEF_RESGID 0 | 791 | #define EXT4_DEF_RESGID 0 |
791 | 792 | ||
793 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | ||
794 | |||
792 | /* | 795 | /* |
793 | * Default mount options | 796 | * Default mount options |
794 | */ | 797 | */ |
@@ -954,6 +957,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
954 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 957 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
955 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); | 958 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); |
956 | 959 | ||
960 | extern struct proc_dir_entry *ext4_proc_root; | ||
961 | |||
962 | #ifdef CONFIG_PROC_FS | ||
963 | extern const struct file_operations ext4_ui_proc_fops; | ||
964 | |||
965 | #define EXT4_PROC_HANDLER(name, var) \ | ||
966 | do { \ | ||
967 | proc = proc_create_data(name, mode, sbi->s_proc, \ | ||
968 | &ext4_ui_proc_fops, &sbi->s_##var); \ | ||
969 | if (proc == NULL) { \ | ||
970 | printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ | ||
971 | goto err_out; \ | ||
972 | } \ | ||
973 | } while (0) | ||
974 | #else | ||
975 | #define EXT4_PROC_HANDLER(name, var) | ||
976 | #endif | ||
977 | |||
957 | /* | 978 | /* |
958 | * Function prototypes | 979 | * Function prototypes |
959 | */ | 980 | */ |
@@ -981,23 +1002,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1002 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 1003 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
983 | unsigned long *count, int *errp); | 1004 | unsigned long *count, int *errp); |
984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | 1005 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
985 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 1006 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
987 | ext4_fsblk_t nblocks); | 1007 | s64 nblocks); |
988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 1008 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
989 | ext4_fsblk_t block, unsigned long count, int metadata); | 1009 | ext4_fsblk_t block, unsigned long count, int metadata); |
990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 1010 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
991 | ext4_fsblk_t block, unsigned long count, | 1011 | ext4_fsblk_t block, unsigned long count, |
992 | unsigned long *pdquot_freed_blocks); | 1012 | unsigned long *pdquot_freed_blocks); |
993 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); | 1013 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
994 | extern void ext4_check_blocks_bitmap (struct super_block *); | 1014 | extern void ext4_check_blocks_bitmap(struct super_block *); |
995 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1015 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
996 | ext4_group_t block_group, | 1016 | ext4_group_t block_group, |
997 | struct buffer_head ** bh); | 1017 | struct buffer_head ** bh); |
998 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1018 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
999 | extern void ext4_init_block_alloc_info(struct inode *); | ||
1000 | extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); | ||
1001 | 1019 | ||
1002 | /* dir.c */ | 1020 | /* dir.c */ |
1003 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1021 | extern int ext4_check_dir_entry(const char *, struct inode *, |
@@ -1009,20 +1027,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
1009 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1027 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
1010 | 1028 | ||
1011 | /* fsync.c */ | 1029 | /* fsync.c */ |
1012 | extern int ext4_sync_file (struct file *, struct dentry *, int); | 1030 | extern int ext4_sync_file(struct file *, struct dentry *, int); |
1013 | 1031 | ||
1014 | /* hash.c */ | 1032 | /* hash.c */ |
1015 | extern int ext4fs_dirhash(const char *name, int len, struct | 1033 | extern int ext4fs_dirhash(const char *name, int len, struct |
1016 | dx_hash_info *hinfo); | 1034 | dx_hash_info *hinfo); |
1017 | 1035 | ||
1018 | /* ialloc.c */ | 1036 | /* ialloc.c */ |
1019 | extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); | 1037 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); |
1020 | extern void ext4_free_inode (handle_t *, struct inode *); | 1038 | extern void ext4_free_inode(handle_t *, struct inode *); |
1021 | extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); | 1039 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
1022 | extern unsigned long ext4_count_free_inodes (struct super_block *); | 1040 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1023 | extern unsigned long ext4_count_dirs (struct super_block *); | 1041 | extern unsigned long ext4_count_dirs(struct super_block *); |
1024 | extern void ext4_check_inodes_bitmap (struct super_block *); | 1042 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1025 | extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | 1043 | extern unsigned long ext4_count_free(struct buffer_head *, unsigned); |
1026 | 1044 | ||
1027 | /* mballoc.c */ | 1045 | /* mballoc.c */ |
1028 | extern long ext4_mb_stats; | 1046 | extern long ext4_mb_stats; |
@@ -1032,7 +1050,7 @@ extern int ext4_mb_release(struct super_block *); | |||
1032 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1050 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
1033 | struct ext4_allocation_request *, int *); | 1051 | struct ext4_allocation_request *, int *); |
1034 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1052 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1035 | extern void ext4_mb_discard_inode_preallocations(struct inode *); | 1053 | extern void ext4_discard_preallocations(struct inode *); |
1036 | extern int __init init_ext4_mballoc(void); | 1054 | extern int __init init_ext4_mballoc(void); |
1037 | extern void exit_ext4_mballoc(void); | 1055 | extern void exit_ext4_mballoc(void); |
1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1056 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
@@ -1050,39 +1068,41 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, | |||
1050 | ext4_lblk_t, int, int *); | 1068 | ext4_lblk_t, int, int *); |
1051 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1069 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
1052 | ext4_lblk_t, int, int *); | 1070 | ext4_lblk_t, int, int *); |
1071 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1072 | struct buffer_head *bh_result, int create); | ||
1053 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 1073 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
1054 | ext4_lblk_t iblock, unsigned long maxblocks, | 1074 | ext4_lblk_t iblock, unsigned long maxblocks, |
1055 | struct buffer_head *bh_result, | 1075 | struct buffer_head *bh_result, |
1056 | int create, int extend_disksize); | 1076 | int create, int extend_disksize); |
1057 | 1077 | ||
1058 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1078 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
1059 | extern int ext4_write_inode (struct inode *, int); | 1079 | extern int ext4_write_inode(struct inode *, int); |
1060 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1080 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1061 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1081 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1062 | struct kstat *stat); | 1082 | struct kstat *stat); |
1063 | extern void ext4_delete_inode (struct inode *); | 1083 | extern void ext4_delete_inode(struct inode *); |
1064 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1084 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1065 | extern void ext4_discard_reservation (struct inode *); | ||
1066 | extern void ext4_dirty_inode(struct inode *); | 1085 | extern void ext4_dirty_inode(struct inode *); |
1067 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1086 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
1068 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1087 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1069 | extern int ext4_can_truncate(struct inode *inode); | 1088 | extern int ext4_can_truncate(struct inode *inode); |
1070 | extern void ext4_truncate (struct inode *); | 1089 | extern void ext4_truncate(struct inode *); |
1071 | extern void ext4_set_inode_flags(struct inode *); | 1090 | extern void ext4_set_inode_flags(struct inode *); |
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1091 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1092 | extern void ext4_set_aops(struct inode *inode); |
1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1093 | extern int ext4_writepage_trans_blocks(struct inode *); |
1094 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1095 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
1075 | extern int ext4_block_truncate_page(handle_t *handle, | 1096 | extern int ext4_block_truncate_page(handle_t *handle, |
1076 | struct address_space *mapping, loff_t from); | 1097 | struct address_space *mapping, loff_t from); |
1077 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1098 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
1078 | 1099 | ||
1079 | /* ioctl.c */ | 1100 | /* ioctl.c */ |
1080 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1101 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1081 | extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); | 1102 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
1082 | 1103 | ||
1083 | /* migrate.c */ | 1104 | /* migrate.c */ |
1084 | extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, | 1105 | extern int ext4_ext_migrate(struct inode *); |
1085 | unsigned long); | ||
1086 | /* namei.c */ | 1106 | /* namei.c */ |
1087 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1107 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1088 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1108 | extern int ext4_orphan_del(handle_t *, struct inode *); |
@@ -1097,14 +1117,14 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1097 | ext4_fsblk_t n_blocks_count); | 1117 | ext4_fsblk_t n_blocks_count); |
1098 | 1118 | ||
1099 | /* super.c */ | 1119 | /* super.c */ |
1100 | extern void ext4_error (struct super_block *, const char *, const char *, ...) | 1120 | extern void ext4_error(struct super_block *, const char *, const char *, ...) |
1101 | __attribute__ ((format (printf, 3, 4))); | 1121 | __attribute__ ((format (printf, 3, 4))); |
1102 | extern void __ext4_std_error (struct super_block *, const char *, int); | 1122 | extern void __ext4_std_error(struct super_block *, const char *, int); |
1103 | extern void ext4_abort (struct super_block *, const char *, const char *, ...) | 1123 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
1104 | __attribute__ ((format (printf, 3, 4))); | 1124 | __attribute__ ((format (printf, 3, 4))); |
1105 | extern void ext4_warning (struct super_block *, const char *, const char *, ...) | 1125 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1106 | __attribute__ ((format (printf, 3, 4))); | 1126 | __attribute__ ((format (printf, 3, 4))); |
1107 | extern void ext4_update_dynamic_rev (struct super_block *sb); | 1127 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1108 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1128 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1109 | __u32 compat); | 1129 | __u32 compat); |
1110 | extern int ext4_update_rocompat_feature(handle_t *handle, | 1130 | extern int ext4_update_rocompat_feature(handle_t *handle, |
@@ -1177,7 +1197,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | |||
1177 | 1197 | ||
1178 | static inline | 1198 | static inline |
1179 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 1199 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
1180 | ext4_group_t group) | 1200 | ext4_group_t group) |
1181 | { | 1201 | { |
1182 | struct ext4_group_info ***grp_info; | 1202 | struct ext4_group_info ***grp_info; |
1183 | long indexv, indexh; | 1203 | long indexv, indexh; |
@@ -1205,6 +1225,28 @@ do { \ | |||
1205 | __ext4_std_error((sb), __func__, (errno)); \ | 1225 | __ext4_std_error((sb), __func__, (errno)); \ |
1206 | } while (0) | 1226 | } while (0) |
1207 | 1227 | ||
1228 | #ifdef CONFIG_SMP | ||
1229 | /* Each CPU can accumulate FBC_BATCH blocks in their local | ||
1230 | * counters. So we need to make sure we have free blocks more | ||
1231 | * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. | ||
1232 | */ | ||
1233 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) | ||
1234 | #else | ||
1235 | #define EXT4_FREEBLOCKS_WATERMARK 0 | ||
1236 | #endif | ||
1237 | |||
1238 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | ||
1239 | { | ||
1240 | /* | ||
1241 | * XXX: replace with spinlock if seen contended -bzzz | ||
1242 | */ | ||
1243 | down_write(&EXT4_I(inode)->i_data_sem); | ||
1244 | if (newsize > EXT4_I(inode)->i_disksize) | ||
1245 | EXT4_I(inode)->i_disksize = newsize; | ||
1246 | up_write(&EXT4_I(inode)->i_data_sem); | ||
1247 | return ; | ||
1248 | } | ||
1249 | |||
1208 | /* | 1250 | /* |
1209 | * Inodes and files operations | 1251 | * Inodes and files operations |
1210 | */ | 1252 | */ |
@@ -1227,6 +1269,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1227 | /* extents.c */ | 1269 | /* extents.c */ |
1228 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1270 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1229 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1271 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1272 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | ||
1273 | int chunk); | ||
1230 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1274 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1231 | ext4_lblk_t iblock, | 1275 | ext4_lblk_t iblock, |
1232 | unsigned long max_blocks, struct buffer_head *bh_result, | 1276 | unsigned long max_blocks, struct buffer_head *bh_result, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..bec7ce59fc0d 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -124,6 +124,19 @@ struct ext4_ext_path { | |||
124 | #define EXT4_EXT_CACHE_GAP 1 | 124 | #define EXT4_EXT_CACHE_GAP 1 |
125 | #define EXT4_EXT_CACHE_EXTENT 2 | 125 | #define EXT4_EXT_CACHE_EXTENT 2 |
126 | 126 | ||
127 | /* | ||
128 | * to be called by ext4_ext_walk_space() | ||
129 | * negative retcode - error | ||
130 | * positive retcode - signal for ext4_ext_walk_space(), see below | ||
131 | * callback must return valid extent (passed or newly created) | ||
132 | */ | ||
133 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | ||
134 | struct ext4_ext_cache *, | ||
135 | struct ext4_extent *, void *); | ||
136 | |||
137 | #define EXT_CONTINUE 0 | ||
138 | #define EXT_BREAK 1 | ||
139 | #define EXT_REPEAT 2 | ||
127 | 140 | ||
128 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
129 | 142 | ||
@@ -216,12 +229,16 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | |||
216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 229 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 230 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 231 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
219 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 232 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
233 | int num, | ||
234 | struct ext4_ext_path *path); | ||
220 | extern int ext4_ext_try_to_merge(struct inode *inode, | 235 | extern int ext4_ext_try_to_merge(struct inode *inode, |
221 | struct ext4_ext_path *path, | 236 | struct ext4_ext_path *path, |
222 | struct ext4_extent *); | 237 | struct ext4_extent *); |
223 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 238 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
224 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 239 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
240 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
241 | ext_prepare_callback, void *); | ||
225 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 242 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
226 | struct ext4_ext_path *); | 243 | struct ext4_ext_path *); |
227 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | 244 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e4..5c124c0ac6d3 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; | |||
33 | /* data type for block group number */ | 33 | /* data type for block group number */ |
34 | typedef unsigned long ext4_group_t; | 34 | typedef unsigned long ext4_group_t; |
35 | 35 | ||
36 | struct ext4_reserve_window { | ||
37 | ext4_fsblk_t _rsv_start; /* First byte reserved */ | ||
38 | ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ | ||
39 | }; | ||
40 | |||
41 | struct ext4_reserve_window_node { | ||
42 | struct rb_node rsv_node; | ||
43 | __u32 rsv_goal_size; | ||
44 | __u32 rsv_alloc_hit; | ||
45 | struct ext4_reserve_window rsv_window; | ||
46 | }; | ||
47 | |||
48 | struct ext4_block_alloc_info { | ||
49 | /* information about reservation window */ | ||
50 | struct ext4_reserve_window_node rsv_window_node; | ||
51 | /* | ||
52 | * was i_next_alloc_block in ext4_inode_info | ||
53 | * is the logical (file-relative) number of the | ||
54 | * most-recently-allocated block in this file. | ||
55 | * We use this for detecting linearly ascending allocation requests. | ||
56 | */ | ||
57 | ext4_lblk_t last_alloc_logical_block; | ||
58 | /* | ||
59 | * Was i_next_alloc_goal in ext4_inode_info | ||
60 | * is the *physical* companion to i_next_alloc_block. | ||
61 | * it the physical block number of the block which was most-recentl | ||
62 | * allocated to this file. This give us the goal (target) for the next | ||
63 | * allocation when we detect linearly ascending requests. | ||
64 | */ | ||
65 | ext4_fsblk_t last_alloc_physical_block; | ||
66 | }; | ||
67 | |||
68 | #define rsv_start rsv_window._rsv_start | 36 | #define rsv_start rsv_window._rsv_start |
69 | #define rsv_end rsv_window._rsv_end | 37 | #define rsv_end rsv_window._rsv_end |
70 | 38 | ||
@@ -97,11 +65,8 @@ struct ext4_inode_info { | |||
97 | ext4_group_t i_block_group; | 65 | ext4_group_t i_block_group; |
98 | __u32 i_state; /* Dynamic state flags for ext4 */ | 66 | __u32 i_state; /* Dynamic state flags for ext4 */ |
99 | 67 | ||
100 | /* block reservation info */ | ||
101 | struct ext4_block_alloc_info *i_block_alloc_info; | ||
102 | |||
103 | ext4_lblk_t i_dir_start_lookup; | 68 | ext4_lblk_t i_dir_start_lookup; |
104 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 69 | #ifdef CONFIG_EXT4_FS_XATTR |
105 | /* | 70 | /* |
106 | * Extended attributes can be read independently of the main file | 71 | * Extended attributes can be read independently of the main file |
107 | * data. Taking i_mutex even when reading would cause contention | 72 | * data. Taking i_mutex even when reading would cause contention |
@@ -111,7 +76,7 @@ struct ext4_inode_info { | |||
111 | */ | 76 | */ |
112 | struct rw_semaphore xattr_sem; | 77 | struct rw_semaphore xattr_sem; |
113 | #endif | 78 | #endif |
114 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 79 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
115 | struct posix_acl *i_acl; | 80 | struct posix_acl *i_acl; |
116 | struct posix_acl *i_default_acl; | 81 | struct posix_acl *i_default_acl; |
117 | #endif | 82 | #endif |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -51,6 +51,14 @@ | |||
51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
53 | 53 | ||
54 | /* | ||
55 | * Define the number of metadata blocks we need to account to modify data. | ||
56 | * | ||
57 | * This include super block, inode block, quota blocks and xattr blocks | ||
58 | */ | ||
59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | ||
60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
61 | |||
54 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
55 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
56 | * generous. We can grow the delete transaction later if necessary. */ | 64 | * generous. We can grow the delete transaction later if necessary. */ |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d5531..445fde603df8 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -40,8 +40,8 @@ struct ext4_sb_info { | |||
40 | unsigned long s_blocks_last; /* Last seen block count */ | 40 | unsigned long s_blocks_last; /* Last seen block count */ |
41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
43 | struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ | 43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
44 | struct buffer_head ** s_group_desc; | 44 | struct buffer_head **s_group_desc; |
45 | unsigned long s_mount_opt; | 45 | unsigned long s_mount_opt; |
46 | ext4_fsblk_t s_sb_block; | 46 | ext4_fsblk_t s_sb_block; |
47 | uid_t s_resuid; | 47 | uid_t s_resuid; |
@@ -52,6 +52,7 @@ struct ext4_sb_info { | |||
52 | int s_desc_per_block_bits; | 52 | int s_desc_per_block_bits; |
53 | int s_inode_size; | 53 | int s_inode_size; |
54 | int s_first_ino; | 54 | int s_first_ino; |
55 | unsigned int s_inode_readahead_blks; | ||
55 | spinlock_t s_next_gen_lock; | 56 | spinlock_t s_next_gen_lock; |
56 | u32 s_next_generation; | 57 | u32 s_next_generation; |
57 | u32 s_hash_seed[4]; | 58 | u32 s_hash_seed[4]; |
@@ -59,16 +60,17 @@ struct ext4_sb_info { | |||
59 | struct percpu_counter s_freeblocks_counter; | 60 | struct percpu_counter s_freeblocks_counter; |
60 | struct percpu_counter s_freeinodes_counter; | 61 | struct percpu_counter s_freeinodes_counter; |
61 | struct percpu_counter s_dirs_counter; | 62 | struct percpu_counter s_dirs_counter; |
63 | struct percpu_counter s_dirtyblocks_counter; | ||
62 | struct blockgroup_lock s_blockgroup_lock; | 64 | struct blockgroup_lock s_blockgroup_lock; |
65 | struct proc_dir_entry *s_proc; | ||
63 | 66 | ||
64 | /* root of the per fs reservation window tree */ | 67 | /* root of the per fs reservation window tree */ |
65 | spinlock_t s_rsv_window_lock; | 68 | spinlock_t s_rsv_window_lock; |
66 | struct rb_root s_rsv_window_root; | 69 | struct rb_root s_rsv_window_root; |
67 | struct ext4_reserve_window_node s_rsv_window_head; | ||
68 | 70 | ||
69 | /* Journaling */ | 71 | /* Journaling */ |
70 | struct inode * s_journal_inode; | 72 | struct inode *s_journal_inode; |
71 | struct journal_s * s_journal; | 73 | struct journal_s *s_journal; |
72 | struct list_head s_orphan; | 74 | struct list_head s_orphan; |
73 | unsigned long s_commit_interval; | 75 | unsigned long s_commit_interval; |
74 | struct block_device *journal_bdev; | 76 | struct block_device *journal_bdev; |
@@ -97,21 +99,18 @@ struct ext4_sb_info { | |||
97 | struct inode *s_buddy_cache; | 99 | struct inode *s_buddy_cache; |
98 | long s_blocks_reserved; | 100 | long s_blocks_reserved; |
99 | spinlock_t s_reserve_lock; | 101 | spinlock_t s_reserve_lock; |
100 | struct list_head s_active_transaction; | ||
101 | struct list_head s_closed_transaction; | ||
102 | struct list_head s_committed_transaction; | ||
103 | spinlock_t s_md_lock; | 102 | spinlock_t s_md_lock; |
104 | tid_t s_last_transaction; | 103 | tid_t s_last_transaction; |
105 | unsigned short *s_mb_offsets, *s_mb_maxs; | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
106 | 105 | ||
107 | /* tunables */ | 106 | /* tunables */ |
108 | unsigned long s_stripe; | 107 | unsigned long s_stripe; |
109 | unsigned long s_mb_stream_request; | 108 | unsigned int s_mb_stream_request; |
110 | unsigned long s_mb_max_to_scan; | 109 | unsigned int s_mb_max_to_scan; |
111 | unsigned long s_mb_min_to_scan; | 110 | unsigned int s_mb_min_to_scan; |
112 | unsigned long s_mb_stats; | 111 | unsigned int s_mb_stats; |
113 | unsigned long s_mb_order2_reqs; | 112 | unsigned int s_mb_order2_reqs; |
114 | unsigned long s_mb_group_prealloc; | 113 | unsigned int s_mb_group_prealloc; |
115 | /* where last allocation was done - for stream allocation */ | 114 | /* where last allocation was done - for stream allocation */ |
116 | unsigned long s_mb_last_group; | 115 | unsigned long s_mb_last_group; |
117 | unsigned long s_mb_last_start; | 116 | unsigned long s_mb_last_start; |
@@ -121,7 +120,6 @@ struct ext4_sb_info { | |||
121 | int s_mb_history_cur; | 120 | int s_mb_history_cur; |
122 | int s_mb_history_max; | 121 | int s_mb_history_max; |
123 | int s_mb_history_num; | 122 | int s_mb_history_num; |
124 | struct proc_dir_entry *s_mb_proc; | ||
125 | spinlock_t s_mb_history_lock; | 123 | spinlock_t s_mb_history_lock; |
126 | int s_mb_history_filter; | 124 | int s_mb_history_filter; |
127 | 125 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..ea2ce3c0ae66 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/falloc.h> | 41 | #include <linux/falloc.h> |
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | #include <linux/fiemap.h> | ||
43 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
44 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
45 | 46 | ||
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
383 | ext_debug("\n"); | 384 | ext_debug("\n"); |
384 | } | 385 | } |
385 | #else | 386 | #else |
386 | #define ext4_ext_show_path(inode,path) | 387 | #define ext4_ext_show_path(inode, path) |
387 | #define ext4_ext_show_leaf(inode,path) | 388 | #define ext4_ext_show_leaf(inode, path) |
388 | #endif | 389 | #endif |
389 | 390 | ||
390 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 391 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
440 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { | 441 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { |
441 | if (k != 0 && | 442 | if (k != 0 && |
442 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { | 443 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { |
443 | printk("k=%d, ix=0x%p, first=0x%p\n", k, | 444 | printk(KERN_DEBUG "k=%d, ix=0x%p, " |
444 | ix, EXT_FIRST_INDEX(eh)); | 445 | "first=0x%p\n", k, |
445 | printk("%u <= %u\n", | 446 | ix, EXT_FIRST_INDEX(eh)); |
447 | printk(KERN_DEBUG "%u <= %u\n", | ||
446 | le32_to_cpu(ix->ei_block), | 448 | le32_to_cpu(ix->ei_block), |
447 | le32_to_cpu(ix[-1].ei_block)); | 449 | le32_to_cpu(ix[-1].ei_block)); |
448 | } | 450 | } |
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1475 | struct ext4_ext_path *path, | 1477 | struct ext4_ext_path *path, |
1476 | struct ext4_extent *newext) | 1478 | struct ext4_extent *newext) |
1477 | { | 1479 | { |
1478 | struct ext4_extent_header * eh; | 1480 | struct ext4_extent_header *eh; |
1479 | struct ext4_extent *ex, *fex; | 1481 | struct ext4_extent *ex, *fex; |
1480 | struct ext4_extent *nearex; /* nearest extent */ | 1482 | struct ext4_extent *nearex; /* nearest extent */ |
1481 | struct ext4_ext_path *npath = NULL; | 1483 | struct ext4_ext_path *npath = NULL; |
@@ -1625,6 +1627,113 @@ cleanup: | |||
1625 | return err; | 1627 | return err; |
1626 | } | 1628 | } |
1627 | 1629 | ||
1630 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | ||
1631 | ext4_lblk_t num, ext_prepare_callback func, | ||
1632 | void *cbdata) | ||
1633 | { | ||
1634 | struct ext4_ext_path *path = NULL; | ||
1635 | struct ext4_ext_cache cbex; | ||
1636 | struct ext4_extent *ex; | ||
1637 | ext4_lblk_t next, start = 0, end = 0; | ||
1638 | ext4_lblk_t last = block + num; | ||
1639 | int depth, exists, err = 0; | ||
1640 | |||
1641 | BUG_ON(func == NULL); | ||
1642 | BUG_ON(inode == NULL); | ||
1643 | |||
1644 | while (block < last && block != EXT_MAX_BLOCK) { | ||
1645 | num = last - block; | ||
1646 | /* find extent for this block */ | ||
1647 | path = ext4_ext_find_extent(inode, block, path); | ||
1648 | if (IS_ERR(path)) { | ||
1649 | err = PTR_ERR(path); | ||
1650 | path = NULL; | ||
1651 | break; | ||
1652 | } | ||
1653 | |||
1654 | depth = ext_depth(inode); | ||
1655 | BUG_ON(path[depth].p_hdr == NULL); | ||
1656 | ex = path[depth].p_ext; | ||
1657 | next = ext4_ext_next_allocated_block(path); | ||
1658 | |||
1659 | exists = 0; | ||
1660 | if (!ex) { | ||
1661 | /* there is no extent yet, so try to allocate | ||
1662 | * all requested space */ | ||
1663 | start = block; | ||
1664 | end = block + num; | ||
1665 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
1666 | /* need to allocate space before found extent */ | ||
1667 | start = block; | ||
1668 | end = le32_to_cpu(ex->ee_block); | ||
1669 | if (block + num < end) | ||
1670 | end = block + num; | ||
1671 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
1672 | + ext4_ext_get_actual_len(ex)) { | ||
1673 | /* need to allocate space after found extent */ | ||
1674 | start = block; | ||
1675 | end = block + num; | ||
1676 | if (end >= next) | ||
1677 | end = next; | ||
1678 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
1679 | /* | ||
1680 | * some part of requested space is covered | ||
1681 | * by found extent | ||
1682 | */ | ||
1683 | start = block; | ||
1684 | end = le32_to_cpu(ex->ee_block) | ||
1685 | + ext4_ext_get_actual_len(ex); | ||
1686 | if (block + num < end) | ||
1687 | end = block + num; | ||
1688 | exists = 1; | ||
1689 | } else { | ||
1690 | BUG(); | ||
1691 | } | ||
1692 | BUG_ON(end <= start); | ||
1693 | |||
1694 | if (!exists) { | ||
1695 | cbex.ec_block = start; | ||
1696 | cbex.ec_len = end - start; | ||
1697 | cbex.ec_start = 0; | ||
1698 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1699 | } else { | ||
1700 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
1701 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
1702 | cbex.ec_start = ext_pblock(ex); | ||
1703 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1704 | } | ||
1705 | |||
1706 | BUG_ON(cbex.ec_len == 0); | ||
1707 | err = func(inode, path, &cbex, ex, cbdata); | ||
1708 | ext4_ext_drop_refs(path); | ||
1709 | |||
1710 | if (err < 0) | ||
1711 | break; | ||
1712 | |||
1713 | if (err == EXT_REPEAT) | ||
1714 | continue; | ||
1715 | else if (err == EXT_BREAK) { | ||
1716 | err = 0; | ||
1717 | break; | ||
1718 | } | ||
1719 | |||
1720 | if (ext_depth(inode) != depth) { | ||
1721 | /* depth was changed. we have to realloc path */ | ||
1722 | kfree(path); | ||
1723 | path = NULL; | ||
1724 | } | ||
1725 | |||
1726 | block = cbex.ec_block + cbex.ec_len; | ||
1727 | } | ||
1728 | |||
1729 | if (path) { | ||
1730 | ext4_ext_drop_refs(path); | ||
1731 | kfree(path); | ||
1732 | } | ||
1733 | |||
1734 | return err; | ||
1735 | } | ||
1736 | |||
1628 | static void | 1737 | static void |
1629 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1738 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1630 | __u32 len, ext4_fsblk_t start, int type) | 1739 | __u32 len, ext4_fsblk_t start, int type) |
@@ -1747,54 +1856,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1747 | } | 1856 | } |
1748 | 1857 | ||
1749 | /* | 1858 | /* |
1750 | * ext4_ext_calc_credits_for_insert: | 1859 | * ext4_ext_calc_credits_for_single_extent: |
1751 | * This routine returns max. credits that the extent tree can consume. | 1860 | * This routine returns max. credits that needed to insert an extent |
1752 | * It should be OK for low-performance paths like ->writepage() | 1861 | * to the extent tree. |
1753 | * To allow many writing processes to fit into a single transaction, | 1862 | * When pass the actual path, the caller should calculate credits |
1754 | * the caller should calculate credits under i_data_sem and | 1863 | * under i_data_sem. |
1755 | * pass the actual path. | ||
1756 | */ | 1864 | */ |
1757 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1865 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
1758 | struct ext4_ext_path *path) | 1866 | struct ext4_ext_path *path) |
1759 | { | 1867 | { |
1760 | int depth, needed; | ||
1761 | |||
1762 | if (path) { | 1868 | if (path) { |
1869 | int depth = ext_depth(inode); | ||
1870 | int ret = 0; | ||
1871 | |||
1763 | /* probably there is space in leaf? */ | 1872 | /* probably there is space in leaf? */ |
1764 | depth = ext_depth(inode); | ||
1765 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1873 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
1766 | < le16_to_cpu(path[depth].p_hdr->eh_max)) | 1874 | < le16_to_cpu(path[depth].p_hdr->eh_max)) { |
1767 | return 1; | ||
1768 | } | ||
1769 | |||
1770 | /* | ||
1771 | * given 32-bit logical block (4294967296 blocks), max. tree | ||
1772 | * can be 4 levels in depth -- 4 * 340^4 == 53453440000. | ||
1773 | * Let's also add one more level for imbalance. | ||
1774 | */ | ||
1775 | depth = 5; | ||
1776 | 1875 | ||
1777 | /* allocation of new data block(s) */ | 1876 | /* |
1778 | needed = 2; | 1877 | * There are some space in the leaf tree, no |
1878 | * need to account for leaf block credit | ||
1879 | * | ||
1880 | * bitmaps and block group descriptor blocks | ||
1881 | * and other metadat blocks still need to be | ||
1882 | * accounted. | ||
1883 | */ | ||
1884 | /* 1 bitmap, 1 block group descriptor */ | ||
1885 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
1886 | } | ||
1887 | } | ||
1779 | 1888 | ||
1780 | /* | 1889 | return ext4_chunk_trans_blocks(inode, nrblocks); |
1781 | * tree can be full, so it would need to grow in depth: | 1890 | } |
1782 | * we need one credit to modify old root, credits for | ||
1783 | * new root will be added in split accounting | ||
1784 | */ | ||
1785 | needed += 1; | ||
1786 | 1891 | ||
1787 | /* | 1892 | /* |
1788 | * Index split can happen, we would need: | 1893 | * How many index/leaf blocks need to change/allocate to modify nrblocks? |
1789 | * allocate intermediate indexes (bitmap + group) | 1894 | * |
1790 | * + change two blocks at each level, but root (already included) | 1895 | * if nrblocks are fit in a single extent (chunk flag is 1), then |
1791 | */ | 1896 | * in the worse case, each tree level index/leaf need to be changed |
1792 | needed += (depth * 2) + (depth * 2); | 1897 | * if the tree split due to insert a new extent, then the old tree |
1898 | * index/leaf need to be updated too | ||
1899 | * | ||
1900 | * If the nrblocks are discontiguous, they could cause | ||
1901 | * the whole tree split more than once, but this is really rare. | ||
1902 | */ | ||
1903 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
1904 | { | ||
1905 | int index; | ||
1906 | int depth = ext_depth(inode); | ||
1793 | 1907 | ||
1794 | /* any allocation modifies superblock */ | 1908 | if (chunk) |
1795 | needed += 1; | 1909 | index = depth * 2; |
1910 | else | ||
1911 | index = depth * 3; | ||
1796 | 1912 | ||
1797 | return needed; | 1913 | return index; |
1798 | } | 1914 | } |
1799 | 1915 | ||
1800 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1916 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
@@ -1921,9 +2037,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
1921 | correct_index = 1; | 2037 | correct_index = 1; |
1922 | credits += (ext_depth(inode)) + 1; | 2038 | credits += (ext_depth(inode)) + 1; |
1923 | } | 2039 | } |
1924 | #ifdef CONFIG_QUOTA | ||
1925 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2040 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
1926 | #endif | ||
1927 | 2041 | ||
1928 | err = ext4_ext_journal_restart(handle, credits); | 2042 | err = ext4_ext_journal_restart(handle, credits); |
1929 | if (err) | 2043 | if (err) |
@@ -2137,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) | |||
2137 | */ | 2251 | */ |
2138 | 2252 | ||
2139 | if (test_opt(sb, EXTENTS)) { | 2253 | if (test_opt(sb, EXTENTS)) { |
2140 | printk("EXT4-fs: file extents enabled"); | 2254 | printk(KERN_INFO "EXT4-fs: file extents enabled"); |
2141 | #ifdef AGGRESSIVE_TEST | 2255 | #ifdef AGGRESSIVE_TEST |
2142 | printk(", aggressive tests"); | 2256 | printk(", aggressive tests"); |
2143 | #endif | 2257 | #endif |
@@ -2691,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2691 | goto out2; | 2805 | goto out2; |
2692 | } | 2806 | } |
2693 | /* | 2807 | /* |
2694 | * Okay, we need to do block allocation. Lazily initialize the block | 2808 | * Okay, we need to do block allocation. |
2695 | * allocation info here if necessary. | ||
2696 | */ | 2809 | */ |
2697 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||
2698 | ext4_init_block_alloc_info(inode); | ||
2699 | 2810 | ||
2700 | /* find neighbour allocated blocks */ | 2811 | /* find neighbour allocated blocks */ |
2701 | ar.lleft = iblock; | 2812 | ar.lleft = iblock; |
@@ -2755,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2755 | /* free data blocks we just allocated */ | 2866 | /* free data blocks we just allocated */ |
2756 | /* not a good idea to call discard here directly, | 2867 | /* not a good idea to call discard here directly, |
2757 | * but otherwise we'd need to call it every free() */ | 2868 | * but otherwise we'd need to call it every free() */ |
2758 | ext4_mb_discard_inode_preallocations(inode); | 2869 | ext4_discard_preallocations(inode); |
2759 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2870 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
2760 | ext4_ext_get_actual_len(&newex), 0); | 2871 | ext4_ext_get_actual_len(&newex), 0); |
2761 | goto out2; | 2872 | goto out2; |
@@ -2805,7 +2916,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2805 | /* | 2916 | /* |
2806 | * probably first extent we're gonna free will be last in block | 2917 | * probably first extent we're gonna free will be last in block |
2807 | */ | 2918 | */ |
2808 | err = ext4_writepage_trans_blocks(inode) + 3; | 2919 | err = ext4_writepage_trans_blocks(inode); |
2809 | handle = ext4_journal_start(inode, err); | 2920 | handle = ext4_journal_start(inode, err); |
2810 | if (IS_ERR(handle)) | 2921 | if (IS_ERR(handle)) |
2811 | return; | 2922 | return; |
@@ -2819,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2819 | down_write(&EXT4_I(inode)->i_data_sem); | 2930 | down_write(&EXT4_I(inode)->i_data_sem); |
2820 | ext4_ext_invalidate_cache(inode); | 2931 | ext4_ext_invalidate_cache(inode); |
2821 | 2932 | ||
2822 | ext4_mb_discard_inode_preallocations(inode); | 2933 | ext4_discard_preallocations(inode); |
2823 | 2934 | ||
2824 | /* | 2935 | /* |
2825 | * TODO: optimization is possible here. | 2936 | * TODO: optimization is possible here. |
@@ -2858,27 +2969,6 @@ out_stop: | |||
2858 | ext4_journal_stop(handle); | 2969 | ext4_journal_stop(handle); |
2859 | } | 2970 | } |
2860 | 2971 | ||
2861 | /* | ||
2862 | * ext4_ext_writepage_trans_blocks: | ||
2863 | * calculate max number of blocks we could modify | ||
2864 | * in order to allocate new block for an inode | ||
2865 | */ | ||
2866 | int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | ||
2867 | { | ||
2868 | int needed; | ||
2869 | |||
2870 | needed = ext4_ext_calc_credits_for_insert(inode, NULL); | ||
2871 | |||
2872 | /* caller wants to allocate num blocks, but note it includes sb */ | ||
2873 | needed = needed * num - (num - 1); | ||
2874 | |||
2875 | #ifdef CONFIG_QUOTA | ||
2876 | needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
2877 | #endif | ||
2878 | |||
2879 | return needed; | ||
2880 | } | ||
2881 | |||
2882 | static void ext4_falloc_update_inode(struct inode *inode, | 2972 | static void ext4_falloc_update_inode(struct inode *inode, |
2883 | int mode, loff_t new_size, int update_ctime) | 2973 | int mode, loff_t new_size, int update_ctime) |
2884 | { | 2974 | { |
@@ -2893,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
2893 | * Update only when preallocation was requested beyond | 2983 | * Update only when preallocation was requested beyond |
2894 | * the file size. | 2984 | * the file size. |
2895 | */ | 2985 | */ |
2896 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 2986 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
2897 | new_size > i_size_read(inode)) { | 2987 | if (new_size > i_size_read(inode)) |
2898 | i_size_write(inode, new_size); | 2988 | i_size_write(inode, new_size); |
2899 | EXT4_I(inode)->i_disksize = new_size; | 2989 | if (new_size > EXT4_I(inode)->i_disksize) |
2990 | ext4_update_i_disksize(inode, new_size); | ||
2900 | } | 2991 | } |
2901 | 2992 | ||
2902 | } | 2993 | } |
@@ -2939,10 +3030,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2939 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3030 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
2940 | - block; | 3031 | - block; |
2941 | /* | 3032 | /* |
2942 | * credits to insert 1 extent into extent tree + buffers to be able to | 3033 | * credits to insert 1 extent into extent tree |
2943 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
2944 | */ | 3034 | */ |
2945 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 3035 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
2946 | mutex_lock(&inode->i_mutex); | 3036 | mutex_lock(&inode->i_mutex); |
2947 | retry: | 3037 | retry: |
2948 | while (ret >= 0 && ret < max_blocks) { | 3038 | while (ret >= 0 && ret < max_blocks) { |
@@ -2989,3 +3079,143 @@ retry: | |||
2989 | mutex_unlock(&inode->i_mutex); | 3079 | mutex_unlock(&inode->i_mutex); |
2990 | return ret > 0 ? ret2 : ret; | 3080 | return ret > 0 ? ret2 : ret; |
2991 | } | 3081 | } |
3082 | |||
3083 | /* | ||
3084 | * Callback function called for each extent to gather FIEMAP information. | ||
3085 | */ | ||
3086 | int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | ||
3087 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | ||
3088 | void *data) | ||
3089 | { | ||
3090 | struct fiemap_extent_info *fieinfo = data; | ||
3091 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3092 | __u64 logical; | ||
3093 | __u64 physical; | ||
3094 | __u64 length; | ||
3095 | __u32 flags = 0; | ||
3096 | int error; | ||
3097 | |||
3098 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3099 | |||
3100 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | ||
3101 | pgoff_t offset; | ||
3102 | struct page *page; | ||
3103 | struct buffer_head *bh = NULL; | ||
3104 | |||
3105 | offset = logical >> PAGE_SHIFT; | ||
3106 | page = find_get_page(inode->i_mapping, offset); | ||
3107 | if (!page || !page_has_buffers(page)) | ||
3108 | return EXT_CONTINUE; | ||
3109 | |||
3110 | bh = page_buffers(page); | ||
3111 | |||
3112 | if (!bh) | ||
3113 | return EXT_CONTINUE; | ||
3114 | |||
3115 | if (buffer_delay(bh)) { | ||
3116 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
3117 | page_cache_release(page); | ||
3118 | } else { | ||
3119 | page_cache_release(page); | ||
3120 | return EXT_CONTINUE; | ||
3121 | } | ||
3122 | } | ||
3123 | |||
3124 | physical = (__u64)newex->ec_start << blksize_bits; | ||
3125 | length = (__u64)newex->ec_len << blksize_bits; | ||
3126 | |||
3127 | if (ex && ext4_ext_is_uninitialized(ex)) | ||
3128 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
3129 | |||
3130 | /* | ||
3131 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
3132 | * | ||
3133 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3134 | * this also indicates no more allocated blocks. | ||
3135 | * | ||
3136 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3137 | */ | ||
3138 | if (logical + length - 1 == EXT_MAX_BLOCK || | ||
3139 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | ||
3140 | flags |= FIEMAP_EXTENT_LAST; | ||
3141 | |||
3142 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | ||
3143 | length, flags); | ||
3144 | if (error < 0) | ||
3145 | return error; | ||
3146 | if (error == 1) | ||
3147 | return EXT_BREAK; | ||
3148 | |||
3149 | return EXT_CONTINUE; | ||
3150 | } | ||
3151 | |||
3152 | /* fiemap flags we can handle specified here */ | ||
3153 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
3154 | |||
3155 | int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) | ||
3156 | { | ||
3157 | __u64 physical = 0; | ||
3158 | __u64 length; | ||
3159 | __u32 flags = FIEMAP_EXTENT_LAST; | ||
3160 | int blockbits = inode->i_sb->s_blocksize_bits; | ||
3161 | int error = 0; | ||
3162 | |||
3163 | /* in-inode? */ | ||
3164 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | ||
3165 | struct ext4_iloc iloc; | ||
3166 | int offset; /* offset of xattr in inode */ | ||
3167 | |||
3168 | error = ext4_get_inode_loc(inode, &iloc); | ||
3169 | if (error) | ||
3170 | return error; | ||
3171 | physical = iloc.bh->b_blocknr << blockbits; | ||
3172 | offset = EXT4_GOOD_OLD_INODE_SIZE + | ||
3173 | EXT4_I(inode)->i_extra_isize; | ||
3174 | physical += offset; | ||
3175 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | ||
3176 | flags |= FIEMAP_EXTENT_DATA_INLINE; | ||
3177 | } else { /* external block */ | ||
3178 | physical = EXT4_I(inode)->i_file_acl << blockbits; | ||
3179 | length = inode->i_sb->s_blocksize; | ||
3180 | } | ||
3181 | |||
3182 | if (physical) | ||
3183 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | ||
3184 | length, flags); | ||
3185 | return (error < 0 ? error : 0); | ||
3186 | } | ||
3187 | |||
3188 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
3189 | __u64 start, __u64 len) | ||
3190 | { | ||
3191 | ext4_lblk_t start_blk; | ||
3192 | ext4_lblk_t len_blks; | ||
3193 | int error = 0; | ||
3194 | |||
3195 | /* fallback to generic here if not in extents fmt */ | ||
3196 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
3197 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
3198 | ext4_get_block); | ||
3199 | |||
3200 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | ||
3201 | return -EBADR; | ||
3202 | |||
3203 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | ||
3204 | error = ext4_xattr_fiemap(inode, fieinfo); | ||
3205 | } else { | ||
3206 | start_blk = start >> inode->i_sb->s_blocksize_bits; | ||
3207 | len_blks = len >> inode->i_sb->s_blocksize_bits; | ||
3208 | |||
3209 | /* | ||
3210 | * Walk the extent tree gathering extent information. | ||
3211 | * ext4_ext_fiemap_cb will push extents back to user. | ||
3212 | */ | ||
3213 | down_write(&EXT4_I(inode)->i_data_sem); | ||
3214 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | ||
3215 | ext4_ext_fiemap_cb, fieinfo); | ||
3216 | up_write(&EXT4_I(inode)->i_data_sem); | ||
3217 | } | ||
3218 | |||
3219 | return error; | ||
3220 | } | ||
3221 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db4..6bd11fba71f7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -31,14 +31,14 @@ | |||
31 | * from ext4_file_open: open gets called at every open, but release | 31 | * from ext4_file_open: open gets called at every open, but release |
32 | * gets called only when /all/ the files are closed. | 32 | * gets called only when /all/ the files are closed. |
33 | */ | 33 | */ |
34 | static int ext4_release_file (struct inode * inode, struct file * filp) | 34 | static int ext4_release_file(struct inode *inode, struct file *filp) |
35 | { | 35 | { |
36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | down_write(&EXT4_I(inode)->i_data_sem); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_preallocations(inode); |
42 | up_write(&EXT4_I(inode)->i_data_sem); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | 142 | ||
143 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
144 | __u64 start, __u64 len); | ||
145 | |||
143 | const struct file_operations ext4_file_operations = { | 146 | const struct file_operations ext4_file_operations = { |
144 | .llseek = generic_file_llseek, | 147 | .llseek = generic_file_llseek, |
145 | .read = do_sync_read, | 148 | .read = do_sync_read, |
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
162 | .truncate = ext4_truncate, | 165 | .truncate = ext4_truncate, |
163 | .setattr = ext4_setattr, | 166 | .setattr = ext4_setattr, |
164 | .getattr = ext4_getattr, | 167 | .getattr = ext4_getattr, |
165 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 168 | #ifdef CONFIG_EXT4_FS_XATTR |
166 | .setxattr = generic_setxattr, | 169 | .setxattr = generic_setxattr, |
167 | .getxattr = generic_getxattr, | 170 | .getxattr = generic_getxattr, |
168 | .listxattr = ext4_listxattr, | 171 | .listxattr = ext4_listxattr, |
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
170 | #endif | 173 | #endif |
171 | .permission = ext4_permission, | 174 | .permission = ext4_permission, |
172 | .fallocate = ext4_fallocate, | 175 | .fallocate = ext4_fallocate, |
176 | .fiemap = ext4_fiemap, | ||
173 | }; | 177 | }; |
174 | 178 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad31..5afe4370840b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
31 | #include <linux/marker.h> | ||
31 | #include "ext4.h" | 32 | #include "ext4.h" |
32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
33 | 34 | ||
@@ -43,7 +44,7 @@ | |||
43 | * inode to disk. | 44 | * inode to disk. |
44 | */ | 45 | */ |
45 | 46 | ||
46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 47 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
47 | { | 48 | { |
48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
51 | 52 | ||
52 | J_ASSERT(ext4_journal_current_handle() == NULL); | 53 | J_ASSERT(ext4_journal_current_handle() == NULL); |
53 | 54 | ||
55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | ||
56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
57 | dentry->d_parent->d_inode->i_ino); | ||
58 | |||
54 | /* | 59 | /* |
55 | * data=writeback: | 60 | * data=writeback: |
56 | * The caller's filemap_fdatawrite()/wait will sync the data. | 61 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe390..556ca8eba3db 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
27 | sum += DELTA; | 27 | sum += DELTA; |
28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); |
29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); |
30 | } while(--n); | 30 | } while (--n); |
31 | 31 | ||
32 | buf[0] += b0; | 32 | buf[0] += b0; |
33 | buf[1] += b1; | 33 | buf[1] += b1; |
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
35 | 35 | ||
36 | 36 | ||
37 | /* The old legacy hash */ | 37 | /* The old legacy hash */ |
38 | static __u32 dx_hack_hash (const char *name, int len) | 38 | static __u32 dx_hack_hash(const char *name, int len) |
39 | { | 39 | { |
40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; | 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
41 | while (len--) { | 41 | while (len--) { |
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | |||
59 | val = pad; | 59 | val = pad; |
60 | if (len > num*4) | 60 | if (len > num*4) |
61 | len = num * 4; | 61 | len = num * 4; |
62 | for (i=0; i < len; i++) { | 62 | for (i = 0; i < len; i++) { |
63 | if ((i % 4) == 0) | 63 | if ((i % 4) == 0) |
64 | val = pad; | 64 | val = pad; |
65 | val = msg[i] + (val << 8); | 65 | val = msg[i] + (val << 8); |
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
104 | 104 | ||
105 | /* Check to see if the seed is all zero's */ | 105 | /* Check to see if the seed is all zero's */ |
106 | if (hinfo->seed) { | 106 | if (hinfo->seed) { |
107 | for (i=0; i < 4; i++) { | 107 | for (i = 0; i < 4; i++) { |
108 | if (hinfo->seed[i]) | 108 | if (hinfo->seed[i]) |
109 | break; | 109 | break; |
110 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..fe34d74cfb19 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
115 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
116 | return NULL; | 116 | return NULL; |
117 | } | 117 | } |
118 | if (bh_uptodate_or_lock(bh)) | 118 | if (buffer_uptodate(bh) && |
119 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
119 | return bh; | 120 | return bh; |
120 | 121 | ||
122 | lock_buffer(bh); | ||
121 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 123 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
122 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 124 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
123 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 125 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
154 | * though), and then we'd have two inodes sharing the | 156 | * though), and then we'd have two inodes sharing the |
155 | * same inode number and space on the harddisk. | 157 | * same inode number and space on the harddisk. |
156 | */ | 158 | */ |
157 | void ext4_free_inode (handle_t *handle, struct inode * inode) | 159 | void ext4_free_inode(handle_t *handle, struct inode *inode) |
158 | { | 160 | { |
159 | struct super_block * sb = inode->i_sb; | 161 | struct super_block *sb = inode->i_sb; |
160 | int is_directory; | 162 | int is_directory; |
161 | unsigned long ino; | 163 | unsigned long ino; |
162 | struct buffer_head *bitmap_bh = NULL; | 164 | struct buffer_head *bitmap_bh = NULL; |
163 | struct buffer_head *bh2; | 165 | struct buffer_head *bh2; |
164 | ext4_group_t block_group; | 166 | ext4_group_t block_group; |
165 | unsigned long bit; | 167 | unsigned long bit; |
166 | struct ext4_group_desc * gdp; | 168 | struct ext4_group_desc *gdp; |
167 | struct ext4_super_block * es; | 169 | struct ext4_super_block *es; |
168 | struct ext4_sb_info *sbi; | 170 | struct ext4_sb_info *sbi; |
169 | int fatal = 0, err; | 171 | int fatal = 0, err; |
170 | ext4_group_t flex_group; | 172 | ext4_group_t flex_group; |
171 | 173 | ||
172 | if (atomic_read(&inode->i_count) > 1) { | 174 | if (atomic_read(&inode->i_count) > 1) { |
173 | printk ("ext4_free_inode: inode has count=%d\n", | 175 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", |
174 | atomic_read(&inode->i_count)); | 176 | atomic_read(&inode->i_count)); |
175 | return; | 177 | return; |
176 | } | 178 | } |
177 | if (inode->i_nlink) { | 179 | if (inode->i_nlink) { |
178 | printk ("ext4_free_inode: inode has nlink=%d\n", | 180 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", |
179 | inode->i_nlink); | 181 | inode->i_nlink); |
180 | return; | 182 | return; |
181 | } | 183 | } |
182 | if (!sb) { | 184 | if (!sb) { |
183 | printk("ext4_free_inode: inode on nonexistent device\n"); | 185 | printk(KERN_ERR "ext4_free_inode: inode on " |
186 | "nonexistent device\n"); | ||
184 | return; | 187 | return; |
185 | } | 188 | } |
186 | sbi = EXT4_SB(sb); | 189 | sbi = EXT4_SB(sb); |
187 | 190 | ||
188 | ino = inode->i_ino; | 191 | ino = inode->i_ino; |
189 | ext4_debug ("freeing inode %lu\n", ino); | 192 | ext4_debug("freeing inode %lu\n", ino); |
190 | 193 | ||
191 | /* | 194 | /* |
192 | * Note: we must free any quota before locking the superblock, | 195 | * Note: we must free any quota before locking the superblock, |
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
200 | is_directory = S_ISDIR(inode->i_mode); | 203 | is_directory = S_ISDIR(inode->i_mode); |
201 | 204 | ||
202 | /* Do this BEFORE marking the inode not in use or returning an error */ | 205 | /* Do this BEFORE marking the inode not in use or returning an error */ |
203 | clear_inode (inode); | 206 | clear_inode(inode); |
204 | 207 | ||
205 | es = EXT4_SB(sb)->s_es; | 208 | es = EXT4_SB(sb)->s_es; |
206 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 209 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
207 | ext4_error (sb, "ext4_free_inode", | 210 | ext4_error(sb, "ext4_free_inode", |
208 | "reserved or nonexistent inode %lu", ino); | 211 | "reserved or nonexistent inode %lu", ino); |
209 | goto error_return; | 212 | goto error_return; |
210 | } | 213 | } |
211 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 214 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
222 | /* Ok, now we can actually update the inode bitmaps.. */ | 225 | /* Ok, now we can actually update the inode bitmaps.. */ |
223 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 226 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
224 | bit, bitmap_bh->b_data)) | 227 | bit, bitmap_bh->b_data)) |
225 | ext4_error (sb, "ext4_free_inode", | 228 | ext4_error(sb, "ext4_free_inode", |
226 | "bit already cleared for inode %lu", ino); | 229 | "bit already cleared for inode %lu", ino); |
227 | else { | 230 | else { |
228 | gdp = ext4_get_group_desc (sb, block_group, &bh2); | 231 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
229 | 232 | ||
230 | BUFFER_TRACE(bh2, "get_write_access"); | 233 | BUFFER_TRACE(bh2, "get_write_access"); |
231 | fatal = ext4_journal_get_write_access(handle, bh2); | 234 | fatal = ext4_journal_get_write_access(handle, bh2); |
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
287 | avefreei = freei / ngroups; | 290 | avefreei = freei / ngroups; |
288 | 291 | ||
289 | for (group = 0; group < ngroups; group++) { | 292 | for (group = 0; group < ngroups; group++) { |
290 | desc = ext4_get_group_desc (sb, group, NULL); | 293 | desc = ext4_get_group_desc(sb, group, NULL); |
291 | if (!desc || !desc->bg_free_inodes_count) | 294 | if (!desc || !desc->bg_free_inodes_count) |
292 | continue; | 295 | continue; |
293 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | 296 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) |
@@ -351,7 +354,7 @@ find_close_to_parent: | |||
351 | goto found_flexbg; | 354 | goto found_flexbg; |
352 | } | 355 | } |
353 | 356 | ||
354 | if (best_flex < 0 || | 357 | if (flex_group[best_flex].free_inodes == 0 || |
355 | (flex_group[i].free_blocks > | 358 | (flex_group[i].free_blocks > |
356 | flex_group[best_flex].free_blocks && | 359 | flex_group[best_flex].free_blocks && |
357 | flex_group[i].free_inodes)) | 360 | flex_group[i].free_inodes)) |
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
576 | * For other inodes, search forward from the parent directory's block | 579 | * For other inodes, search forward from the parent directory's block |
577 | * group to find a free inode. | 580 | * group to find a free inode. |
578 | */ | 581 | */ |
579 | struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | 582 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) |
580 | { | 583 | { |
581 | struct super_block *sb; | 584 | struct super_block *sb; |
582 | struct buffer_head *bitmap_bh = NULL; | 585 | struct buffer_head *bitmap_bh = NULL; |
583 | struct buffer_head *bh2; | 586 | struct buffer_head *bh2; |
584 | ext4_group_t group = 0; | 587 | ext4_group_t group = 0; |
585 | unsigned long ino = 0; | 588 | unsigned long ino = 0; |
586 | struct inode * inode; | 589 | struct inode *inode; |
587 | struct ext4_group_desc * gdp = NULL; | 590 | struct ext4_group_desc *gdp = NULL; |
588 | struct ext4_super_block * es; | 591 | struct ext4_super_block *es; |
589 | struct ext4_inode_info *ei; | 592 | struct ext4_inode_info *ei; |
590 | struct ext4_sb_info *sbi; | 593 | struct ext4_sb_info *sbi; |
591 | int ret2, err = 0; | 594 | int ret2, err = 0; |
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
613 | } | 616 | } |
614 | 617 | ||
615 | if (S_ISDIR(mode)) { | 618 | if (S_ISDIR(mode)) { |
616 | if (test_opt (sb, OLDALLOC)) | 619 | if (test_opt(sb, OLDALLOC)) |
617 | ret2 = find_group_dir(sb, dir, &group); | 620 | ret2 = find_group_dir(sb, dir, &group); |
618 | else | 621 | else |
619 | ret2 = find_group_orlov(sb, dir, &group); | 622 | ret2 = find_group_orlov(sb, dir, &group); |
@@ -783,7 +786,7 @@ got: | |||
783 | } | 786 | } |
784 | 787 | ||
785 | inode->i_uid = current->fsuid; | 788 | inode->i_uid = current->fsuid; |
786 | if (test_opt (sb, GRPID)) | 789 | if (test_opt(sb, GRPID)) |
787 | inode->i_gid = dir->i_gid; | 790 | inode->i_gid = dir->i_gid; |
788 | else if (dir->i_mode & S_ISGID) { | 791 | else if (dir->i_mode & S_ISGID) { |
789 | inode->i_gid = dir->i_gid; | 792 | inode->i_gid = dir->i_gid; |
@@ -816,7 +819,6 @@ got: | |||
816 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 819 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
817 | ei->i_file_acl = 0; | 820 | ei->i_file_acl = 0; |
818 | ei->i_dtime = 0; | 821 | ei->i_dtime = 0; |
819 | ei->i_block_alloc_info = NULL; | ||
820 | ei->i_block_group = group; | 822 | ei->i_block_group = group; |
821 | 823 | ||
822 | ext4_set_inode_flags(inode); | 824 | ext4_set_inode_flags(inode); |
@@ -832,7 +834,7 @@ got: | |||
832 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 834 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
833 | 835 | ||
834 | ret = inode; | 836 | ret = inode; |
835 | if(DQUOT_ALLOC_INODE(inode)) { | 837 | if (DQUOT_ALLOC_INODE(inode)) { |
836 | err = -EDQUOT; | 838 | err = -EDQUOT; |
837 | goto fail_drop; | 839 | goto fail_drop; |
838 | } | 840 | } |
@@ -841,7 +843,7 @@ got: | |||
841 | if (err) | 843 | if (err) |
842 | goto fail_free_drop; | 844 | goto fail_free_drop; |
843 | 845 | ||
844 | err = ext4_init_security(handle,inode, dir); | 846 | err = ext4_init_security(handle, inode, dir); |
845 | if (err) | 847 | if (err) |
846 | goto fail_free_drop; | 848 | goto fail_free_drop; |
847 | 849 | ||
@@ -959,7 +961,7 @@ error: | |||
959 | return ERR_PTR(err); | 961 | return ERR_PTR(err); |
960 | } | 962 | } |
961 | 963 | ||
962 | unsigned long ext4_count_free_inodes (struct super_block * sb) | 964 | unsigned long ext4_count_free_inodes(struct super_block *sb) |
963 | { | 965 | { |
964 | unsigned long desc_count; | 966 | unsigned long desc_count; |
965 | struct ext4_group_desc *gdp; | 967 | struct ext4_group_desc *gdp; |
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
974 | bitmap_count = 0; | 976 | bitmap_count = 0; |
975 | gdp = NULL; | 977 | gdp = NULL; |
976 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 978 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
977 | gdp = ext4_get_group_desc (sb, i, NULL); | 979 | gdp = ext4_get_group_desc(sb, i, NULL); |
978 | if (!gdp) | 980 | if (!gdp) |
979 | continue; | 981 | continue; |
980 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 982 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
989 | bitmap_count += x; | 991 | bitmap_count += x; |
990 | } | 992 | } |
991 | brelse(bitmap_bh); | 993 | brelse(bitmap_bh); |
992 | printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", | 994 | printk(KERN_DEBUG "ext4_count_free_inodes: " |
993 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | 995 | "stored = %u, computed = %lu, %lu\n", |
996 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | ||
994 | return desc_count; | 997 | return desc_count; |
995 | #else | 998 | #else |
996 | desc_count = 0; | 999 | desc_count = 0; |
997 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1000 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
998 | gdp = ext4_get_group_desc (sb, i, NULL); | 1001 | gdp = ext4_get_group_desc(sb, i, NULL); |
999 | if (!gdp) | 1002 | if (!gdp) |
1000 | continue; | 1003 | continue; |
1001 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 1004 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
1006 | } | 1009 | } |
1007 | 1010 | ||
1008 | /* Called at mount-time, super-block is locked */ | 1011 | /* Called at mount-time, super-block is locked */ |
1009 | unsigned long ext4_count_dirs (struct super_block * sb) | 1012 | unsigned long ext4_count_dirs(struct super_block * sb) |
1010 | { | 1013 | { |
1011 | unsigned long count = 0; | 1014 | unsigned long count = 0; |
1012 | ext4_group_t i; | 1015 | ext4_group_t i; |
1013 | 1016 | ||
1014 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1017 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
1015 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 1018 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
1016 | if (!gdp) | 1019 | if (!gdp) |
1017 | continue; | 1020 | continue; |
1018 | count += le16_to_cpu(gdp->bg_used_dirs_count); | 1021 | count += le16_to_cpu(gdp->bg_used_dirs_count); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..8dbf6953845b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -41,6 +41,8 @@ | |||
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | #include "ext4_extents.h" | 42 | #include "ext4_extents.h" |
43 | 43 | ||
44 | #define MPAGE_DA_EXTENT_TAIL 0x01 | ||
45 | |||
44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 46 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
45 | loff_t new_size) | 47 | loff_t new_size) |
46 | { | 48 | { |
@@ -188,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | |||
188 | /* | 190 | /* |
189 | * Called at the last iput() if i_nlink is zero. | 191 | * Called at the last iput() if i_nlink is zero. |
190 | */ | 192 | */ |
191 | void ext4_delete_inode (struct inode * inode) | 193 | void ext4_delete_inode(struct inode *inode) |
192 | { | 194 | { |
193 | handle_t *handle; | 195 | handle_t *handle; |
194 | int err; | 196 | int err; |
@@ -328,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
328 | int final = 0; | 330 | int final = 0; |
329 | 331 | ||
330 | if (i_block < 0) { | 332 | if (i_block < 0) { |
331 | ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); | 333 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); |
332 | } else if (i_block < direct_blocks) { | 334 | } else if (i_block < direct_blocks) { |
333 | offsets[n++] = i_block; | 335 | offsets[n++] = i_block; |
334 | final = direct_blocks; | 336 | final = direct_blocks; |
335 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 337 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
336 | offsets[n++] = EXT4_IND_BLOCK; | 338 | offsets[n++] = EXT4_IND_BLOCK; |
337 | offsets[n++] = i_block; | 339 | offsets[n++] = i_block; |
338 | final = ptrs; | 340 | final = ptrs; |
@@ -398,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
398 | 400 | ||
399 | *err = 0; | 401 | *err = 0; |
400 | /* i_data is not going away, no lock needed */ | 402 | /* i_data is not going away, no lock needed */ |
401 | add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); | 403 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
402 | if (!p->key) | 404 | if (!p->key) |
403 | goto no_block; | 405 | goto no_block; |
404 | while (--depth) { | 406 | while (--depth) { |
405 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
406 | if (!bh) | 408 | if (!bh) |
407 | goto failure; | 409 | goto failure; |
408 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 410 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
409 | /* Reader: end */ | 411 | /* Reader: end */ |
410 | if (!p->key) | 412 | if (!p->key) |
411 | goto no_block; | 413 | goto no_block; |
@@ -441,7 +443,7 @@ no_block: | |||
441 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
442 | { | 444 | { |
443 | struct ext4_inode_info *ei = EXT4_I(inode); | 445 | struct ext4_inode_info *ei = EXT4_I(inode); |
444 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
445 | __le32 *p; | 447 | __le32 *p; |
446 | ext4_fsblk_t bg_start; | 448 | ext4_fsblk_t bg_start; |
447 | ext4_fsblk_t last_block; | 449 | ext4_fsblk_t last_block; |
@@ -484,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
484 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
485 | Indirect *partial) | 487 | Indirect *partial) |
486 | { | 488 | { |
487 | struct ext4_block_alloc_info *block_i; | ||
488 | |||
489 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
490 | |||
491 | /* | 489 | /* |
492 | * try the heuristic for sequential allocation, | 490 | * XXX need to get goal block from mballoc's data structures |
493 | * failing that at least try to get decent locality. | ||
494 | */ | 491 | */ |
495 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | ||
496 | && (block_i->last_alloc_physical_block != 0)) { | ||
497 | return block_i->last_alloc_physical_block + 1; | ||
498 | } | ||
499 | 492 | ||
500 | return ext4_find_near(inode, partial); | 493 | return ext4_find_near(inode, partial); |
501 | } | 494 | } |
@@ -628,7 +621,7 @@ allocated: | |||
628 | *err = 0; | 621 | *err = 0; |
629 | return ret; | 622 | return ret; |
630 | failed_out: | 623 | failed_out: |
631 | for (i = 0; i <index; i++) | 624 | for (i = 0; i < index; i++) |
632 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 625 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
633 | return ret; | 626 | return ret; |
634 | } | 627 | } |
@@ -701,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
701 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 694 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
702 | branch[n].key = cpu_to_le32(new_blocks[n]); | 695 | branch[n].key = cpu_to_le32(new_blocks[n]); |
703 | *branch[n].p = branch[n].key; | 696 | *branch[n].p = branch[n].key; |
704 | if ( n == indirect_blks) { | 697 | if (n == indirect_blks) { |
705 | current_block = new_blocks[n]; | 698 | current_block = new_blocks[n]; |
706 | /* | 699 | /* |
707 | * End of chain, update the last new metablock of | 700 | * End of chain, update the last new metablock of |
@@ -728,7 +721,7 @@ failed: | |||
728 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 721 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); |
729 | ext4_journal_forget(handle, branch[i].bh); | 722 | ext4_journal_forget(handle, branch[i].bh); |
730 | } | 723 | } |
731 | for (i = 0; i <indirect_blks; i++) | 724 | for (i = 0; i < indirect_blks; i++) |
732 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 725 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
733 | 726 | ||
734 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 727 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
@@ -755,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
755 | { | 748 | { |
756 | int i; | 749 | int i; |
757 | int err = 0; | 750 | int err = 0; |
758 | struct ext4_block_alloc_info *block_i; | ||
759 | ext4_fsblk_t current_block; | 751 | ext4_fsblk_t current_block; |
760 | 752 | ||
761 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
762 | /* | 753 | /* |
763 | * If we're splicing into a [td]indirect block (as opposed to the | 754 | * If we're splicing into a [td]indirect block (as opposed to the |
764 | * inode) then we need to get write access to the [td]indirect block | 755 | * inode) then we need to get write access to the [td]indirect block |
@@ -781,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
781 | if (num == 0 && blks > 1) { | 772 | if (num == 0 && blks > 1) { |
782 | current_block = le32_to_cpu(where->key) + 1; | 773 | current_block = le32_to_cpu(where->key) + 1; |
783 | for (i = 1; i < blks; i++) | 774 | for (i = 1; i < blks; i++) |
784 | *(where->p + i ) = cpu_to_le32(current_block++); | 775 | *(where->p + i) = cpu_to_le32(current_block++); |
785 | } | ||
786 | |||
787 | /* | ||
788 | * update the most recently allocated logical & physical block | ||
789 | * in i_block_alloc_info, to assist find the proper goal block for next | ||
790 | * allocation | ||
791 | */ | ||
792 | if (block_i) { | ||
793 | block_i->last_alloc_logical_block = block + blks - 1; | ||
794 | block_i->last_alloc_physical_block = | ||
795 | le32_to_cpu(where[num].key) + blks - 1; | ||
796 | } | 776 | } |
797 | 777 | ||
798 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 778 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
@@ -912,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
912 | goto cleanup; | 892 | goto cleanup; |
913 | 893 | ||
914 | /* | 894 | /* |
915 | * Okay, we need to do block allocation. Lazily initialize the block | 895 | * Okay, we need to do block allocation. |
916 | * allocation info here if necessary | ||
917 | */ | 896 | */ |
918 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | ||
919 | ext4_init_block_alloc_info(inode); | ||
920 | |||
921 | goal = ext4_find_goal(inode, iblock, partial); | 897 | goal = ext4_find_goal(inode, iblock, partial); |
922 | 898 | ||
923 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 899 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
@@ -1005,6 +981,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | |||
1005 | */ | 981 | */ |
1006 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 982 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) |
1007 | { | 983 | { |
984 | if (!blocks) | ||
985 | return 0; | ||
986 | |||
1008 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 987 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1009 | return ext4_ext_calc_metadata_amount(inode, blocks); | 988 | return ext4_ext_calc_metadata_amount(inode, blocks); |
1010 | 989 | ||
@@ -1025,34 +1004,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1025 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1004 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
1026 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1005 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; |
1027 | 1006 | ||
1028 | /* Account for allocated meta_blocks */ | 1007 | if (mdb_free) { |
1029 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1008 | /* Account for allocated meta_blocks */ |
1009 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
1030 | 1010 | ||
1031 | /* update fs free blocks counter for truncate case */ | 1011 | /* update fs dirty blocks counter */ |
1032 | percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); | 1012 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1013 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1014 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1015 | } | ||
1033 | 1016 | ||
1034 | /* update per-inode reservations */ | 1017 | /* update per-inode reservations */ |
1035 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1018 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
1036 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1019 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
1037 | 1020 | ||
1038 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1039 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1040 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1041 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1021 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1042 | } | 1022 | } |
1043 | 1023 | ||
1044 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1045 | #define DIO_MAX_BLOCKS 4096 | ||
1046 | /* | ||
1047 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
1048 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
1049 | * For B blocks with A block pointers per block we need: | ||
1050 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
1051 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
1052 | */ | ||
1053 | #define DIO_CREDITS 25 | ||
1054 | |||
1055 | |||
1056 | /* | 1024 | /* |
1057 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1025 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
1058 | * and returns if the blocks are already mapped. | 1026 | * and returns if the blocks are already mapped. |
@@ -1164,19 +1132,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1164 | return retval; | 1132 | return retval; |
1165 | } | 1133 | } |
1166 | 1134 | ||
1167 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1135 | /* Maximum number of blocks we map for direct IO at once. */ |
1168 | struct buffer_head *bh_result, int create) | 1136 | #define DIO_MAX_BLOCKS 4096 |
1137 | |||
1138 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1139 | struct buffer_head *bh_result, int create) | ||
1169 | { | 1140 | { |
1170 | handle_t *handle = ext4_journal_current_handle(); | 1141 | handle_t *handle = ext4_journal_current_handle(); |
1171 | int ret = 0, started = 0; | 1142 | int ret = 0, started = 0; |
1172 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1143 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
1144 | int dio_credits; | ||
1173 | 1145 | ||
1174 | if (create && !handle) { | 1146 | if (create && !handle) { |
1175 | /* Direct IO write... */ | 1147 | /* Direct IO write... */ |
1176 | if (max_blocks > DIO_MAX_BLOCKS) | 1148 | if (max_blocks > DIO_MAX_BLOCKS) |
1177 | max_blocks = DIO_MAX_BLOCKS; | 1149 | max_blocks = DIO_MAX_BLOCKS; |
1178 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1150 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
1179 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1151 | handle = ext4_journal_start(inode, dio_credits); |
1180 | if (IS_ERR(handle)) { | 1152 | if (IS_ERR(handle)) { |
1181 | ret = PTR_ERR(handle); | 1153 | ret = PTR_ERR(handle); |
1182 | goto out; | 1154 | goto out; |
@@ -1244,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1244 | BUFFER_TRACE(bh, "call get_create_access"); | 1216 | BUFFER_TRACE(bh, "call get_create_access"); |
1245 | fatal = ext4_journal_get_create_access(handle, bh); | 1217 | fatal = ext4_journal_get_create_access(handle, bh); |
1246 | if (!fatal && !buffer_uptodate(bh)) { | 1218 | if (!fatal && !buffer_uptodate(bh)) { |
1247 | memset(bh->b_data,0,inode->i_sb->s_blocksize); | 1219 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1248 | set_buffer_uptodate(bh); | 1220 | set_buffer_uptodate(bh); |
1249 | } | 1221 | } |
1250 | unlock_buffer(bh); | 1222 | unlock_buffer(bh); |
@@ -1269,7 +1241,7 @@ err: | |||
1269 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1241 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1270 | ext4_lblk_t block, int create, int *err) | 1242 | ext4_lblk_t block, int create, int *err) |
1271 | { | 1243 | { |
1272 | struct buffer_head * bh; | 1244 | struct buffer_head *bh; |
1273 | 1245 | ||
1274 | bh = ext4_getblk(handle, inode, block, create, err); | 1246 | bh = ext4_getblk(handle, inode, block, create, err); |
1275 | if (!bh) | 1247 | if (!bh) |
@@ -1285,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
1285 | return NULL; | 1257 | return NULL; |
1286 | } | 1258 | } |
1287 | 1259 | ||
1288 | static int walk_page_buffers( handle_t *handle, | 1260 | static int walk_page_buffers(handle_t *handle, |
1289 | struct buffer_head *head, | 1261 | struct buffer_head *head, |
1290 | unsigned from, | 1262 | unsigned from, |
1291 | unsigned to, | 1263 | unsigned to, |
1292 | int *partial, | 1264 | int *partial, |
1293 | int (*fn)( handle_t *handle, | 1265 | int (*fn)(handle_t *handle, |
1294 | struct buffer_head *bh)) | 1266 | struct buffer_head *bh)) |
1295 | { | 1267 | { |
1296 | struct buffer_head *bh; | 1268 | struct buffer_head *bh; |
1297 | unsigned block_start, block_end; | 1269 | unsigned block_start, block_end; |
@@ -1299,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, | |||
1299 | int err, ret = 0; | 1271 | int err, ret = 0; |
1300 | struct buffer_head *next; | 1272 | struct buffer_head *next; |
1301 | 1273 | ||
1302 | for ( bh = head, block_start = 0; | 1274 | for (bh = head, block_start = 0; |
1303 | ret == 0 && (bh != head || !block_start); | 1275 | ret == 0 && (bh != head || !block_start); |
1304 | block_start = block_end, bh = next) | 1276 | block_start = block_end, bh = next) |
1305 | { | 1277 | { |
1306 | next = bh->b_this_page; | 1278 | next = bh->b_this_page; |
1307 | block_end = block_start + blocksize; | 1279 | block_end = block_start + blocksize; |
@@ -1354,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1354 | loff_t pos, unsigned len, unsigned flags, | 1326 | loff_t pos, unsigned len, unsigned flags, |
1355 | struct page **pagep, void **fsdata) | 1327 | struct page **pagep, void **fsdata) |
1356 | { | 1328 | { |
1357 | struct inode *inode = mapping->host; | 1329 | struct inode *inode = mapping->host; |
1358 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1330 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); |
1359 | handle_t *handle; | 1331 | handle_t *handle; |
1360 | int retries = 0; | 1332 | int retries = 0; |
1361 | struct page *page; | 1333 | struct page *page; |
1362 | pgoff_t index; | 1334 | pgoff_t index; |
1363 | unsigned from, to; | 1335 | unsigned from, to; |
1364 | 1336 | ||
1365 | index = pos >> PAGE_CACHE_SHIFT; | 1337 | index = pos >> PAGE_CACHE_SHIFT; |
1366 | from = pos & (PAGE_CACHE_SIZE - 1); | 1338 | from = pos & (PAGE_CACHE_SIZE - 1); |
1367 | to = from + len; | 1339 | to = from + len; |
1368 | 1340 | ||
1369 | retry: | 1341 | retry: |
1370 | handle = ext4_journal_start(inode, needed_blocks); | 1342 | handle = ext4_journal_start(inode, needed_blocks); |
1371 | if (IS_ERR(handle)) { | 1343 | if (IS_ERR(handle)) { |
1372 | ret = PTR_ERR(handle); | 1344 | ret = PTR_ERR(handle); |
1373 | goto out; | 1345 | goto out; |
1374 | } | 1346 | } |
1375 | 1347 | ||
1376 | page = __grab_cache_page(mapping, index); | 1348 | page = __grab_cache_page(mapping, index); |
@@ -1390,9 +1362,16 @@ retry: | |||
1390 | } | 1362 | } |
1391 | 1363 | ||
1392 | if (ret) { | 1364 | if (ret) { |
1393 | unlock_page(page); | 1365 | unlock_page(page); |
1394 | ext4_journal_stop(handle); | 1366 | ext4_journal_stop(handle); |
1395 | page_cache_release(page); | 1367 | page_cache_release(page); |
1368 | /* | ||
1369 | * block_write_begin may have instantiated a few blocks | ||
1370 | * outside i_size. Trim these off again. Don't need | ||
1371 | * i_size_read because we hold i_mutex. | ||
1372 | */ | ||
1373 | if (pos + len > inode->i_size) | ||
1374 | vmtruncate(inode, inode->i_size); | ||
1396 | } | 1375 | } |
1397 | 1376 | ||
1398 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1429,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1429 | ret = ext4_jbd2_file_inode(handle, inode); | 1408 | ret = ext4_jbd2_file_inode(handle, inode); |
1430 | 1409 | ||
1431 | if (ret == 0) { | 1410 | if (ret == 0) { |
1432 | /* | ||
1433 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
1434 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
1435 | * into that. | ||
1436 | */ | ||
1437 | loff_t new_i_size; | 1411 | loff_t new_i_size; |
1438 | 1412 | ||
1439 | new_i_size = pos + copied; | 1413 | new_i_size = pos + copied; |
1440 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1414 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1441 | EXT4_I(inode)->i_disksize = new_i_size; | 1415 | ext4_update_i_disksize(inode, new_i_size); |
1416 | /* We need to mark inode dirty even if | ||
1417 | * new_i_size is less that inode->i_size | ||
1418 | * bu greater than i_disksize.(hint delalloc) | ||
1419 | */ | ||
1420 | ext4_mark_inode_dirty(handle, inode); | ||
1421 | } | ||
1422 | |||
1442 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1423 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1443 | page, fsdata); | 1424 | page, fsdata); |
1444 | copied = ret2; | 1425 | copied = ret2; |
@@ -1463,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, | |||
1463 | loff_t new_i_size; | 1444 | loff_t new_i_size; |
1464 | 1445 | ||
1465 | new_i_size = pos + copied; | 1446 | new_i_size = pos + copied; |
1466 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1447 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1467 | EXT4_I(inode)->i_disksize = new_i_size; | 1448 | ext4_update_i_disksize(inode, new_i_size); |
1449 | /* We need to mark inode dirty even if | ||
1450 | * new_i_size is less that inode->i_size | ||
1451 | * bu greater than i_disksize.(hint delalloc) | ||
1452 | */ | ||
1453 | ext4_mark_inode_dirty(handle, inode); | ||
1454 | } | ||
1468 | 1455 | ||
1469 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1456 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
1470 | page, fsdata); | 1457 | page, fsdata); |
@@ -1489,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1489 | int ret = 0, ret2; | 1476 | int ret = 0, ret2; |
1490 | int partial = 0; | 1477 | int partial = 0; |
1491 | unsigned from, to; | 1478 | unsigned from, to; |
1479 | loff_t new_i_size; | ||
1492 | 1480 | ||
1493 | from = pos & (PAGE_CACHE_SIZE - 1); | 1481 | from = pos & (PAGE_CACHE_SIZE - 1); |
1494 | to = from + len; | 1482 | to = from + len; |
@@ -1503,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, | |||
1503 | to, &partial, write_end_fn); | 1491 | to, &partial, write_end_fn); |
1504 | if (!partial) | 1492 | if (!partial) |
1505 | SetPageUptodate(page); | 1493 | SetPageUptodate(page); |
1506 | if (pos+copied > inode->i_size) | 1494 | new_i_size = pos + copied; |
1495 | if (new_i_size > inode->i_size) | ||
1507 | i_size_write(inode, pos+copied); | 1496 | i_size_write(inode, pos+copied); |
1508 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1497 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; |
1509 | if (inode->i_size > EXT4_I(inode)->i_disksize) { | 1498 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1510 | EXT4_I(inode)->i_disksize = inode->i_size; | 1499 | ext4_update_i_disksize(inode, new_i_size); |
1511 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1500 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1512 | if (!ret) | 1501 | if (!ret) |
1513 | ret = ret2; | 1502 | ret = ret2; |
@@ -1524,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1524 | 1513 | ||
1525 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1514 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
1526 | { | 1515 | { |
1516 | int retries = 0; | ||
1527 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1517 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1528 | unsigned long md_needed, mdblocks, total = 0; | 1518 | unsigned long md_needed, mdblocks, total = 0; |
1529 | 1519 | ||
@@ -1532,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1532 | * in order to allocate nrblocks | 1522 | * in order to allocate nrblocks |
1533 | * worse case is one extent per block | 1523 | * worse case is one extent per block |
1534 | */ | 1524 | */ |
1525 | repeat: | ||
1535 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1526 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1527 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; |
1537 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1528 | mdblocks = ext4_calc_metadata_amount(inode, total); |
@@ -1540,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1540 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1531 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
1541 | total = md_needed + nrblocks; | 1532 | total = md_needed + nrblocks; |
1542 | 1533 | ||
1543 | if (ext4_has_free_blocks(sbi, total) < total) { | 1534 | if (ext4_claim_free_blocks(sbi, total)) { |
1544 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1535 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1536 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
1537 | yield(); | ||
1538 | goto repeat; | ||
1539 | } | ||
1545 | return -ENOSPC; | 1540 | return -ENOSPC; |
1546 | } | 1541 | } |
1547 | /* reduce fs free blocks counter */ | ||
1548 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
1549 | |||
1550 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1542 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
1551 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1543 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; |
1552 | 1544 | ||
@@ -1559,7 +1551,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1559 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1551 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1560 | int total, mdb, mdb_free, release; | 1552 | int total, mdb, mdb_free, release; |
1561 | 1553 | ||
1554 | if (!to_free) | ||
1555 | return; /* Nothing to release, exit */ | ||
1556 | |||
1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1557 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1558 | |||
1559 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | ||
1560 | /* | ||
1561 | * if there is no reserved blocks, but we try to free some | ||
1562 | * then the counter is messed up somewhere. | ||
1563 | * but since this function is called from invalidate | ||
1564 | * page, it's harmless to return without any action | ||
1565 | */ | ||
1566 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | ||
1567 | "blocks for inode %lu, but there is no reserved " | ||
1568 | "data blocks\n", to_free, inode->i_ino); | ||
1569 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1570 | return; | ||
1571 | } | ||
1572 | |||
1563 | /* recalculate the number of metablocks still need to be reserved */ | 1573 | /* recalculate the number of metablocks still need to be reserved */ |
1564 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1574 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; |
1565 | mdb = ext4_calc_metadata_amount(inode, total); | 1575 | mdb = ext4_calc_metadata_amount(inode, total); |
@@ -1570,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1570 | 1580 | ||
1571 | release = to_free + mdb_free; | 1581 | release = to_free + mdb_free; |
1572 | 1582 | ||
1573 | /* update fs free blocks counter for truncate case */ | 1583 | /* update fs dirty blocks counter for truncate case */ |
1574 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | 1584 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); |
1575 | 1585 | ||
1576 | /* update per-inode reservations */ | 1586 | /* update per-inode reservations */ |
1577 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1587 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1613,11 +1623,14 @@ struct mpage_da_data { | |||
1613 | unsigned long first_page, next_page; /* extent of pages */ | 1623 | unsigned long first_page, next_page; /* extent of pages */ |
1614 | get_block_t *get_block; | 1624 | get_block_t *get_block; |
1615 | struct writeback_control *wbc; | 1625 | struct writeback_control *wbc; |
1626 | int io_done; | ||
1627 | long pages_written; | ||
1628 | int retval; | ||
1616 | }; | 1629 | }; |
1617 | 1630 | ||
1618 | /* | 1631 | /* |
1619 | * mpage_da_submit_io - walks through extent of pages and try to write | 1632 | * mpage_da_submit_io - walks through extent of pages and try to write |
1620 | * them with __mpage_writepage() | 1633 | * them with writepage() call back |
1621 | * | 1634 | * |
1622 | * @mpd->inode: inode | 1635 | * @mpd->inode: inode |
1623 | * @mpd->first_page: first page of the extent | 1636 | * @mpd->first_page: first page of the extent |
@@ -1632,37 +1645,42 @@ struct mpage_da_data { | |||
1632 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 1645 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
1633 | { | 1646 | { |
1634 | struct address_space *mapping = mpd->inode->i_mapping; | 1647 | struct address_space *mapping = mpd->inode->i_mapping; |
1635 | struct mpage_data mpd_pp = { | ||
1636 | .bio = NULL, | ||
1637 | .last_block_in_bio = 0, | ||
1638 | .get_block = mpd->get_block, | ||
1639 | .use_writepage = 1, | ||
1640 | }; | ||
1641 | int ret = 0, err, nr_pages, i; | 1648 | int ret = 0, err, nr_pages, i; |
1642 | unsigned long index, end; | 1649 | unsigned long index, end; |
1643 | struct pagevec pvec; | 1650 | struct pagevec pvec; |
1651 | long pages_skipped; | ||
1644 | 1652 | ||
1645 | BUG_ON(mpd->next_page <= mpd->first_page); | 1653 | BUG_ON(mpd->next_page <= mpd->first_page); |
1646 | |||
1647 | pagevec_init(&pvec, 0); | 1654 | pagevec_init(&pvec, 0); |
1648 | index = mpd->first_page; | 1655 | index = mpd->first_page; |
1649 | end = mpd->next_page - 1; | 1656 | end = mpd->next_page - 1; |
1650 | 1657 | ||
1651 | while (index <= end) { | 1658 | while (index <= end) { |
1652 | /* XXX: optimize tail */ | 1659 | /* |
1653 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1660 | * We can use PAGECACHE_TAG_DIRTY lookup here because |
1661 | * even though we have cleared the dirty flag on the page | ||
1662 | * We still keep the page in the radix tree with tag | ||
1663 | * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io. | ||
1664 | * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback | ||
1665 | * which is called via the below writepage callback. | ||
1666 | */ | ||
1667 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
1668 | PAGECACHE_TAG_DIRTY, | ||
1669 | min(end - index, | ||
1670 | (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1654 | if (nr_pages == 0) | 1671 | if (nr_pages == 0) |
1655 | break; | 1672 | break; |
1656 | for (i = 0; i < nr_pages; i++) { | 1673 | for (i = 0; i < nr_pages; i++) { |
1657 | struct page *page = pvec.pages[i]; | 1674 | struct page *page = pvec.pages[i]; |
1658 | 1675 | ||
1659 | index = page->index; | 1676 | pages_skipped = mpd->wbc->pages_skipped; |
1660 | if (index > end) | 1677 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1661 | break; | 1678 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) |
1662 | index++; | 1679 | /* |
1663 | 1680 | * have successfully written the page | |
1664 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | 1681 | * without skipping the same |
1665 | 1682 | */ | |
1683 | mpd->pages_written++; | ||
1666 | /* | 1684 | /* |
1667 | * In error case, we have to continue because | 1685 | * In error case, we have to continue because |
1668 | * remaining pages are still locked | 1686 | * remaining pages are still locked |
@@ -1673,9 +1691,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1673 | } | 1691 | } |
1674 | pagevec_release(&pvec); | 1692 | pagevec_release(&pvec); |
1675 | } | 1693 | } |
1676 | if (mpd_pp.bio) | ||
1677 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
1678 | |||
1679 | return ret; | 1694 | return ret; |
1680 | } | 1695 | } |
1681 | 1696 | ||
@@ -1698,7 +1713,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1698 | int blocks = exbh->b_size >> inode->i_blkbits; | 1713 | int blocks = exbh->b_size >> inode->i_blkbits; |
1699 | sector_t pblock = exbh->b_blocknr, cur_logical; | 1714 | sector_t pblock = exbh->b_blocknr, cur_logical; |
1700 | struct buffer_head *head, *bh; | 1715 | struct buffer_head *head, *bh; |
1701 | unsigned long index, end; | 1716 | pgoff_t index, end; |
1702 | struct pagevec pvec; | 1717 | struct pagevec pvec; |
1703 | int nr_pages, i; | 1718 | int nr_pages, i; |
1704 | 1719 | ||
@@ -1741,6 +1756,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1741 | if (buffer_delay(bh)) { | 1756 | if (buffer_delay(bh)) { |
1742 | bh->b_blocknr = pblock; | 1757 | bh->b_blocknr = pblock; |
1743 | clear_buffer_delay(bh); | 1758 | clear_buffer_delay(bh); |
1759 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1760 | } else if (buffer_unwritten(bh)) { | ||
1761 | bh->b_blocknr = pblock; | ||
1762 | clear_buffer_unwritten(bh); | ||
1763 | set_buffer_mapped(bh); | ||
1764 | set_buffer_new(bh); | ||
1765 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1744 | } else if (buffer_mapped(bh)) | 1766 | } else if (buffer_mapped(bh)) |
1745 | BUG_ON(bh->b_blocknr != pblock); | 1767 | BUG_ON(bh->b_blocknr != pblock); |
1746 | 1768 | ||
@@ -1768,6 +1790,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1768 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | 1790 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
1769 | } | 1791 | } |
1770 | 1792 | ||
1793 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
1794 | sector_t logical, long blk_cnt) | ||
1795 | { | ||
1796 | int nr_pages, i; | ||
1797 | pgoff_t index, end; | ||
1798 | struct pagevec pvec; | ||
1799 | struct inode *inode = mpd->inode; | ||
1800 | struct address_space *mapping = inode->i_mapping; | ||
1801 | |||
1802 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1803 | end = (logical + blk_cnt - 1) >> | ||
1804 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1805 | while (index <= end) { | ||
1806 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1807 | if (nr_pages == 0) | ||
1808 | break; | ||
1809 | for (i = 0; i < nr_pages; i++) { | ||
1810 | struct page *page = pvec.pages[i]; | ||
1811 | index = page->index; | ||
1812 | if (index > end) | ||
1813 | break; | ||
1814 | index++; | ||
1815 | |||
1816 | BUG_ON(!PageLocked(page)); | ||
1817 | BUG_ON(PageWriteback(page)); | ||
1818 | block_invalidatepage(page, 0); | ||
1819 | ClearPageUptodate(page); | ||
1820 | unlock_page(page); | ||
1821 | } | ||
1822 | } | ||
1823 | return; | ||
1824 | } | ||
1825 | |||
1826 | static void ext4_print_free_blocks(struct inode *inode) | ||
1827 | { | ||
1828 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1829 | printk(KERN_EMERG "Total free blocks count %lld\n", | ||
1830 | ext4_count_free_blocks(inode->i_sb)); | ||
1831 | printk(KERN_EMERG "Free/Dirty block details\n"); | ||
1832 | printk(KERN_EMERG "free_blocks=%lld\n", | ||
1833 | percpu_counter_sum(&sbi->s_freeblocks_counter)); | ||
1834 | printk(KERN_EMERG "dirty_blocks=%lld\n", | ||
1835 | percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | ||
1836 | printk(KERN_EMERG "Block reservation details\n"); | ||
1837 | printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", | ||
1838 | EXT4_I(inode)->i_reserved_data_blocks); | ||
1839 | printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", | ||
1840 | EXT4_I(inode)->i_reserved_meta_blocks); | ||
1841 | return; | ||
1842 | } | ||
1843 | |||
1771 | /* | 1844 | /* |
1772 | * mpage_da_map_blocks - go through given space | 1845 | * mpage_da_map_blocks - go through given space |
1773 | * | 1846 | * |
@@ -1776,54 +1849,87 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1776 | * | 1849 | * |
1777 | * The function skips space we know is already mapped to disk blocks. | 1850 | * The function skips space we know is already mapped to disk blocks. |
1778 | * | 1851 | * |
1779 | * The function ignores errors ->get_block() returns, thus real | ||
1780 | * error handling is postponed to __mpage_writepage() | ||
1781 | */ | 1852 | */ |
1782 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1853 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
1783 | { | 1854 | { |
1784 | struct buffer_head *lbh = &mpd->lbh; | 1855 | int err = 0; |
1785 | int err = 0, remain = lbh->b_size; | ||
1786 | sector_t next = lbh->b_blocknr; | ||
1787 | struct buffer_head new; | 1856 | struct buffer_head new; |
1857 | struct buffer_head *lbh = &mpd->lbh; | ||
1858 | sector_t next; | ||
1788 | 1859 | ||
1789 | /* | 1860 | /* |
1790 | * We consider only non-mapped and non-allocated blocks | 1861 | * We consider only non-mapped and non-allocated blocks |
1791 | */ | 1862 | */ |
1792 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1863 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1793 | return; | 1864 | return 0; |
1865 | new.b_state = lbh->b_state; | ||
1866 | new.b_blocknr = 0; | ||
1867 | new.b_size = lbh->b_size; | ||
1868 | next = lbh->b_blocknr; | ||
1869 | /* | ||
1870 | * If we didn't accumulate anything | ||
1871 | * to write simply return | ||
1872 | */ | ||
1873 | if (!new.b_size) | ||
1874 | return 0; | ||
1875 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1876 | if (err) { | ||
1794 | 1877 | ||
1795 | while (remain) { | 1878 | /* If get block returns with error |
1796 | new.b_state = lbh->b_state; | 1879 | * we simply return. Later writepage |
1797 | new.b_blocknr = 0; | 1880 | * will redirty the page and writepages |
1798 | new.b_size = remain; | 1881 | * will find the dirty page again |
1799 | err = mpd->get_block(mpd->inode, next, &new, 1); | 1882 | */ |
1800 | if (err) { | 1883 | if (err == -EAGAIN) |
1801 | /* | 1884 | return 0; |
1802 | * Rather than implement own error handling | ||
1803 | * here, we just leave remaining blocks | ||
1804 | * unallocated and try again with ->writepage() | ||
1805 | */ | ||
1806 | break; | ||
1807 | } | ||
1808 | BUG_ON(new.b_size == 0); | ||
1809 | 1885 | ||
1810 | if (buffer_new(&new)) | 1886 | if (err == -ENOSPC && |
1811 | __unmap_underlying_blocks(mpd->inode, &new); | 1887 | ext4_count_free_blocks(mpd->inode->i_sb)) { |
1888 | mpd->retval = err; | ||
1889 | return 0; | ||
1890 | } | ||
1812 | 1891 | ||
1813 | /* | 1892 | /* |
1814 | * If blocks are delayed marked, we need to | 1893 | * get block failure will cause us |
1815 | * put actual blocknr and drop delayed bit | 1894 | * to loop in writepages. Because |
1895 | * a_ops->writepage won't be able to | ||
1896 | * make progress. The page will be redirtied | ||
1897 | * by writepage and writepages will again | ||
1898 | * try to write the same. | ||
1816 | */ | 1899 | */ |
1817 | if (buffer_delay(lbh)) | 1900 | printk(KERN_EMERG "%s block allocation failed for inode %lu " |
1818 | mpage_put_bnr_to_bhs(mpd, next, &new); | 1901 | "at logical offset %llu with max blocks " |
1819 | 1902 | "%zd with error %d\n", | |
1820 | /* go for the remaining blocks */ | 1903 | __func__, mpd->inode->i_ino, |
1821 | next += new.b_size >> mpd->inode->i_blkbits; | 1904 | (unsigned long long)next, |
1822 | remain -= new.b_size; | 1905 | lbh->b_size >> mpd->inode->i_blkbits, err); |
1906 | printk(KERN_EMERG "This should not happen.!! " | ||
1907 | "Data will be lost\n"); | ||
1908 | if (err == -ENOSPC) { | ||
1909 | ext4_print_free_blocks(mpd->inode); | ||
1910 | } | ||
1911 | /* invlaidate all the pages */ | ||
1912 | ext4_da_block_invalidatepages(mpd, next, | ||
1913 | lbh->b_size >> mpd->inode->i_blkbits); | ||
1914 | return err; | ||
1823 | } | 1915 | } |
1916 | BUG_ON(new.b_size == 0); | ||
1917 | |||
1918 | if (buffer_new(&new)) | ||
1919 | __unmap_underlying_blocks(mpd->inode, &new); | ||
1920 | |||
1921 | /* | ||
1922 | * If blocks are delayed marked, we need to | ||
1923 | * put actual blocknr and drop delayed bit | ||
1924 | */ | ||
1925 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | ||
1926 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1927 | |||
1928 | return 0; | ||
1824 | } | 1929 | } |
1825 | 1930 | ||
1826 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | 1931 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
1932 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1827 | 1933 | ||
1828 | /* | 1934 | /* |
1829 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 1935 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
@@ -1837,41 +1943,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1837 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1943 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
1838 | sector_t logical, struct buffer_head *bh) | 1944 | sector_t logical, struct buffer_head *bh) |
1839 | { | 1945 | { |
1840 | struct buffer_head *lbh = &mpd->lbh; | ||
1841 | sector_t next; | 1946 | sector_t next; |
1947 | size_t b_size = bh->b_size; | ||
1948 | struct buffer_head *lbh = &mpd->lbh; | ||
1949 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
1842 | 1950 | ||
1843 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1951 | /* check if thereserved journal credits might overflow */ |
1844 | 1952 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
1953 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1954 | /* | ||
1955 | * With non-extent format we are limited by the journal | ||
1956 | * credit available. Total credit needed to insert | ||
1957 | * nrblocks contiguous blocks is dependent on the | ||
1958 | * nrblocks. So limit nrblocks. | ||
1959 | */ | ||
1960 | goto flush_it; | ||
1961 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
1962 | EXT4_MAX_TRANS_DATA) { | ||
1963 | /* | ||
1964 | * Adding the new buffer_head would make it cross the | ||
1965 | * allowed limit for which we have journal credit | ||
1966 | * reserved. So limit the new bh->b_size | ||
1967 | */ | ||
1968 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
1969 | mpd->inode->i_blkbits; | ||
1970 | /* we will do mpage_da_submit_io in the next loop */ | ||
1971 | } | ||
1972 | } | ||
1845 | /* | 1973 | /* |
1846 | * First block in the extent | 1974 | * First block in the extent |
1847 | */ | 1975 | */ |
1848 | if (lbh->b_size == 0) { | 1976 | if (lbh->b_size == 0) { |
1849 | lbh->b_blocknr = logical; | 1977 | lbh->b_blocknr = logical; |
1850 | lbh->b_size = bh->b_size; | 1978 | lbh->b_size = b_size; |
1851 | lbh->b_state = bh->b_state & BH_FLAGS; | 1979 | lbh->b_state = bh->b_state & BH_FLAGS; |
1852 | return; | 1980 | return; |
1853 | } | 1981 | } |
1854 | 1982 | ||
1983 | next = lbh->b_blocknr + nrblocks; | ||
1855 | /* | 1984 | /* |
1856 | * Can we merge the block to our big extent? | 1985 | * Can we merge the block to our big extent? |
1857 | */ | 1986 | */ |
1858 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1987 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
1859 | lbh->b_size += bh->b_size; | 1988 | lbh->b_size += b_size; |
1860 | return; | 1989 | return; |
1861 | } | 1990 | } |
1862 | 1991 | ||
1992 | flush_it: | ||
1863 | /* | 1993 | /* |
1864 | * We couldn't merge the block to our extent, so we | 1994 | * We couldn't merge the block to our extent, so we |
1865 | * need to flush current extent and start new one | 1995 | * need to flush current extent and start new one |
1866 | */ | 1996 | */ |
1867 | mpage_da_map_blocks(mpd); | 1997 | if (mpage_da_map_blocks(mpd) == 0) |
1868 | 1998 | mpage_da_submit_io(mpd); | |
1869 | /* | 1999 | mpd->io_done = 1; |
1870 | * Now start a new extent | 2000 | return; |
1871 | */ | ||
1872 | lbh->b_size = bh->b_size; | ||
1873 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
1874 | lbh->b_blocknr = logical; | ||
1875 | } | 2001 | } |
1876 | 2002 | ||
1877 | /* | 2003 | /* |
@@ -1891,17 +2017,35 @@ static int __mpage_da_writepage(struct page *page, | |||
1891 | struct buffer_head *bh, *head, fake; | 2017 | struct buffer_head *bh, *head, fake; |
1892 | sector_t logical; | 2018 | sector_t logical; |
1893 | 2019 | ||
2020 | if (mpd->io_done) { | ||
2021 | /* | ||
2022 | * Rest of the page in the page_vec | ||
2023 | * redirty then and skip then. We will | ||
2024 | * try to to write them again after | ||
2025 | * starting a new transaction | ||
2026 | */ | ||
2027 | redirty_page_for_writepage(wbc, page); | ||
2028 | unlock_page(page); | ||
2029 | return MPAGE_DA_EXTENT_TAIL; | ||
2030 | } | ||
1894 | /* | 2031 | /* |
1895 | * Can we merge this page to current extent? | 2032 | * Can we merge this page to current extent? |
1896 | */ | 2033 | */ |
1897 | if (mpd->next_page != page->index) { | 2034 | if (mpd->next_page != page->index) { |
1898 | /* | 2035 | /* |
1899 | * Nope, we can't. So, we map non-allocated blocks | 2036 | * Nope, we can't. So, we map non-allocated blocks |
1900 | * and start IO on them using __mpage_writepage() | 2037 | * and start IO on them using writepage() |
1901 | */ | 2038 | */ |
1902 | if (mpd->next_page != mpd->first_page) { | 2039 | if (mpd->next_page != mpd->first_page) { |
1903 | mpage_da_map_blocks(mpd); | 2040 | if (mpage_da_map_blocks(mpd) == 0) |
1904 | mpage_da_submit_io(mpd); | 2041 | mpage_da_submit_io(mpd); |
2042 | /* | ||
2043 | * skip rest of the page in the page_vec | ||
2044 | */ | ||
2045 | mpd->io_done = 1; | ||
2046 | redirty_page_for_writepage(wbc, page); | ||
2047 | unlock_page(page); | ||
2048 | return MPAGE_DA_EXTENT_TAIL; | ||
1905 | } | 2049 | } |
1906 | 2050 | ||
1907 | /* | 2051 | /* |
@@ -1932,6 +2076,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1932 | set_buffer_dirty(bh); | 2076 | set_buffer_dirty(bh); |
1933 | set_buffer_uptodate(bh); | 2077 | set_buffer_uptodate(bh); |
1934 | mpage_add_bh_to_extent(mpd, logical, bh); | 2078 | mpage_add_bh_to_extent(mpd, logical, bh); |
2079 | if (mpd->io_done) | ||
2080 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } else { | 2081 | } else { |
1936 | /* | 2082 | /* |
1937 | * Page with regular buffer heads, just add all dirty ones | 2083 | * Page with regular buffer heads, just add all dirty ones |
@@ -1940,8 +2086,12 @@ static int __mpage_da_writepage(struct page *page, | |||
1940 | bh = head; | 2086 | bh = head; |
1941 | do { | 2087 | do { |
1942 | BUG_ON(buffer_locked(bh)); | 2088 | BUG_ON(buffer_locked(bh)); |
1943 | if (buffer_dirty(bh)) | 2089 | if (buffer_dirty(bh) && |
2090 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
1944 | mpage_add_bh_to_extent(mpd, logical, bh); | 2091 | mpage_add_bh_to_extent(mpd, logical, bh); |
2092 | if (mpd->io_done) | ||
2093 | return MPAGE_DA_EXTENT_TAIL; | ||
2094 | } | ||
1945 | logical++; | 2095 | logical++; |
1946 | } while ((bh = bh->b_this_page) != head); | 2096 | } while ((bh = bh->b_this_page) != head); |
1947 | } | 2097 | } |
@@ -1960,46 +2110,37 @@ static int __mpage_da_writepage(struct page *page, | |||
1960 | * | 2110 | * |
1961 | * This is a library function, which implements the writepages() | 2111 | * This is a library function, which implements the writepages() |
1962 | * address_space_operation. | 2112 | * address_space_operation. |
1963 | * | ||
1964 | * In order to avoid duplication of logic that deals with partial pages, | ||
1965 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
1966 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
1967 | * | ||
1968 | * It's important that we call __mpage_writepage() only once for each | ||
1969 | * involved page, otherwise we'd have to implement more complicated logic | ||
1970 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
1971 | * | ||
1972 | * See comments to mpage_writepages() | ||
1973 | */ | 2113 | */ |
1974 | static int mpage_da_writepages(struct address_space *mapping, | 2114 | static int mpage_da_writepages(struct address_space *mapping, |
1975 | struct writeback_control *wbc, | 2115 | struct writeback_control *wbc, |
1976 | get_block_t get_block) | 2116 | struct mpage_da_data *mpd) |
1977 | { | 2117 | { |
1978 | struct mpage_da_data mpd; | ||
1979 | int ret; | 2118 | int ret; |
1980 | 2119 | ||
1981 | if (!get_block) | 2120 | if (!mpd->get_block) |
1982 | return generic_writepages(mapping, wbc); | 2121 | return generic_writepages(mapping, wbc); |
1983 | 2122 | ||
1984 | mpd.wbc = wbc; | 2123 | mpd->lbh.b_size = 0; |
1985 | mpd.inode = mapping->host; | 2124 | mpd->lbh.b_state = 0; |
1986 | mpd.lbh.b_size = 0; | 2125 | mpd->lbh.b_blocknr = 0; |
1987 | mpd.lbh.b_state = 0; | 2126 | mpd->first_page = 0; |
1988 | mpd.lbh.b_blocknr = 0; | 2127 | mpd->next_page = 0; |
1989 | mpd.first_page = 0; | 2128 | mpd->io_done = 0; |
1990 | mpd.next_page = 0; | 2129 | mpd->pages_written = 0; |
1991 | mpd.get_block = get_block; | 2130 | mpd->retval = 0; |
1992 | |||
1993 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | ||
1994 | 2131 | ||
2132 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); | ||
1995 | /* | 2133 | /* |
1996 | * Handle last extent of pages | 2134 | * Handle last extent of pages |
1997 | */ | 2135 | */ |
1998 | if (mpd.next_page != mpd.first_page) { | 2136 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
1999 | mpage_da_map_blocks(&mpd); | 2137 | if (mpage_da_map_blocks(mpd) == 0) |
2000 | mpage_da_submit_io(&mpd); | 2138 | mpage_da_submit_io(mpd); |
2001 | } | ||
2002 | 2139 | ||
2140 | mpd->io_done = 1; | ||
2141 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2142 | } | ||
2143 | wbc->nr_to_write -= mpd->pages_written; | ||
2003 | return ret; | 2144 | return ret; |
2004 | } | 2145 | } |
2005 | 2146 | ||
@@ -2052,18 +2193,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2052 | handle_t *handle = NULL; | 2193 | handle_t *handle = NULL; |
2053 | 2194 | ||
2054 | handle = ext4_journal_current_handle(); | 2195 | handle = ext4_journal_current_handle(); |
2055 | if (!handle) { | 2196 | BUG_ON(!handle); |
2056 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | 2197 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, |
2057 | bh_result, 0, 0, 0); | 2198 | bh_result, create, 0, EXT4_DELALLOC_RSVED); |
2058 | BUG_ON(!ret); | ||
2059 | } else { | ||
2060 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2061 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2062 | } | ||
2063 | |||
2064 | if (ret > 0) { | 2199 | if (ret > 0) { |
2200 | |||
2065 | bh_result->b_size = (ret << inode->i_blkbits); | 2201 | bh_result->b_size = (ret << inode->i_blkbits); |
2066 | 2202 | ||
2203 | if (ext4_should_order_data(inode)) { | ||
2204 | int retval; | ||
2205 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2206 | if (retval) | ||
2207 | /* | ||
2208 | * Failed to add inode for ordered | ||
2209 | * mode. Don't update file size | ||
2210 | */ | ||
2211 | return retval; | ||
2212 | } | ||
2213 | |||
2067 | /* | 2214 | /* |
2068 | * Update on-disk size along with block allocation | 2215 | * Update on-disk size along with block allocation |
2069 | * we don't use 'extend_disksize' as size may change | 2216 | * we don't use 'extend_disksize' as size may change |
@@ -2073,18 +2220,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2073 | if (disksize > i_size_read(inode)) | 2220 | if (disksize > i_size_read(inode)) |
2074 | disksize = i_size_read(inode); | 2221 | disksize = i_size_read(inode); |
2075 | if (disksize > EXT4_I(inode)->i_disksize) { | 2222 | if (disksize > EXT4_I(inode)->i_disksize) { |
2076 | /* | 2223 | ext4_update_i_disksize(inode, disksize); |
2077 | * XXX: replace with spinlock if seen contended -bzzz | 2224 | ret = ext4_mark_inode_dirty(handle, inode); |
2078 | */ | 2225 | return ret; |
2079 | down_write(&EXT4_I(inode)->i_data_sem); | ||
2080 | if (disksize > EXT4_I(inode)->i_disksize) | ||
2081 | EXT4_I(inode)->i_disksize = disksize; | ||
2082 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2083 | |||
2084 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
2085 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2086 | return ret; | ||
2087 | } | ||
2088 | } | 2226 | } |
2089 | ret = 0; | 2227 | ret = 0; |
2090 | } | 2228 | } |
@@ -2204,102 +2342,177 @@ static int ext4_da_writepage(struct page *page, | |||
2204 | } | 2342 | } |
2205 | 2343 | ||
2206 | /* | 2344 | /* |
2207 | * For now just follow the DIO way to estimate the max credits | 2345 | * This is called via ext4_da_writepages() to |
2208 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2346 | * calulate the total number of credits to reserve to fit |
2209 | * todo: need to calculate the max credits need for | 2347 | * a single extent allocation into a single transaction, |
2210 | * extent based files, currently the DIO credits is based on | 2348 | * ext4_da_writpeages() will loop calling this before |
2211 | * indirect-blocks mapping way. | 2349 | * the block allocation. |
2212 | * | ||
2213 | * Probably should have a generic way to calculate credits | ||
2214 | * for DIO, writepages, and truncate | ||
2215 | */ | 2350 | */ |
2216 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2351 | |
2217 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2352 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2353 | { | ||
2354 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
2355 | |||
2356 | /* | ||
2357 | * With non-extent format the journal credit needed to | ||
2358 | * insert nrblocks contiguous block is dependent on | ||
2359 | * number of contiguous block. So we will limit | ||
2360 | * number of contiguous block to a sane value | ||
2361 | */ | ||
2362 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
2363 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
2364 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
2365 | |||
2366 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
2367 | } | ||
2218 | 2368 | ||
2219 | static int ext4_da_writepages(struct address_space *mapping, | 2369 | static int ext4_da_writepages(struct address_space *mapping, |
2220 | struct writeback_control *wbc) | 2370 | struct writeback_control *wbc) |
2221 | { | 2371 | { |
2222 | struct inode *inode = mapping->host; | 2372 | pgoff_t index; |
2373 | int range_whole = 0; | ||
2223 | handle_t *handle = NULL; | 2374 | handle_t *handle = NULL; |
2224 | int needed_blocks; | 2375 | struct mpage_da_data mpd; |
2225 | int ret = 0; | 2376 | struct inode *inode = mapping->host; |
2226 | long to_write; | 2377 | int no_nrwrite_index_update; |
2227 | loff_t range_start = 0; | 2378 | long pages_written = 0, pages_skipped; |
2379 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
2380 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
2228 | 2381 | ||
2229 | /* | 2382 | /* |
2230 | * No pages to write? This is mainly a kludge to avoid starting | 2383 | * No pages to write? This is mainly a kludge to avoid starting |
2231 | * a transaction for special inodes like journal inode on last iput() | 2384 | * a transaction for special inodes like journal inode on last iput() |
2232 | * because that could violate lock ordering on umount | 2385 | * because that could violate lock ordering on umount |
2233 | */ | 2386 | */ |
2234 | if (!mapping->nrpages) | 2387 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2235 | return 0; | 2388 | return 0; |
2389 | /* | ||
2390 | * Make sure nr_to_write is >= sbi->s_mb_stream_request | ||
2391 | * This make sure small files blocks are allocated in | ||
2392 | * single attempt. This ensure that small files | ||
2393 | * get less fragmented. | ||
2394 | */ | ||
2395 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { | ||
2396 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2397 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2398 | } | ||
2399 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
2400 | range_whole = 1; | ||
2401 | |||
2402 | if (wbc->range_cyclic) | ||
2403 | index = mapping->writeback_index; | ||
2404 | else | ||
2405 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2406 | |||
2407 | mpd.wbc = wbc; | ||
2408 | mpd.inode = mapping->host; | ||
2236 | 2409 | ||
2237 | /* | 2410 | /* |
2238 | * Estimate the worse case needed credits to write out | 2411 | * we don't want write_cache_pages to update |
2239 | * EXT4_MAX_BUF_BLOCKS pages | 2412 | * nr_to_write and writeback_index |
2240 | */ | 2413 | */ |
2241 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2414 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; |
2415 | wbc->no_nrwrite_index_update = 1; | ||
2416 | pages_skipped = wbc->pages_skipped; | ||
2417 | |||
2418 | while (!ret && wbc->nr_to_write > 0) { | ||
2242 | 2419 | ||
2243 | to_write = wbc->nr_to_write; | ||
2244 | if (!wbc->range_cyclic) { | ||
2245 | /* | 2420 | /* |
2246 | * If range_cyclic is not set force range_cont | 2421 | * we insert one extent at a time. So we need |
2247 | * and save the old writeback_index | 2422 | * credit needed for single extent allocation. |
2423 | * journalled mode is currently not supported | ||
2424 | * by delalloc | ||
2248 | */ | 2425 | */ |
2249 | wbc->range_cont = 1; | 2426 | BUG_ON(ext4_should_journal_data(inode)); |
2250 | range_start = wbc->range_start; | 2427 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2251 | } | ||
2252 | 2428 | ||
2253 | while (!ret && to_write) { | ||
2254 | /* start a new transaction*/ | 2429 | /* start a new transaction*/ |
2255 | handle = ext4_journal_start(inode, needed_blocks); | 2430 | handle = ext4_journal_start(inode, needed_blocks); |
2256 | if (IS_ERR(handle)) { | 2431 | if (IS_ERR(handle)) { |
2257 | ret = PTR_ERR(handle); | 2432 | ret = PTR_ERR(handle); |
2433 | printk(KERN_EMERG "%s: jbd2_start: " | ||
2434 | "%ld pages, ino %lu; err %d\n", __func__, | ||
2435 | wbc->nr_to_write, inode->i_ino, ret); | ||
2436 | dump_stack(); | ||
2258 | goto out_writepages; | 2437 | goto out_writepages; |
2259 | } | 2438 | } |
2260 | if (ext4_should_order_data(inode)) { | 2439 | mpd.get_block = ext4_da_get_block_write; |
2261 | /* | 2440 | ret = mpage_da_writepages(mapping, wbc, &mpd); |
2262 | * With ordered mode we need to add | ||
2263 | * the inode to the journal handle | ||
2264 | * when we do block allocation. | ||
2265 | */ | ||
2266 | ret = ext4_jbd2_file_inode(handle, inode); | ||
2267 | if (ret) { | ||
2268 | ext4_journal_stop(handle); | ||
2269 | goto out_writepages; | ||
2270 | } | ||
2271 | |||
2272 | } | ||
2273 | /* | ||
2274 | * set the max dirty pages could be write at a time | ||
2275 | * to fit into the reserved transaction credits | ||
2276 | */ | ||
2277 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
2278 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
2279 | 2441 | ||
2280 | to_write -= wbc->nr_to_write; | ||
2281 | ret = mpage_da_writepages(mapping, wbc, | ||
2282 | ext4_da_get_block_write); | ||
2283 | ext4_journal_stop(handle); | 2442 | ext4_journal_stop(handle); |
2284 | if (wbc->nr_to_write) { | 2443 | |
2444 | if (mpd.retval == -ENOSPC) { | ||
2445 | /* commit the transaction which would | ||
2446 | * free blocks released in the transaction | ||
2447 | * and try again | ||
2448 | */ | ||
2449 | jbd2_journal_force_commit_nested(sbi->s_journal); | ||
2450 | wbc->pages_skipped = pages_skipped; | ||
2451 | ret = 0; | ||
2452 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | ||
2453 | /* | ||
2454 | * got one extent now try with | ||
2455 | * rest of the pages | ||
2456 | */ | ||
2457 | pages_written += mpd.pages_written; | ||
2458 | wbc->pages_skipped = pages_skipped; | ||
2459 | ret = 0; | ||
2460 | } else if (wbc->nr_to_write) | ||
2285 | /* | 2461 | /* |
2286 | * There is no more writeout needed | 2462 | * There is no more writeout needed |
2287 | * or we requested for a noblocking writeout | 2463 | * or we requested for a noblocking writeout |
2288 | * and we found the device congested | 2464 | * and we found the device congested |
2289 | */ | 2465 | */ |
2290 | to_write += wbc->nr_to_write; | ||
2291 | break; | 2466 | break; |
2292 | } | ||
2293 | wbc->nr_to_write = to_write; | ||
2294 | } | 2467 | } |
2468 | if (pages_skipped != wbc->pages_skipped) | ||
2469 | printk(KERN_EMERG "This should not happen leaving %s " | ||
2470 | "with nr_to_write = %ld ret = %d\n", | ||
2471 | __func__, wbc->nr_to_write, ret); | ||
2472 | |||
2473 | /* Update index */ | ||
2474 | index += pages_written; | ||
2475 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
2476 | /* | ||
2477 | * set the writeback_index so that range_cyclic | ||
2478 | * mode will write it back later | ||
2479 | */ | ||
2480 | mapping->writeback_index = index; | ||
2295 | 2481 | ||
2296 | out_writepages: | 2482 | out_writepages: |
2297 | wbc->nr_to_write = to_write; | 2483 | if (!no_nrwrite_index_update) |
2298 | if (range_start) | 2484 | wbc->no_nrwrite_index_update = 0; |
2299 | wbc->range_start = range_start; | 2485 | wbc->nr_to_write -= nr_to_writebump; |
2300 | return ret; | 2486 | return ret; |
2301 | } | 2487 | } |
2302 | 2488 | ||
2489 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2490 | static int ext4_nonda_switch(struct super_block *sb) | ||
2491 | { | ||
2492 | s64 free_blocks, dirty_blocks; | ||
2493 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2494 | |||
2495 | /* | ||
2496 | * switch to non delalloc mode if we are running low | ||
2497 | * on free block. The free block accounting via percpu | ||
2498 | * counters can get slightly wrong with FBC_BATCH getting | ||
2499 | * accumulated on each CPU without updating global counters | ||
2500 | * Delalloc need an accurate free block accounting. So switch | ||
2501 | * to non delalloc when we are near to error range. | ||
2502 | */ | ||
2503 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
2504 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
2505 | if (2 * free_blocks < 3 * dirty_blocks || | ||
2506 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
2507 | /* | ||
2508 | * free block count is less that 150% of dirty blocks | ||
2509 | * or free blocks is less that watermark | ||
2510 | */ | ||
2511 | return 1; | ||
2512 | } | ||
2513 | return 0; | ||
2514 | } | ||
2515 | |||
2303 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2516 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2304 | loff_t pos, unsigned len, unsigned flags, | 2517 | loff_t pos, unsigned len, unsigned flags, |
2305 | struct page **pagep, void **fsdata) | 2518 | struct page **pagep, void **fsdata) |
@@ -2315,6 +2528,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2315 | from = pos & (PAGE_CACHE_SIZE - 1); | 2528 | from = pos & (PAGE_CACHE_SIZE - 1); |
2316 | to = from + len; | 2529 | to = from + len; |
2317 | 2530 | ||
2531 | if (ext4_nonda_switch(inode->i_sb)) { | ||
2532 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
2533 | return ext4_write_begin(file, mapping, pos, | ||
2534 | len, flags, pagep, fsdata); | ||
2535 | } | ||
2536 | *fsdata = (void *)0; | ||
2318 | retry: | 2537 | retry: |
2319 | /* | 2538 | /* |
2320 | * With delayed allocation, we don't log the i_disksize update | 2539 | * With delayed allocation, we don't log the i_disksize update |
@@ -2342,6 +2561,13 @@ retry: | |||
2342 | unlock_page(page); | 2561 | unlock_page(page); |
2343 | ext4_journal_stop(handle); | 2562 | ext4_journal_stop(handle); |
2344 | page_cache_release(page); | 2563 | page_cache_release(page); |
2564 | /* | ||
2565 | * block_write_begin may have instantiated a few blocks | ||
2566 | * outside i_size. Trim these off again. Don't need | ||
2567 | * i_size_read because we hold i_mutex. | ||
2568 | */ | ||
2569 | if (pos + len > inode->i_size) | ||
2570 | vmtruncate(inode, inode->i_size); | ||
2345 | } | 2571 | } |
2346 | 2572 | ||
2347 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2573 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2365,7 +2591,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2365 | bh = page_buffers(page); | 2591 | bh = page_buffers(page); |
2366 | idx = offset >> inode->i_blkbits; | 2592 | idx = offset >> inode->i_blkbits; |
2367 | 2593 | ||
2368 | for (i=0; i < idx; i++) | 2594 | for (i = 0; i < idx; i++) |
2369 | bh = bh->b_this_page; | 2595 | bh = bh->b_this_page; |
2370 | 2596 | ||
2371 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2597 | if (!buffer_mapped(bh) || (buffer_delay(bh))) |
@@ -2383,9 +2609,22 @@ static int ext4_da_write_end(struct file *file, | |||
2383 | handle_t *handle = ext4_journal_current_handle(); | 2609 | handle_t *handle = ext4_journal_current_handle(); |
2384 | loff_t new_i_size; | 2610 | loff_t new_i_size; |
2385 | unsigned long start, end; | 2611 | unsigned long start, end; |
2612 | int write_mode = (int)(unsigned long)fsdata; | ||
2613 | |||
2614 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
2615 | if (ext4_should_order_data(inode)) { | ||
2616 | return ext4_ordered_write_end(file, mapping, pos, | ||
2617 | len, copied, page, fsdata); | ||
2618 | } else if (ext4_should_writeback_data(inode)) { | ||
2619 | return ext4_writeback_write_end(file, mapping, pos, | ||
2620 | len, copied, page, fsdata); | ||
2621 | } else { | ||
2622 | BUG(); | ||
2623 | } | ||
2624 | } | ||
2386 | 2625 | ||
2387 | start = pos & (PAGE_CACHE_SIZE - 1); | 2626 | start = pos & (PAGE_CACHE_SIZE - 1); |
2388 | end = start + copied -1; | 2627 | end = start + copied - 1; |
2389 | 2628 | ||
2390 | /* | 2629 | /* |
2391 | * generic_write_end() will run mark_inode_dirty() if i_size | 2630 | * generic_write_end() will run mark_inode_dirty() if i_size |
@@ -2409,6 +2648,11 @@ static int ext4_da_write_end(struct file *file, | |||
2409 | EXT4_I(inode)->i_disksize = new_i_size; | 2648 | EXT4_I(inode)->i_disksize = new_i_size; |
2410 | } | 2649 | } |
2411 | up_write(&EXT4_I(inode)->i_data_sem); | 2650 | up_write(&EXT4_I(inode)->i_data_sem); |
2651 | /* We need to mark inode dirty even if | ||
2652 | * new_i_size is less that inode->i_size | ||
2653 | * bu greater than i_disksize.(hint delalloc) | ||
2654 | */ | ||
2655 | ext4_mark_inode_dirty(handle, inode); | ||
2412 | } | 2656 | } |
2413 | } | 2657 | } |
2414 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2658 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
@@ -2500,7 +2744,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2500 | return 0; | 2744 | return 0; |
2501 | } | 2745 | } |
2502 | 2746 | ||
2503 | return generic_block_bmap(mapping,block,ext4_get_block); | 2747 | return generic_block_bmap(mapping, block, ext4_get_block); |
2504 | } | 2748 | } |
2505 | 2749 | ||
2506 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2750 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -3106,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3106 | if (!partial->key && *partial->p) | 3350 | if (!partial->key && *partial->p) |
3107 | /* Writer: end */ | 3351 | /* Writer: end */ |
3108 | goto no_top; | 3352 | goto no_top; |
3109 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | 3353 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) |
3110 | ; | 3354 | ; |
3111 | /* | 3355 | /* |
3112 | * OK, we've found the last block that must survive. The rest of our | 3356 | * OK, we've found the last block that must survive. The rest of our |
@@ -3125,7 +3369,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
3125 | } | 3369 | } |
3126 | /* Writer: end */ | 3370 | /* Writer: end */ |
3127 | 3371 | ||
3128 | while(partial > p) { | 3372 | while (partial > p) { |
3129 | brelse(partial->bh); | 3373 | brelse(partial->bh); |
3130 | partial--; | 3374 | partial--; |
3131 | } | 3375 | } |
@@ -3317,9 +3561,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3317 | /* This zaps the entire block. Bottom up. */ | 3561 | /* This zaps the entire block. Bottom up. */ |
3318 | BUFFER_TRACE(bh, "free child branches"); | 3562 | BUFFER_TRACE(bh, "free child branches"); |
3319 | ext4_free_branches(handle, inode, bh, | 3563 | ext4_free_branches(handle, inode, bh, |
3320 | (__le32*)bh->b_data, | 3564 | (__le32 *) bh->b_data, |
3321 | (__le32*)bh->b_data + addr_per_block, | 3565 | (__le32 *) bh->b_data + addr_per_block, |
3322 | depth); | 3566 | depth); |
3323 | 3567 | ||
3324 | /* | 3568 | /* |
3325 | * We've probably journalled the indirect block several | 3569 | * We've probably journalled the indirect block several |
@@ -3486,6 +3730,9 @@ void ext4_truncate(struct inode *inode) | |||
3486 | * modify the block allocation tree. | 3730 | * modify the block allocation tree. |
3487 | */ | 3731 | */ |
3488 | down_write(&ei->i_data_sem); | 3732 | down_write(&ei->i_data_sem); |
3733 | |||
3734 | ext4_discard_preallocations(inode); | ||
3735 | |||
3489 | /* | 3736 | /* |
3490 | * The orphan list entry will now protect us from any crash which | 3737 | * The orphan list entry will now protect us from any crash which |
3491 | * occurs before the truncate completes, so it is now safe to propagate | 3738 | * occurs before the truncate completes, so it is now safe to propagate |
@@ -3555,8 +3802,6 @@ do_indirects: | |||
3555 | ; | 3802 | ; |
3556 | } | 3803 | } |
3557 | 3804 | ||
3558 | ext4_discard_reservation(inode); | ||
3559 | |||
3560 | up_write(&ei->i_data_sem); | 3805 | up_write(&ei->i_data_sem); |
3561 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3806 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3562 | ext4_mark_inode_dirty(handle, inode); | 3807 | ext4_mark_inode_dirty(handle, inode); |
@@ -3581,41 +3826,6 @@ out_stop: | |||
3581 | ext4_journal_stop(handle); | 3826 | ext4_journal_stop(handle); |
3582 | } | 3827 | } |
3583 | 3828 | ||
3584 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
3585 | unsigned long ino, struct ext4_iloc *iloc) | ||
3586 | { | ||
3587 | ext4_group_t block_group; | ||
3588 | unsigned long offset; | ||
3589 | ext4_fsblk_t block; | ||
3590 | struct ext4_group_desc *gdp; | ||
3591 | |||
3592 | if (!ext4_valid_inum(sb, ino)) { | ||
3593 | /* | ||
3594 | * This error is already checked for in namei.c unless we are | ||
3595 | * looking at an NFS filehandle, in which case no error | ||
3596 | * report is needed | ||
3597 | */ | ||
3598 | return 0; | ||
3599 | } | ||
3600 | |||
3601 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
3602 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
3603 | if (!gdp) | ||
3604 | return 0; | ||
3605 | |||
3606 | /* | ||
3607 | * Figure out the offset within the block group inode table | ||
3608 | */ | ||
3609 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
3610 | EXT4_INODE_SIZE(sb); | ||
3611 | block = ext4_inode_table(sb, gdp) + | ||
3612 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
3613 | |||
3614 | iloc->block_group = block_group; | ||
3615 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
3616 | return block; | ||
3617 | } | ||
3618 | |||
3619 | /* | 3829 | /* |
3620 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3830 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3621 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3831 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
@@ -3625,19 +3835,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
3625 | static int __ext4_get_inode_loc(struct inode *inode, | 3835 | static int __ext4_get_inode_loc(struct inode *inode, |
3626 | struct ext4_iloc *iloc, int in_mem) | 3836 | struct ext4_iloc *iloc, int in_mem) |
3627 | { | 3837 | { |
3628 | ext4_fsblk_t block; | 3838 | struct ext4_group_desc *gdp; |
3629 | struct buffer_head *bh; | 3839 | struct buffer_head *bh; |
3840 | struct super_block *sb = inode->i_sb; | ||
3841 | ext4_fsblk_t block; | ||
3842 | int inodes_per_block, inode_offset; | ||
3843 | |||
3844 | iloc->bh = 0; | ||
3845 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
3846 | return -EIO; | ||
3630 | 3847 | ||
3631 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3848 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3632 | if (!block) | 3849 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3850 | if (!gdp) | ||
3633 | return -EIO; | 3851 | return -EIO; |
3634 | 3852 | ||
3635 | bh = sb_getblk(inode->i_sb, block); | 3853 | /* |
3854 | * Figure out the offset within the block group inode table | ||
3855 | */ | ||
3856 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
3857 | inode_offset = ((inode->i_ino - 1) % | ||
3858 | EXT4_INODES_PER_GROUP(sb)); | ||
3859 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
3860 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
3861 | |||
3862 | bh = sb_getblk(sb, block); | ||
3636 | if (!bh) { | 3863 | if (!bh) { |
3637 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3864 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
3638 | "unable to read inode block - " | 3865 | "inode block - inode=%lu, block=%llu", |
3639 | "inode=%lu, block=%llu", | 3866 | inode->i_ino, block); |
3640 | inode->i_ino, block); | ||
3641 | return -EIO; | 3867 | return -EIO; |
3642 | } | 3868 | } |
3643 | if (!buffer_uptodate(bh)) { | 3869 | if (!buffer_uptodate(bh)) { |
@@ -3665,28 +3891,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3665 | */ | 3891 | */ |
3666 | if (in_mem) { | 3892 | if (in_mem) { |
3667 | struct buffer_head *bitmap_bh; | 3893 | struct buffer_head *bitmap_bh; |
3668 | struct ext4_group_desc *desc; | 3894 | int i, start; |
3669 | int inodes_per_buffer; | ||
3670 | int inode_offset, i; | ||
3671 | ext4_group_t block_group; | ||
3672 | int start; | ||
3673 | |||
3674 | block_group = (inode->i_ino - 1) / | ||
3675 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
3676 | inodes_per_buffer = bh->b_size / | ||
3677 | EXT4_INODE_SIZE(inode->i_sb); | ||
3678 | inode_offset = ((inode->i_ino - 1) % | ||
3679 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
3680 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
3681 | 3895 | ||
3682 | /* Is the inode bitmap in cache? */ | 3896 | start = inode_offset & ~(inodes_per_block - 1); |
3683 | desc = ext4_get_group_desc(inode->i_sb, | ||
3684 | block_group, NULL); | ||
3685 | if (!desc) | ||
3686 | goto make_io; | ||
3687 | 3897 | ||
3688 | bitmap_bh = sb_getblk(inode->i_sb, | 3898 | /* Is the inode bitmap in cache? */ |
3689 | ext4_inode_bitmap(inode->i_sb, desc)); | 3899 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3690 | if (!bitmap_bh) | 3900 | if (!bitmap_bh) |
3691 | goto make_io; | 3901 | goto make_io; |
3692 | 3902 | ||
@@ -3699,14 +3909,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3699 | brelse(bitmap_bh); | 3909 | brelse(bitmap_bh); |
3700 | goto make_io; | 3910 | goto make_io; |
3701 | } | 3911 | } |
3702 | for (i = start; i < start + inodes_per_buffer; i++) { | 3912 | for (i = start; i < start + inodes_per_block; i++) { |
3703 | if (i == inode_offset) | 3913 | if (i == inode_offset) |
3704 | continue; | 3914 | continue; |
3705 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3915 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3706 | break; | 3916 | break; |
3707 | } | 3917 | } |
3708 | brelse(bitmap_bh); | 3918 | brelse(bitmap_bh); |
3709 | if (i == start + inodes_per_buffer) { | 3919 | if (i == start + inodes_per_block) { |
3710 | /* all other inodes are free, so skip I/O */ | 3920 | /* all other inodes are free, so skip I/O */ |
3711 | memset(bh->b_data, 0, bh->b_size); | 3921 | memset(bh->b_data, 0, bh->b_size); |
3712 | set_buffer_uptodate(bh); | 3922 | set_buffer_uptodate(bh); |
@@ -3717,6 +3927,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
3717 | 3927 | ||
3718 | make_io: | 3928 | make_io: |
3719 | /* | 3929 | /* |
3930 | * If we need to do any I/O, try to pre-readahead extra | ||
3931 | * blocks from the inode table. | ||
3932 | */ | ||
3933 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
3934 | ext4_fsblk_t b, end, table; | ||
3935 | unsigned num; | ||
3936 | |||
3937 | table = ext4_inode_table(sb, gdp); | ||
3938 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
3939 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3940 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
3941 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
3942 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
3943 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
3944 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
3945 | if (table > b) | ||
3946 | b = table; | ||
3947 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
3948 | num = EXT4_INODES_PER_GROUP(sb); | ||
3949 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3950 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3951 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
3952 | table += num / inodes_per_block; | ||
3953 | if (end > table) | ||
3954 | end = table; | ||
3955 | while (b <= end) | ||
3956 | sb_breadahead(sb, b++); | ||
3957 | } | ||
3958 | |||
3959 | /* | ||
3720 | * There are other valid inodes in the buffer, this inode | 3960 | * There are other valid inodes in the buffer, this inode |
3721 | * has in-inode xattrs, or we don't have this inode in memory. | 3961 | * has in-inode xattrs, or we don't have this inode in memory. |
3722 | * Read the block from disk. | 3962 | * Read the block from disk. |
@@ -3726,10 +3966,9 @@ make_io: | |||
3726 | submit_bh(READ_META, bh); | 3966 | submit_bh(READ_META, bh); |
3727 | wait_on_buffer(bh); | 3967 | wait_on_buffer(bh); |
3728 | if (!buffer_uptodate(bh)) { | 3968 | if (!buffer_uptodate(bh)) { |
3729 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3969 | ext4_error(sb, __func__, |
3730 | "unable to read inode block - " | 3970 | "unable to read inode block - inode=%lu, " |
3731 | "inode=%lu, block=%llu", | 3971 | "block=%llu", inode->i_ino, block); |
3732 | inode->i_ino, block); | ||
3733 | brelse(bh); | 3972 | brelse(bh); |
3734 | return -EIO; | 3973 | return -EIO; |
3735 | } | 3974 | } |
@@ -3821,11 +4060,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3821 | return inode; | 4060 | return inode; |
3822 | 4061 | ||
3823 | ei = EXT4_I(inode); | 4062 | ei = EXT4_I(inode); |
3824 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 4063 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3825 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 4064 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
3826 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 4065 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
3827 | #endif | 4066 | #endif |
3828 | ei->i_block_alloc_info = NULL; | ||
3829 | 4067 | ||
3830 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4068 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
3831 | if (ret < 0) | 4069 | if (ret < 0) |
@@ -3835,7 +4073,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3835 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4073 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3836 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4074 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3837 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4075 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
3838 | if(!(test_opt (inode->i_sb, NO_UID32))) { | 4076 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
3839 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4077 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3840 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4078 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3841 | } | 4079 | } |
@@ -3853,7 +4091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3853 | if (inode->i_mode == 0 || | 4091 | if (inode->i_mode == 0 || |
3854 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 4092 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
3855 | /* this inode is deleted */ | 4093 | /* this inode is deleted */ |
3856 | brelse (bh); | 4094 | brelse(bh); |
3857 | ret = -ESTALE; | 4095 | ret = -ESTALE; |
3858 | goto bad_inode; | 4096 | goto bad_inode; |
3859 | } | 4097 | } |
@@ -3886,7 +4124,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3886 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4124 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
3887 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4125 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
3888 | EXT4_INODE_SIZE(inode->i_sb)) { | 4126 | EXT4_INODE_SIZE(inode->i_sb)) { |
3889 | brelse (bh); | 4127 | brelse(bh); |
3890 | ret = -EIO; | 4128 | ret = -EIO; |
3891 | goto bad_inode; | 4129 | goto bad_inode; |
3892 | } | 4130 | } |
@@ -3939,7 +4177,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3939 | init_special_inode(inode, inode->i_mode, | 4177 | init_special_inode(inode, inode->i_mode, |
3940 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4178 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
3941 | } | 4179 | } |
3942 | brelse (iloc.bh); | 4180 | brelse(iloc.bh); |
3943 | ext4_set_inode_flags(inode); | 4181 | ext4_set_inode_flags(inode); |
3944 | unlock_new_inode(inode); | 4182 | unlock_new_inode(inode); |
3945 | return inode; | 4183 | return inode; |
@@ -3956,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
3956 | struct inode *inode = &(ei->vfs_inode); | 4194 | struct inode *inode = &(ei->vfs_inode); |
3957 | u64 i_blocks = inode->i_blocks; | 4195 | u64 i_blocks = inode->i_blocks; |
3958 | struct super_block *sb = inode->i_sb; | 4196 | struct super_block *sb = inode->i_sb; |
3959 | int err = 0; | ||
3960 | 4197 | ||
3961 | if (i_blocks <= ~0U) { | 4198 | if (i_blocks <= ~0U) { |
3962 | /* | 4199 | /* |
@@ -3966,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
3966 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4203 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
3967 | raw_inode->i_blocks_high = 0; | 4204 | raw_inode->i_blocks_high = 0; |
3968 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4205 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
3969 | } else if (i_blocks <= 0xffffffffffffULL) { | 4206 | return 0; |
4207 | } | ||
4208 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) | ||
4209 | return -EFBIG; | ||
4210 | |||
4211 | if (i_blocks <= 0xffffffffffffULL) { | ||
3970 | /* | 4212 | /* |
3971 | * i_blocks can be represented in a 48 bit variable | 4213 | * i_blocks can be represented in a 48 bit variable |
3972 | * as multiple of 512 bytes | 4214 | * as multiple of 512 bytes |
3973 | */ | 4215 | */ |
3974 | err = ext4_update_rocompat_feature(handle, sb, | ||
3975 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
3976 | if (err) | ||
3977 | goto err_out; | ||
3978 | /* i_block is stored in the split 48 bit fields */ | ||
3979 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4216 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
3980 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4217 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
3981 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4218 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
3982 | } else { | 4219 | } else { |
3983 | /* | ||
3984 | * i_blocks should be represented in a 48 bit variable | ||
3985 | * as multiple of file system block size | ||
3986 | */ | ||
3987 | err = ext4_update_rocompat_feature(handle, sb, | ||
3988 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
3989 | if (err) | ||
3990 | goto err_out; | ||
3991 | ei->i_flags |= EXT4_HUGE_FILE_FL; | 4220 | ei->i_flags |= EXT4_HUGE_FILE_FL; |
3992 | /* i_block is stored in file system block size */ | 4221 | /* i_block is stored in file system block size */ |
3993 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | 4222 | i_blocks = i_blocks >> (inode->i_blkbits - 9); |
3994 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4223 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
3995 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4224 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
3996 | } | 4225 | } |
3997 | err_out: | 4226 | return 0; |
3998 | return err; | ||
3999 | } | 4227 | } |
4000 | 4228 | ||
4001 | /* | 4229 | /* |
@@ -4021,14 +4249,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4021 | 4249 | ||
4022 | ext4_get_inode_flags(ei); | 4250 | ext4_get_inode_flags(ei); |
4023 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4251 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4024 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 4252 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4025 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 4253 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
4026 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 4254 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
4027 | /* | 4255 | /* |
4028 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4256 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
4029 | * re-used with the upper 16 bits of the uid/gid intact | 4257 | * re-used with the upper 16 bits of the uid/gid intact |
4030 | */ | 4258 | */ |
4031 | if(!ei->i_dtime) { | 4259 | if (!ei->i_dtime) { |
4032 | raw_inode->i_uid_high = | 4260 | raw_inode->i_uid_high = |
4033 | cpu_to_le16(high_16_bits(inode->i_uid)); | 4261 | cpu_to_le16(high_16_bits(inode->i_uid)); |
4034 | raw_inode->i_gid_high = | 4262 | raw_inode->i_gid_high = |
@@ -4116,7 +4344,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4116 | ei->i_state &= ~EXT4_STATE_NEW; | 4344 | ei->i_state &= ~EXT4_STATE_NEW; |
4117 | 4345 | ||
4118 | out_brelse: | 4346 | out_brelse: |
4119 | brelse (bh); | 4347 | brelse(bh); |
4120 | ext4_std_error(inode->i_sb, err); | 4348 | ext4_std_error(inode->i_sb, err); |
4121 | return err; | 4349 | return err; |
4122 | } | 4350 | } |
@@ -4324,57 +4552,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4324 | return 0; | 4552 | return 0; |
4325 | } | 4553 | } |
4326 | 4554 | ||
4555 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
4556 | int chunk) | ||
4557 | { | ||
4558 | int indirects; | ||
4559 | |||
4560 | /* if nrblocks are contiguous */ | ||
4561 | if (chunk) { | ||
4562 | /* | ||
4563 | * With N contiguous data blocks, it need at most | ||
4564 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | ||
4565 | * 2 dindirect blocks | ||
4566 | * 1 tindirect block | ||
4567 | */ | ||
4568 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4569 | return indirects + 3; | ||
4570 | } | ||
4571 | /* | ||
4572 | * if nrblocks are not contiguous, worse case, each block touch | ||
4573 | * a indirect block, and each indirect block touch a double indirect | ||
4574 | * block, plus a triple indirect block | ||
4575 | */ | ||
4576 | indirects = nrblocks * 2 + 1; | ||
4577 | return indirects; | ||
4578 | } | ||
4579 | |||
4580 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4581 | { | ||
4582 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
4583 | return ext4_indirect_trans_blocks(inode, nrblocks, 0); | ||
4584 | return ext4_ext_index_trans_blocks(inode, nrblocks, 0); | ||
4585 | } | ||
4327 | /* | 4586 | /* |
4328 | * How many blocks doth make a writepage()? | 4587 | * Account for index blocks, block groups bitmaps and block group |
4329 | * | 4588 | * descriptor blocks if modify datablocks and index blocks |
4330 | * With N blocks per page, it may be: | 4589 | * worse case, the indexs blocks spread over different block groups |
4331 | * N data blocks | ||
4332 | * 2 indirect block | ||
4333 | * 2 dindirect | ||
4334 | * 1 tindirect | ||
4335 | * N+5 bitmap blocks (from the above) | ||
4336 | * N+5 group descriptor summary blocks | ||
4337 | * 1 inode block | ||
4338 | * 1 superblock. | ||
4339 | * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files | ||
4340 | * | 4590 | * |
4341 | * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS | 4591 | * If datablocks are discontiguous, they are possible to spread over |
4592 | * different block groups too. If they are contiugous, with flexbg, | ||
4593 | * they could still across block group boundary. | ||
4342 | * | 4594 | * |
4343 | * With ordered or writeback data it's the same, less the N data blocks. | 4595 | * Also account for superblock, inode, quota and xattr blocks |
4596 | */ | ||
4597 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4598 | { | ||
4599 | int groups, gdpblocks; | ||
4600 | int idxblocks; | ||
4601 | int ret = 0; | ||
4602 | |||
4603 | /* | ||
4604 | * How many index blocks need to touch to modify nrblocks? | ||
4605 | * The "Chunk" flag indicating whether the nrblocks is | ||
4606 | * physically contiguous on disk | ||
4607 | * | ||
4608 | * For Direct IO and fallocate, they calls get_block to allocate | ||
4609 | * one single extent at a time, so they could set the "Chunk" flag | ||
4610 | */ | ||
4611 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | ||
4612 | |||
4613 | ret = idxblocks; | ||
4614 | |||
4615 | /* | ||
4616 | * Now let's see how many group bitmaps and group descriptors need | ||
4617 | * to account | ||
4618 | */ | ||
4619 | groups = idxblocks; | ||
4620 | if (chunk) | ||
4621 | groups += 1; | ||
4622 | else | ||
4623 | groups += nrblocks; | ||
4624 | |||
4625 | gdpblocks = groups; | ||
4626 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | ||
4627 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | ||
4628 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | ||
4629 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | ||
4630 | |||
4631 | /* bitmaps and block group descriptor blocks */ | ||
4632 | ret += groups + gdpblocks; | ||
4633 | |||
4634 | /* Blocks for super block, inode, quota and xattr blocks */ | ||
4635 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
4636 | |||
4637 | return ret; | ||
4638 | } | ||
4639 | |||
4640 | /* | ||
4641 | * Calulate the total number of credits to reserve to fit | ||
4642 | * the modification of a single pages into a single transaction, | ||
4643 | * which may include multiple chunks of block allocations. | ||
4344 | * | 4644 | * |
4345 | * If the inode's direct blocks can hold an integral number of pages then a | 4645 | * This could be called via ext4_write_begin() |
4346 | * page cannot straddle two indirect blocks, and we can only touch one indirect | ||
4347 | * and dindirect block, and the "5" above becomes "3". | ||
4348 | * | 4646 | * |
4349 | * This still overestimates under most circumstances. If we were to pass the | 4647 | * We need to consider the worse case, when |
4350 | * start and end offsets in here as well we could do block_to_path() on each | 4648 | * one new block per extent. |
4351 | * block and work out the exact number of indirects which are touched. Pah. | ||
4352 | */ | 4649 | */ |
4353 | |||
4354 | int ext4_writepage_trans_blocks(struct inode *inode) | 4650 | int ext4_writepage_trans_blocks(struct inode *inode) |
4355 | { | 4651 | { |
4356 | int bpp = ext4_journal_blocks_per_page(inode); | 4652 | int bpp = ext4_journal_blocks_per_page(inode); |
4357 | int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; | ||
4358 | int ret; | 4653 | int ret; |
4359 | 4654 | ||
4360 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 4655 | ret = ext4_meta_trans_blocks(inode, bpp, 0); |
4361 | return ext4_ext_writepage_trans_blocks(inode, bpp); | ||
4362 | 4656 | ||
4657 | /* Account for data blocks for journalled mode */ | ||
4363 | if (ext4_should_journal_data(inode)) | 4658 | if (ext4_should_journal_data(inode)) |
4364 | ret = 3 * (bpp + indirects) + 2; | 4659 | ret += bpp; |
4365 | else | ||
4366 | ret = 2 * (bpp + indirects) + 2; | ||
4367 | |||
4368 | #ifdef CONFIG_QUOTA | ||
4369 | /* We know that structure was already allocated during DQUOT_INIT so | ||
4370 | * we will be updating only the data blocks + inodes */ | ||
4371 | ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
4372 | #endif | ||
4373 | |||
4374 | return ret; | 4660 | return ret; |
4375 | } | 4661 | } |
4376 | 4662 | ||
4377 | /* | 4663 | /* |
4664 | * Calculate the journal credits for a chunk of data modification. | ||
4665 | * | ||
4666 | * This is called from DIO, fallocate or whoever calling | ||
4667 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | ||
4668 | * | ||
4669 | * journal buffers for data blocks are not included here, as DIO | ||
4670 | * and fallocate do no need to journal data buffers. | ||
4671 | */ | ||
4672 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
4673 | { | ||
4674 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
4675 | } | ||
4676 | |||
4677 | /* | ||
4378 | * The caller must have previously called ext4_reserve_inode_write(). | 4678 | * The caller must have previously called ext4_reserve_inode_write(). |
4379 | * Give this, we know that the caller already has write access to iloc->bh. | 4679 | * Give this, we know that the caller already has write access to iloc->bh. |
4380 | */ | 4680 | */ |
@@ -4647,6 +4947,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4647 | loff_t size; | 4947 | loff_t size; |
4648 | unsigned long len; | 4948 | unsigned long len; |
4649 | int ret = -EINVAL; | 4949 | int ret = -EINVAL; |
4950 | void *fsdata; | ||
4650 | struct file *file = vma->vm_file; | 4951 | struct file *file = vma->vm_file; |
4651 | struct inode *inode = file->f_path.dentry->d_inode; | 4952 | struct inode *inode = file->f_path.dentry->d_inode; |
4652 | struct address_space *mapping = inode->i_mapping; | 4953 | struct address_space *mapping = inode->i_mapping; |
@@ -4685,11 +4986,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
4685 | * on the same page though | 4986 | * on the same page though |
4686 | */ | 4987 | */ |
4687 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4988 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
4688 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4989 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
4689 | if (ret < 0) | 4990 | if (ret < 0) |
4690 | goto out_unlock; | 4991 | goto out_unlock; |
4691 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4992 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
4692 | len, len, page, NULL); | 4993 | len, len, page, fsdata); |
4693 | if (ret < 0) | 4994 | if (ret < 0) |
4694 | goto out_unlock; | 4995 | goto out_unlock; |
4695 | ret = 0; | 4996 | ret = 0; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba6..dc99b4776d58 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
24 | struct ext4_inode_info *ei = EXT4_I(inode); | 24 | struct ext4_inode_info *ei = EXT4_I(inode); |
25 | unsigned int flags; | 25 | unsigned int flags; |
26 | unsigned short rsv_window_size; | ||
27 | 26 | ||
28 | ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 27 | ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); |
29 | 28 | ||
30 | switch (cmd) { | 29 | switch (cmd) { |
31 | case EXT4_IOC_GETFLAGS: | 30 | case EXT4_IOC_GETFLAGS: |
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
34 | return put_user(flags, (int __user *) arg); | 33 | return put_user(flags, (int __user *) arg); |
35 | case EXT4_IOC_SETFLAGS: { | 34 | case EXT4_IOC_SETFLAGS: { |
36 | handle_t *handle = NULL; | 35 | handle_t *handle = NULL; |
37 | int err; | 36 | int err, migrate = 0; |
38 | struct ext4_iloc iloc; | 37 | struct ext4_iloc iloc; |
39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
40 | unsigned int jflag; | 39 | unsigned int jflag; |
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
82 | if (!capable(CAP_SYS_RESOURCE)) | 81 | if (!capable(CAP_SYS_RESOURCE)) |
83 | goto flags_out; | 82 | goto flags_out; |
84 | } | 83 | } |
84 | if (oldflags & EXT4_EXTENTS_FL) { | ||
85 | /* We don't support clearning extent flags */ | ||
86 | if (!(flags & EXT4_EXTENTS_FL)) { | ||
87 | err = -EOPNOTSUPP; | ||
88 | goto flags_out; | ||
89 | } | ||
90 | } else if (flags & EXT4_EXTENTS_FL) { | ||
91 | /* migrate the file */ | ||
92 | migrate = 1; | ||
93 | flags &= ~EXT4_EXTENTS_FL; | ||
94 | } | ||
85 | 95 | ||
86 | handle = ext4_journal_start(inode, 1); | 96 | handle = ext4_journal_start(inode, 1); |
87 | if (IS_ERR(handle)) { | 97 | if (IS_ERR(handle)) { |
@@ -109,6 +119,10 @@ flags_err: | |||
109 | 119 | ||
110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 120 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
111 | err = ext4_change_inode_journal_flag(inode, jflag); | 121 | err = ext4_change_inode_journal_flag(inode, jflag); |
122 | if (err) | ||
123 | goto flags_out; | ||
124 | if (migrate) | ||
125 | err = ext4_ext_migrate(inode); | ||
112 | flags_out: | 126 | flags_out: |
113 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
114 | mnt_drop_write(filp->f_path.mnt); | 128 | mnt_drop_write(filp->f_path.mnt); |
@@ -175,53 +189,10 @@ setversion_out: | |||
175 | return ret; | 189 | return ret; |
176 | } | 190 | } |
177 | #endif | 191 | #endif |
178 | case EXT4_IOC_GETRSVSZ: | ||
179 | if (test_opt(inode->i_sb, RESERVATION) | ||
180 | && S_ISREG(inode->i_mode) | ||
181 | && ei->i_block_alloc_info) { | ||
182 | rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; | ||
183 | return put_user(rsv_window_size, (int __user *)arg); | ||
184 | } | ||
185 | return -ENOTTY; | ||
186 | case EXT4_IOC_SETRSVSZ: { | ||
187 | int err; | ||
188 | |||
189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | ||
190 | return -ENOTTY; | ||
191 | |||
192 | if (!is_owner_or_cap(inode)) | ||
193 | return -EACCES; | ||
194 | |||
195 | if (get_user(rsv_window_size, (int __user *)arg)) | ||
196 | return -EFAULT; | ||
197 | |||
198 | err = mnt_want_write(filp->f_path.mnt); | ||
199 | if (err) | ||
200 | return err; | ||
201 | |||
202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | ||
203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | ||
204 | |||
205 | /* | ||
206 | * need to allocate reservation structure for this inode | ||
207 | * before set the window size | ||
208 | */ | ||
209 | down_write(&ei->i_data_sem); | ||
210 | if (!ei->i_block_alloc_info) | ||
211 | ext4_init_block_alloc_info(inode); | ||
212 | |||
213 | if (ei->i_block_alloc_info){ | ||
214 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | ||
215 | rsv->rsv_goal_size = rsv_window_size; | ||
216 | } | ||
217 | up_write(&ei->i_data_sem); | ||
218 | mnt_drop_write(filp->f_path.mnt); | ||
219 | return 0; | ||
220 | } | ||
221 | case EXT4_IOC_GROUP_EXTEND: { | 192 | case EXT4_IOC_GROUP_EXTEND: { |
222 | ext4_fsblk_t n_blocks_count; | 193 | ext4_fsblk_t n_blocks_count; |
223 | struct super_block *sb = inode->i_sb; | 194 | struct super_block *sb = inode->i_sb; |
224 | int err; | 195 | int err, err2; |
225 | 196 | ||
226 | if (!capable(CAP_SYS_RESOURCE)) | 197 | if (!capable(CAP_SYS_RESOURCE)) |
227 | return -EPERM; | 198 | return -EPERM; |
@@ -235,8 +206,10 @@ setversion_out: | |||
235 | 206 | ||
236 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 207 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
237 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 208 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
238 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 209 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
239 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 210 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
211 | if (err == 0) | ||
212 | err = err2; | ||
240 | mnt_drop_write(filp->f_path.mnt); | 213 | mnt_drop_write(filp->f_path.mnt); |
241 | 214 | ||
242 | return err; | 215 | return err; |
@@ -244,7 +217,7 @@ setversion_out: | |||
244 | case EXT4_IOC_GROUP_ADD: { | 217 | case EXT4_IOC_GROUP_ADD: { |
245 | struct ext4_new_group_data input; | 218 | struct ext4_new_group_data input; |
246 | struct super_block *sb = inode->i_sb; | 219 | struct super_block *sb = inode->i_sb; |
247 | int err; | 220 | int err, err2; |
248 | 221 | ||
249 | if (!capable(CAP_SYS_RESOURCE)) | 222 | if (!capable(CAP_SYS_RESOURCE)) |
250 | return -EPERM; | 223 | return -EPERM; |
@@ -259,15 +232,36 @@ setversion_out: | |||
259 | 232 | ||
260 | err = ext4_group_add(sb, &input); | 233 | err = ext4_group_add(sb, &input); |
261 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 234 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
262 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 235 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
263 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 236 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
237 | if (err == 0) | ||
238 | err = err2; | ||
264 | mnt_drop_write(filp->f_path.mnt); | 239 | mnt_drop_write(filp->f_path.mnt); |
265 | 240 | ||
266 | return err; | 241 | return err; |
267 | } | 242 | } |
268 | 243 | ||
269 | case EXT4_IOC_MIGRATE: | 244 | case EXT4_IOC_MIGRATE: |
270 | return ext4_ext_migrate(inode, filp, cmd, arg); | 245 | { |
246 | int err; | ||
247 | if (!is_owner_or_cap(inode)) | ||
248 | return -EACCES; | ||
249 | |||
250 | err = mnt_want_write(filp->f_path.mnt); | ||
251 | if (err) | ||
252 | return err; | ||
253 | /* | ||
254 | * inode_mutex prevent write and truncate on the file. | ||
255 | * Read still goes through. We take i_data_sem in | ||
256 | * ext4_ext_swap_inode_data before we switch the | ||
257 | * inode format to prevent read. | ||
258 | */ | ||
259 | mutex_lock(&(inode->i_mutex)); | ||
260 | err = ext4_ext_migrate(inode); | ||
261 | mutex_unlock(&(inode->i_mutex)); | ||
262 | mnt_drop_write(filp->f_path.mnt); | ||
263 | return err; | ||
264 | } | ||
271 | 265 | ||
272 | default: | 266 | default: |
273 | return -ENOTTY; | 267 | return -ENOTTY; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
483 | BUG(); | 484 | BUG(); |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
533 | void *buddy; | 534 | void *buddy; |
534 | void *buddy2; | 535 | void *buddy2; |
535 | 536 | ||
536 | if (!test_opt(sb, MBALLOC)) | ||
537 | return 0; | ||
538 | |||
539 | { | 537 | { |
540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
785 | goto out; | 783 | goto out; |
786 | 784 | ||
787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
788 | continue; | 787 | continue; |
789 | 788 | ||
789 | lock_buffer(bh[i]); | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2169 | { | 2169 | { |
2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2171 | 2171 | ||
2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
2175 | } | ||
2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
2176 | } | 2177 | } |
2177 | 2178 | ||
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2181 | int i; | 2182 | int i; |
2182 | 2183 | ||
2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
2188 | } | 2189 | } |
2189 | 2190 | ||
@@ -2299,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2299 | } | 2300 | } |
2300 | 2301 | ||
2301 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | ||
2302 | 2304 | ||
2303 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
2304 | { | 2306 | { |
@@ -2485,19 +2487,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2485 | unsigned max; | 2487 | unsigned max; |
2486 | int ret; | 2488 | int ret; |
2487 | 2489 | ||
2488 | if (!test_opt(sb, MBALLOC)) | ||
2489 | return 0; | ||
2490 | |||
2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2490 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
2492 | 2491 | ||
2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2492 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2494 | if (sbi->s_mb_offsets == NULL) { | 2493 | if (sbi->s_mb_offsets == NULL) { |
2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2496 | return -ENOMEM; | 2494 | return -ENOMEM; |
2497 | } | 2495 | } |
2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2496 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2499 | if (sbi->s_mb_maxs == NULL) { | 2497 | if (sbi->s_mb_maxs == NULL) { |
2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2501 | kfree(sbi->s_mb_maxs); | 2498 | kfree(sbi->s_mb_maxs); |
2502 | return -ENOMEM; | 2499 | return -ENOMEM; |
2503 | } | 2500 | } |
@@ -2520,16 +2517,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2520 | /* init file for buddy data */ | 2517 | /* init file for buddy data */ |
2521 | ret = ext4_mb_init_backend(sb); | 2518 | ret = ext4_mb_init_backend(sb); |
2522 | if (ret != 0) { | 2519 | if (ret != 0) { |
2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2524 | kfree(sbi->s_mb_offsets); | 2520 | kfree(sbi->s_mb_offsets); |
2525 | kfree(sbi->s_mb_maxs); | 2521 | kfree(sbi->s_mb_maxs); |
2526 | return ret; | 2522 | return ret; |
2527 | } | 2523 | } |
2528 | 2524 | ||
2529 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
2530 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2531 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2532 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2533 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2534 | 2527 | ||
2535 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
@@ -2540,17 +2533,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2533 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2534 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2542 | 2535 | ||
2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2536 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2545 | if (sbi->s_locality_groups == NULL) { | 2537 | if (sbi->s_locality_groups == NULL) { |
2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2547 | kfree(sbi->s_mb_offsets); | 2538 | kfree(sbi->s_mb_offsets); |
2548 | kfree(sbi->s_mb_maxs); | 2539 | kfree(sbi->s_mb_maxs); |
2549 | return -ENOMEM; | 2540 | return -ENOMEM; |
2550 | } | 2541 | } |
2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2542 | for_each_possible_cpu(i) { |
2552 | struct ext4_locality_group *lg; | 2543 | struct ext4_locality_group *lg; |
2553 | lg = &sbi->s_locality_groups[i]; | 2544 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
2554 | mutex_init(&lg->lg_mutex); | 2545 | mutex_init(&lg->lg_mutex); |
2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2546 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2547 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
@@ -2560,7 +2551,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2560 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
2561 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
2562 | 2553 | ||
2563 | printk("EXT4-fs: mballoc enabled\n"); | 2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2555 | |||
2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | ||
2564 | return 0; | 2557 | return 0; |
2565 | } | 2558 | } |
2566 | 2559 | ||
@@ -2575,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2575 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | 2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); |
2576 | list_del(&pa->pa_group_list); | 2569 | list_del(&pa->pa_group_list); |
2577 | count++; | 2570 | count++; |
2578 | kfree(pa); | 2571 | kmem_cache_free(ext4_pspace_cachep, pa); |
2579 | } | 2572 | } |
2580 | if (count) | 2573 | if (count) |
2581 | mb_debug("mballoc: %u PAs left\n", count); | 2574 | mb_debug("mballoc: %u PAs left\n", count); |
@@ -2589,18 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2591 | 2584 | ||
2592 | if (!test_opt(sb, MBALLOC)) | ||
2593 | return 0; | ||
2594 | |||
2595 | /* release freed, non-committed blocks */ | ||
2596 | spin_lock(&sbi->s_md_lock); | ||
2597 | list_splice_init(&sbi->s_closed_transaction, | ||
2598 | &sbi->s_committed_transaction); | ||
2599 | list_splice_init(&sbi->s_active_transaction, | ||
2600 | &sbi->s_committed_transaction); | ||
2601 | spin_unlock(&sbi->s_md_lock); | ||
2602 | ext4_mb_free_committed_blocks(sb); | ||
2603 | |||
2604 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
2605 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2606 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
@@ -2647,69 +2628,64 @@ int ext4_mb_release(struct super_block *sb) | |||
2647 | atomic_read(&sbi->s_mb_discarded)); | 2628 | atomic_read(&sbi->s_mb_discarded)); |
2648 | } | 2629 | } |
2649 | 2630 | ||
2650 | kfree(sbi->s_locality_groups); | 2631 | free_percpu(sbi->s_locality_groups); |
2651 | |||
2652 | ext4_mb_history_release(sb); | 2632 | ext4_mb_history_release(sb); |
2653 | ext4_mb_destroy_per_dev_proc(sb); | 2633 | ext4_mb_destroy_per_dev_proc(sb); |
2654 | 2634 | ||
2655 | return 0; | 2635 | return 0; |
2656 | } | 2636 | } |
2657 | 2637 | ||
2658 | static noinline_for_stack void | 2638 | /* |
2659 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
2640 | * so we know we can free the blocks that were released with that commit. | ||
2641 | */ | ||
2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
2660 | { | 2643 | { |
2661 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2644 | struct super_block *sb = journal->j_private; |
2662 | int err; | ||
2663 | int i; | ||
2664 | int count = 0; | ||
2665 | int count2 = 0; | ||
2666 | struct ext4_free_metadata *md; | ||
2667 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
2646 | struct ext4_group_info *db; | ||
2647 | int err, count = 0, count2 = 0; | ||
2648 | struct ext4_free_data *entry; | ||
2649 | ext4_fsblk_t discard_block; | ||
2650 | struct list_head *l, *ltmp; | ||
2668 | 2651 | ||
2669 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2670 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
2671 | |||
2672 | /* there is committed blocks to be freed yet */ | ||
2673 | do { | ||
2674 | /* get next array of blocks */ | ||
2675 | md = NULL; | ||
2676 | spin_lock(&sbi->s_md_lock); | ||
2677 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2678 | md = list_entry(sbi->s_committed_transaction.next, | ||
2679 | struct ext4_free_metadata, list); | ||
2680 | list_del(&md->list); | ||
2681 | } | ||
2682 | spin_unlock(&sbi->s_md_lock); | ||
2683 | |||
2684 | if (md == NULL) | ||
2685 | break; | ||
2686 | 2654 | ||
2687 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2688 | md->num, md->group, md); | 2656 | entry->count, entry->group, entry); |
2689 | 2657 | ||
2690 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2691 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
2692 | BUG_ON(err != 0); | 2660 | BUG_ON(err != 0); |
2693 | 2661 | ||
2662 | db = e4b.bd_info; | ||
2694 | /* there are blocks to put in buddy to make them really free */ | 2663 | /* there are blocks to put in buddy to make them really free */ |
2695 | count += md->num; | 2664 | count += entry->count; |
2696 | count2++; | 2665 | count2++; |
2697 | ext4_lock_group(sb, md->group); | 2666 | ext4_lock_group(sb, entry->group); |
2698 | for (i = 0; i < md->num; i++) { | 2667 | /* Take it out of per group rb tree */ |
2699 | mb_debug(" %u", md->blocks[i]); | 2668 | rb_erase(&entry->node, &(db->bb_free_root)); |
2700 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2669 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2670 | |||
2671 | if (!db->bb_free_root.rb_node) { | ||
2672 | /* No more items in the per group rb tree | ||
2673 | * balance refcounts from ext4_mb_free_metadata() | ||
2674 | */ | ||
2675 | page_cache_release(e4b.bd_buddy_page); | ||
2676 | page_cache_release(e4b.bd_bitmap_page); | ||
2701 | } | 2677 | } |
2702 | mb_debug("\n"); | 2678 | ext4_unlock_group(sb, entry->group); |
2703 | ext4_unlock_group(sb, md->group); | 2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2704 | 2680 | + entry->start_blk | |
2705 | /* balance refcounts from ext4_mb_free_metadata() */ | 2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2706 | page_cache_release(e4b.bd_buddy_page); | 2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, |
2707 | page_cache_release(e4b.bd_bitmap_page); | 2683 | (unsigned long long) discard_block, entry->count); |
2708 | 2684 | sb_issue_discard(sb, discard_block, entry->count); | |
2709 | kfree(md); | 2685 | |
2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2710 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
2711 | 2688 | } | |
2712 | } while (md); | ||
2713 | 2689 | ||
2714 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2715 | } | 2691 | } |
@@ -2721,119 +2697,52 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2697 | #define EXT4_MB_STREAM_REQ "stream_req" |
2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2698 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
2723 | 2699 | ||
2724 | |||
2725 | |||
2726 | #define MB_PROC_FOPS(name) \ | ||
2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
2728 | { \ | ||
2729 | struct ext4_sb_info *sbi = m->private; \ | ||
2730 | \ | ||
2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
2732 | return 0; \ | ||
2733 | } \ | ||
2734 | \ | ||
2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
2736 | { \ | ||
2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
2738 | } \ | ||
2739 | \ | ||
2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
2742 | { \ | ||
2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
2744 | char str[32]; \ | ||
2745 | long value; \ | ||
2746 | if (cnt >= sizeof(str)) \ | ||
2747 | return -EINVAL; \ | ||
2748 | if (copy_from_user(str, buf, cnt)) \ | ||
2749 | return -EFAULT; \ | ||
2750 | value = simple_strtol(str, NULL, 0); \ | ||
2751 | if (value <= 0) \ | ||
2752 | return -ERANGE; \ | ||
2753 | sbi->s_mb_##name = value; \ | ||
2754 | return cnt; \ | ||
2755 | } \ | ||
2756 | \ | ||
2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
2758 | .owner = THIS_MODULE, \ | ||
2759 | .open = ext4_mb_##name##_proc_open, \ | ||
2760 | .read = seq_read, \ | ||
2761 | .llseek = seq_lseek, \ | ||
2762 | .release = single_release, \ | ||
2763 | .write = ext4_mb_##name##_proc_write, \ | ||
2764 | }; | ||
2765 | |||
2766 | MB_PROC_FOPS(stats); | ||
2767 | MB_PROC_FOPS(max_to_scan); | ||
2768 | MB_PROC_FOPS(min_to_scan); | ||
2769 | MB_PROC_FOPS(order2_reqs); | ||
2770 | MB_PROC_FOPS(stream_request); | ||
2771 | MB_PROC_FOPS(group_prealloc); | ||
2772 | |||
2773 | #define MB_PROC_HANDLER(name, var) \ | ||
2774 | do { \ | ||
2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
2777 | if (proc == NULL) { \ | ||
2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
2779 | goto err_out; \ | ||
2780 | } \ | ||
2781 | } while (0) | ||
2782 | |||
2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2700 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2784 | { | 2701 | { |
2702 | #ifdef CONFIG_PROC_FS | ||
2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2703 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2787 | struct proc_dir_entry *proc; | 2705 | struct proc_dir_entry *proc; |
2788 | char devname[64]; | ||
2789 | 2706 | ||
2790 | if (proc_root_ext4 == NULL) { | 2707 | if (sbi->s_proc == NULL) |
2791 | sbi->s_mb_proc = NULL; | ||
2792 | return -EINVAL; | 2708 | return -EINVAL; |
2793 | } | ||
2794 | bdevname(sb->s_bdev, devname); | ||
2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2796 | |||
2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2803 | 2709 | ||
2710 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2711 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2712 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2713 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2714 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2715 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2804 | return 0; | 2716 | return 0; |
2805 | 2717 | ||
2806 | err_out: | 2718 | err_out: |
2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2719 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2720 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2721 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2722 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2723 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2724 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2814 | remove_proc_entry(devname, proc_root_ext4); | ||
2815 | sbi->s_mb_proc = NULL; | ||
2816 | |||
2817 | return -ENOMEM; | 2725 | return -ENOMEM; |
2726 | #else | ||
2727 | return 0; | ||
2728 | #endif | ||
2818 | } | 2729 | } |
2819 | 2730 | ||
2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2731 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2821 | { | 2732 | { |
2733 | #ifdef CONFIG_PROC_FS | ||
2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2734 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2823 | char devname[64]; | ||
2824 | 2735 | ||
2825 | if (sbi->s_mb_proc == NULL) | 2736 | if (sbi->s_proc == NULL) |
2826 | return -EINVAL; | 2737 | return -EINVAL; |
2827 | 2738 | ||
2828 | bdevname(sb->s_bdev, devname); | 2739 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2740 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2741 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2742 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2743 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2744 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | 2745 | #endif |
2835 | remove_proc_entry(devname, proc_root_ext4); | ||
2836 | |||
2837 | return 0; | 2746 | return 0; |
2838 | } | 2747 | } |
2839 | 2748 | ||
@@ -2854,11 +2763,16 @@ int __init init_ext4_mballoc(void) | |||
2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2763 | kmem_cache_destroy(ext4_pspace_cachep); |
2855 | return -ENOMEM; | 2764 | return -ENOMEM; |
2856 | } | 2765 | } |
2857 | #ifdef CONFIG_PROC_FS | 2766 | |
2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | 2767 | ext4_free_ext_cachep = |
2859 | if (proc_root_ext4 == NULL) | 2768 | kmem_cache_create("ext4_free_block_extents", |
2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | 2769 | sizeof(struct ext4_free_data), |
2861 | #endif | 2770 | 0, SLAB_RECLAIM_ACCOUNT, NULL); |
2771 | if (ext4_free_ext_cachep == NULL) { | ||
2772 | kmem_cache_destroy(ext4_pspace_cachep); | ||
2773 | kmem_cache_destroy(ext4_ac_cachep); | ||
2774 | return -ENOMEM; | ||
2775 | } | ||
2862 | return 0; | 2776 | return 0; |
2863 | } | 2777 | } |
2864 | 2778 | ||
@@ -2867,9 +2781,7 @@ void exit_ext4_mballoc(void) | |||
2867 | /* XXX: synchronize_rcu(); */ | 2781 | /* XXX: synchronize_rcu(); */ |
2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2782 | kmem_cache_destroy(ext4_pspace_cachep); |
2869 | kmem_cache_destroy(ext4_ac_cachep); | 2783 | kmem_cache_destroy(ext4_ac_cachep); |
2870 | #ifdef CONFIG_PROC_FS | 2784 | kmem_cache_destroy(ext4_free_ext_cachep); |
2871 | remove_proc_entry("fs/ext4", NULL); | ||
2872 | #endif | ||
2873 | } | 2785 | } |
2874 | 2786 | ||
2875 | 2787 | ||
@@ -2879,7 +2791,7 @@ void exit_ext4_mballoc(void) | |||
2879 | */ | 2791 | */ |
2880 | static noinline_for_stack int | 2792 | static noinline_for_stack int |
2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2793 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2882 | handle_t *handle) | 2794 | handle_t *handle, unsigned long reserv_blks) |
2883 | { | 2795 | { |
2884 | struct buffer_head *bitmap_bh = NULL; | 2796 | struct buffer_head *bitmap_bh = NULL; |
2885 | struct ext4_super_block *es; | 2797 | struct ext4_super_block *es; |
@@ -2968,15 +2880,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2880 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2882 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2971 | 2883 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
2972 | /* | 2884 | /* |
2973 | * free blocks account has already be reduced/reserved | 2885 | * Now reduce the dirty block count also. Should not go negative |
2974 | * at write_begin() time for delayed allocation | ||
2975 | * do not double accounting | ||
2976 | */ | 2886 | */ |
2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2887 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2888 | /* release all the reserved blocks if non delalloc */ |
2979 | ac->ac_b_ex.fe_len); | 2889 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2890 | else | ||
2891 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2892 | ac->ac_b_ex.fe_len); | ||
2980 | 2893 | ||
2981 | if (sbi->s_log_groups_per_flex) { | 2894 | if (sbi->s_log_groups_per_flex) { |
2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2895 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3282,6 +3195,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3195 | } |
3283 | 3196 | ||
3284 | /* | 3197 | /* |
3198 | * Return the prealloc space that have minimal distance | ||
3199 | * from the goal block. @cpa is the prealloc | ||
3200 | * space that is having currently known minimal distance | ||
3201 | * from the goal block. | ||
3202 | */ | ||
3203 | static struct ext4_prealloc_space * | ||
3204 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3205 | struct ext4_prealloc_space *pa, | ||
3206 | struct ext4_prealloc_space *cpa) | ||
3207 | { | ||
3208 | ext4_fsblk_t cur_distance, new_distance; | ||
3209 | |||
3210 | if (cpa == NULL) { | ||
3211 | atomic_inc(&pa->pa_count); | ||
3212 | return pa; | ||
3213 | } | ||
3214 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3215 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3216 | |||
3217 | if (cur_distance < new_distance) | ||
3218 | return cpa; | ||
3219 | |||
3220 | /* drop the previous reference */ | ||
3221 | atomic_dec(&cpa->pa_count); | ||
3222 | atomic_inc(&pa->pa_count); | ||
3223 | return pa; | ||
3224 | } | ||
3225 | |||
3226 | /* | ||
3285 | * search goal blocks in preallocated space | 3227 | * search goal blocks in preallocated space |
3286 | */ | 3228 | */ |
3287 | static noinline_for_stack int | 3229 | static noinline_for_stack int |
@@ -3290,7 +3232,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3232 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3233 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3234 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3235 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3236 | ext4_fsblk_t goal_block; | ||
3294 | 3237 | ||
3295 | /* only data can be preallocated */ | 3238 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3239 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3276,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3276 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3277 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3278 | ||
3279 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3280 | ac->ac_g_ex.fe_start + | ||
3281 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3282 | /* | ||
3283 | * search for the prealloc space that is having | ||
3284 | * minimal distance from the goal block. | ||
3285 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3286 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3287 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3288 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3290,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3290 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3291 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3292 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3293 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3294 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3295 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3296 | } |
3350 | spin_unlock(&pa->pa_lock); | 3297 | spin_unlock(&pa->pa_lock); |
3351 | } | 3298 | } |
3352 | rcu_read_unlock(); | 3299 | rcu_read_unlock(); |
3353 | } | 3300 | } |
3301 | if (cpa) { | ||
3302 | ext4_mb_use_group_pa(ac, cpa); | ||
3303 | ac->ac_criteria = 20; | ||
3304 | return 1; | ||
3305 | } | ||
3354 | return 0; | 3306 | return 0; |
3355 | } | 3307 | } |
3356 | 3308 | ||
@@ -3845,7 +3797,7 @@ out: | |||
3845 | * | 3797 | * |
3846 | * FIXME!! Make sure it is valid at all the call sites | 3798 | * FIXME!! Make sure it is valid at all the call sites |
3847 | */ | 3799 | */ |
3848 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3800 | void ext4_discard_preallocations(struct inode *inode) |
3849 | { | 3801 | { |
3850 | struct ext4_inode_info *ei = EXT4_I(inode); | 3802 | struct ext4_inode_info *ei = EXT4_I(inode); |
3851 | struct super_block *sb = inode->i_sb; | 3803 | struct super_block *sb = inode->i_sb; |
@@ -3857,7 +3809,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
3857 | struct ext4_buddy e4b; | 3809 | struct ext4_buddy e4b; |
3858 | int err; | 3810 | int err; |
3859 | 3811 | ||
3860 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3812 | if (!S_ISREG(inode->i_mode)) { |
3861 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3813 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
3862 | return; | 3814 | return; |
3863 | } | 3815 | } |
@@ -4055,8 +4007,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4055 | * per cpu locality group is to reduce the contention between block | 4007 | * per cpu locality group is to reduce the contention between block |
4056 | * request from multiple CPUs. | 4008 | * request from multiple CPUs. |
4057 | */ | 4009 | */ |
4058 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4010 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
4059 | put_cpu(); | ||
4060 | 4011 | ||
4061 | /* we're going to use group allocation */ | 4012 | /* we're going to use group allocation */ |
4062 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4013 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4330,33 +4281,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4330 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4281 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4331 | struct ext4_allocation_request *ar, int *errp) | 4282 | struct ext4_allocation_request *ar, int *errp) |
4332 | { | 4283 | { |
4284 | int freed; | ||
4333 | struct ext4_allocation_context *ac = NULL; | 4285 | struct ext4_allocation_context *ac = NULL; |
4334 | struct ext4_sb_info *sbi; | 4286 | struct ext4_sb_info *sbi; |
4335 | struct super_block *sb; | 4287 | struct super_block *sb; |
4336 | ext4_fsblk_t block = 0; | 4288 | ext4_fsblk_t block = 0; |
4337 | int freed; | 4289 | unsigned long inquota; |
4338 | int inquota; | 4290 | unsigned long reserv_blks = 0; |
4339 | 4291 | ||
4340 | sb = ar->inode->i_sb; | 4292 | sb = ar->inode->i_sb; |
4341 | sbi = EXT4_SB(sb); | 4293 | sbi = EXT4_SB(sb); |
4342 | 4294 | ||
4343 | if (!test_opt(sb, MBALLOC)) { | ||
4344 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
4345 | &(ar->len), errp); | ||
4346 | return block; | ||
4347 | } | ||
4348 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4295 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4349 | /* | 4296 | /* |
4350 | * With delalloc we already reserved the blocks | 4297 | * With delalloc we already reserved the blocks |
4351 | */ | 4298 | */ |
4352 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4299 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4353 | } | 4300 | /* let others to free the space */ |
4354 | 4301 | yield(); | |
4355 | if (ar->len == 0) { | 4302 | ar->len = ar->len >> 1; |
4356 | *errp = -ENOSPC; | 4303 | } |
4357 | return 0; | 4304 | if (!ar->len) { |
4305 | *errp = -ENOSPC; | ||
4306 | return 0; | ||
4307 | } | ||
4308 | reserv_blks = ar->len; | ||
4358 | } | 4309 | } |
4359 | |||
4360 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4310 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
4361 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4311 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4362 | ar->len--; | 4312 | ar->len--; |
@@ -4377,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4377 | goto out1; | 4327 | goto out1; |
4378 | } | 4328 | } |
4379 | 4329 | ||
4380 | ext4_mb_poll_new_transaction(sb, handle); | ||
4381 | |||
4382 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4383 | if (*errp) { | 4331 | if (*errp) { |
4384 | ar->len = 0; | 4332 | ar->len = 0; |
@@ -4402,7 +4350,7 @@ repeat: | |||
4402 | } | 4350 | } |
4403 | 4351 | ||
4404 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4352 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4405 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4353 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4406 | if (*errp == -EAGAIN) { | 4354 | if (*errp == -EAGAIN) { |
4407 | ac->ac_b_ex.fe_group = 0; | 4355 | ac->ac_b_ex.fe_group = 0; |
4408 | ac->ac_b_ex.fe_start = 0; | 4356 | ac->ac_b_ex.fe_start = 0; |
@@ -4437,35 +4385,20 @@ out1: | |||
4437 | 4385 | ||
4438 | return block; | 4386 | return block; |
4439 | } | 4387 | } |
4440 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4441 | handle_t *handle) | ||
4442 | { | ||
4443 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4444 | 4388 | ||
4445 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | 4389 | /* |
4446 | return; | 4390 | * We can merge two free data extents only if the physical blocks |
4447 | 4391 | * are contiguous, AND the extents were freed by the same transaction, | |
4448 | /* new transaction! time to close last one and free blocks for | 4392 | * AND the blocks are associated with the same group. |
4449 | * committed transaction. we know that only transaction can be | 4393 | */ |
4450 | * active, so previos transaction can be being logged and we | 4394 | static int can_merge(struct ext4_free_data *entry1, |
4451 | * know that transaction before previous is known to be already | 4395 | struct ext4_free_data *entry2) |
4452 | * logged. this means that now we may free blocks freed in all | 4396 | { |
4453 | * transactions before previous one. hope I'm clear enough ... */ | 4397 | if ((entry1->t_tid == entry2->t_tid) && |
4454 | 4398 | (entry1->group == entry2->group) && | |
4455 | spin_lock(&sbi->s_md_lock); | 4399 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
4456 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | 4400 | return 1; |
4457 | mb_debug("new transaction %lu, old %lu\n", | 4401 | return 0; |
4458 | (unsigned long) handle->h_transaction->t_tid, | ||
4459 | (unsigned long) sbi->s_last_transaction); | ||
4460 | list_splice_init(&sbi->s_closed_transaction, | ||
4461 | &sbi->s_committed_transaction); | ||
4462 | list_splice_init(&sbi->s_active_transaction, | ||
4463 | &sbi->s_closed_transaction); | ||
4464 | sbi->s_last_transaction = handle->h_transaction->t_tid; | ||
4465 | } | ||
4466 | spin_unlock(&sbi->s_md_lock); | ||
4467 | |||
4468 | ext4_mb_free_committed_blocks(sb); | ||
4469 | } | 4402 | } |
4470 | 4403 | ||
4471 | static noinline_for_stack int | 4404 | static noinline_for_stack int |
@@ -4475,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4475 | struct ext4_group_info *db = e4b->bd_info; | 4408 | struct ext4_group_info *db = e4b->bd_info; |
4476 | struct super_block *sb = e4b->bd_sb; | 4409 | struct super_block *sb = e4b->bd_sb; |
4477 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4478 | struct ext4_free_metadata *md; | 4411 | struct ext4_free_data *entry, *new_entry; |
4479 | int i; | 4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
4413 | struct rb_node *parent = NULL, *new_node; | ||
4414 | |||
4480 | 4415 | ||
4481 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4416 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4482 | BUG_ON(e4b->bd_buddy_page == NULL); | 4417 | BUG_ON(e4b->bd_buddy_page == NULL); |
4483 | 4418 | ||
4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4420 | new_entry->start_blk = block; | ||
4421 | new_entry->group = group; | ||
4422 | new_entry->count = count; | ||
4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4424 | new_node = &new_entry->node; | ||
4425 | |||
4484 | ext4_lock_group(sb, group); | 4426 | ext4_lock_group(sb, group); |
4485 | for (i = 0; i < count; i++) { | 4427 | if (!*n) { |
4486 | md = db->bb_md_cur; | 4428 | /* first free block exent. We need to |
4487 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4429 | protect buddy cache from being freed, |
4488 | db->bb_md_cur = NULL; | 4430 | * otherwise we'll refresh it from |
4489 | md = NULL; | 4431 | * on-disk bitmap and lose not-yet-available |
4432 | * blocks */ | ||
4433 | page_cache_get(e4b->bd_buddy_page); | ||
4434 | page_cache_get(e4b->bd_bitmap_page); | ||
4435 | } | ||
4436 | while (*n) { | ||
4437 | parent = *n; | ||
4438 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
4439 | if (block < entry->start_blk) | ||
4440 | n = &(*n)->rb_left; | ||
4441 | else if (block >= (entry->start_blk + entry->count)) | ||
4442 | n = &(*n)->rb_right; | ||
4443 | else { | ||
4444 | ext4_error(sb, __func__, | ||
4445 | "Double free of blocks %d (%d %d)\n", | ||
4446 | block, entry->start_blk, entry->count); | ||
4447 | return 0; | ||
4490 | } | 4448 | } |
4449 | } | ||
4491 | 4450 | ||
4492 | if (md == NULL) { | 4451 | rb_link_node(new_node, parent, n); |
4493 | ext4_unlock_group(sb, group); | 4452 | rb_insert_color(new_node, &db->bb_free_root); |
4494 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4453 | |
4495 | if (md == NULL) | 4454 | /* Now try to see the extent can be merged to left and right */ |
4496 | return -ENOMEM; | 4455 | node = rb_prev(new_node); |
4497 | md->num = 0; | 4456 | if (node) { |
4498 | md->group = group; | 4457 | entry = rb_entry(node, struct ext4_free_data, node); |
4499 | 4458 | if (can_merge(entry, new_entry)) { | |
4500 | ext4_lock_group(sb, group); | 4459 | new_entry->start_blk = entry->start_blk; |
4501 | if (db->bb_md_cur == NULL) { | 4460 | new_entry->count += entry->count; |
4502 | spin_lock(&sbi->s_md_lock); | 4461 | rb_erase(node, &(db->bb_free_root)); |
4503 | list_add(&md->list, &sbi->s_active_transaction); | 4462 | spin_lock(&sbi->s_md_lock); |
4504 | spin_unlock(&sbi->s_md_lock); | 4463 | list_del(&entry->list); |
4505 | /* protect buddy cache from being freed, | 4464 | spin_unlock(&sbi->s_md_lock); |
4506 | * otherwise we'll refresh it from | 4465 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4507 | * on-disk bitmap and lose not-yet-available | ||
4508 | * blocks */ | ||
4509 | page_cache_get(e4b->bd_buddy_page); | ||
4510 | page_cache_get(e4b->bd_bitmap_page); | ||
4511 | db->bb_md_cur = md; | ||
4512 | db->bb_tid = handle->h_transaction->t_tid; | ||
4513 | mb_debug("new md 0x%p for group %lu\n", | ||
4514 | md, md->group); | ||
4515 | } else { | ||
4516 | kfree(md); | ||
4517 | md = db->bb_md_cur; | ||
4518 | } | ||
4519 | } | 4466 | } |
4467 | } | ||
4520 | 4468 | ||
4521 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4469 | node = rb_next(new_node); |
4522 | md->blocks[md->num] = block + i; | 4470 | if (node) { |
4523 | md->num++; | 4471 | entry = rb_entry(node, struct ext4_free_data, node); |
4524 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4472 | if (can_merge(new_entry, entry)) { |
4525 | /* no more space, put full container on a sb's list */ | 4473 | new_entry->count += entry->count; |
4526 | db->bb_md_cur = NULL; | 4474 | rb_erase(node, &(db->bb_free_root)); |
4475 | spin_lock(&sbi->s_md_lock); | ||
4476 | list_del(&entry->list); | ||
4477 | spin_unlock(&sbi->s_md_lock); | ||
4478 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4527 | } | 4479 | } |
4528 | } | 4480 | } |
4481 | /* Add the extent to transaction's private list */ | ||
4482 | spin_lock(&sbi->s_md_lock); | ||
4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | ||
4484 | spin_unlock(&sbi->s_md_lock); | ||
4529 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
4530 | return 0; | 4486 | return 0; |
4531 | } | 4487 | } |
@@ -4553,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4553 | 4509 | ||
4554 | *freed = 0; | 4510 | *freed = 0; |
4555 | 4511 | ||
4556 | ext4_mb_poll_new_transaction(sb, handle); | ||
4557 | |||
4558 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
4559 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
4560 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a75..b5dff1fff1e5 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/version.h> | 20 | #include <linux/version.h> |
21 | #include <linux/blkdev.h> | ||
22 | #include <linux/marker.h> | ||
21 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
22 | #include "ext4.h" | 24 | #include "ext4.h" |
23 | #include "group.h" | 25 | #include "group.h" |
@@ -98,23 +100,29 @@ | |||
98 | 100 | ||
99 | static struct kmem_cache *ext4_pspace_cachep; | 101 | static struct kmem_cache *ext4_pspace_cachep; |
100 | static struct kmem_cache *ext4_ac_cachep; | 102 | static struct kmem_cache *ext4_ac_cachep; |
103 | static struct kmem_cache *ext4_free_ext_cachep; | ||
101 | 104 | ||
102 | #ifdef EXT4_BB_MAX_BLOCKS | 105 | struct ext4_free_data { |
103 | #undef EXT4_BB_MAX_BLOCKS | 106 | /* this links the free block information from group_info */ |
104 | #endif | 107 | struct rb_node node; |
105 | #define EXT4_BB_MAX_BLOCKS 30 | ||
106 | 108 | ||
107 | struct ext4_free_metadata { | 109 | /* this links the free block information from ext4_sb_info */ |
108 | ext4_group_t group; | ||
109 | unsigned short num; | ||
110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
111 | struct list_head list; | 110 | struct list_head list; |
111 | |||
112 | /* group which free block extent belongs */ | ||
113 | ext4_group_t group; | ||
114 | |||
115 | /* free block extent */ | ||
116 | ext4_grpblk_t start_blk; | ||
117 | ext4_grpblk_t count; | ||
118 | |||
119 | /* transaction which freed this extent */ | ||
120 | tid_t t_tid; | ||
112 | }; | 121 | }; |
113 | 122 | ||
114 | struct ext4_group_info { | 123 | struct ext4_group_info { |
115 | unsigned long bb_state; | 124 | unsigned long bb_state; |
116 | unsigned long bb_tid; | 125 | struct rb_root bb_free_root; |
117 | struct ext4_free_metadata *bb_md_cur; | ||
118 | unsigned short bb_first_free; | 126 | unsigned short bb_first_free; |
119 | unsigned short bb_free; | 127 | unsigned short bb_free; |
120 | unsigned short bb_fragments; | 128 | unsigned short bb_fragments; |
@@ -257,13 +265,10 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |||
257 | 265 | ||
258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 266 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
259 | 267 | ||
260 | static struct proc_dir_entry *proc_root_ext4; | ||
261 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 268 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
262 | 269 | ||
263 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
264 | ext4_group_t group); | 271 | ext4_group_t group); |
265 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
266 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
267 | static void ext4_mb_return_to_preallocation(struct inode *inode, | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
268 | struct ext4_buddy *e4b, sector_t block, | 273 | struct ext4_buddy *e4b, sector_t block, |
269 | int count); | 274 | int count); |
@@ -271,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, | |||
271 | struct super_block *, struct ext4_prealloc_space *pa); | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
272 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
273 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
279 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
274 | 280 | ||
275 | 281 | ||
276 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..f2a9cf498ecd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
53 | * credit. But below we try to not accumalate too much | 53 | * credit. But below we try to not accumalate too much |
54 | * of them by restarting the journal. | 54 | * of them by restarting the journal. |
55 | */ | 55 | */ |
56 | needed = ext4_ext_calc_credits_for_insert(inode, path); | 56 | needed = ext4_ext_calc_credits_for_single_extent(inode, |
57 | lb->last_block - lb->first_block + 1, path); | ||
57 | 58 | ||
58 | /* | 59 | /* |
59 | * Make sure the credit we accumalated is not really high | 60 | * Make sure the credit we accumalated is not really high |
@@ -446,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) | |||
446 | 447 | ||
447 | } | 448 | } |
448 | 449 | ||
449 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | 450 | int ext4_ext_migrate(struct inode *inode) |
450 | unsigned int cmd, unsigned long arg) | ||
451 | { | 451 | { |
452 | handle_t *handle; | 452 | handle_t *handle; |
453 | int retval = 0, i; | 453 | int retval = 0, i; |
@@ -515,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
515 | * when we add extents we extent the journal | 515 | * when we add extents we extent the journal |
516 | */ | 516 | */ |
517 | /* | 517 | /* |
518 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
519 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
520 | * switch the inode format to prevent read. | ||
521 | */ | ||
522 | mutex_lock(&(inode->i_mutex)); | ||
523 | /* | ||
524 | * Even though we take i_mutex we can still cause block allocation | 518 | * Even though we take i_mutex we can still cause block allocation |
525 | * via mmap write to holes. If we have allocated new blocks we fail | 519 | * via mmap write to holes. If we have allocated new blocks we fail |
526 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 520 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. |
@@ -622,7 +616,6 @@ err_out: | |||
622 | tmp_inode->i_nlink = 0; | 616 | tmp_inode->i_nlink = 0; |
623 | 617 | ||
624 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
625 | mutex_unlock(&(inode->i_mutex)); | ||
626 | 619 | ||
627 | if (tmp_inode) | 620 | if (tmp_inode) |
628 | iput(tmp_inode); | 621 | iput(tmp_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c3..92db9e945147 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -151,34 +151,36 @@ struct dx_map_entry | |||
151 | 151 | ||
152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
154 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 154 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
155 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 155 | static void dx_set_hash(struct dx_entry *entry, unsigned value); |
156 | static unsigned dx_get_count (struct dx_entry *entries); | 156 | static unsigned dx_get_count(struct dx_entry *entries); |
157 | static unsigned dx_get_limit (struct dx_entry *entries); | 157 | static unsigned dx_get_limit(struct dx_entry *entries); |
158 | static void dx_set_count (struct dx_entry *entries, unsigned value); | 158 | static void dx_set_count(struct dx_entry *entries, unsigned value); |
159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit(struct dx_entry *entries, unsigned value); |
160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit(struct inode *dir, unsigned infosize); |
161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit(struct inode *dir); |
162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
163 | struct inode *dir, | 163 | struct inode *dir, |
164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
166 | int *err); | 166 | int *err); |
167 | static void dx_release (struct dx_frame *frames); | 167 | static void dx_release(struct dx_frame *frames); |
168 | static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | 168 | static int dx_make_map(struct ext4_dir_entry_2 *de, int size, |
169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
171 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 171 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
172 | struct dx_map_entry *offsets, int count); | 172 | struct dx_map_entry *offsets, int count); |
173 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 173 | static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); |
174 | static void dx_insert_block(struct dx_frame *frame, | 174 | static void dx_insert_block(struct dx_frame *frame, |
175 | u32 hash, ext4_lblk_t block); | 175 | u32 hash, ext4_lblk_t block); |
176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
177 | struct dx_frame *frame, | 177 | struct dx_frame *frame, |
178 | struct dx_frame *frames, | 178 | struct dx_frame *frames, |
179 | __u32 *start_hash); | 179 | __u32 *start_hash); |
180 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 180 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
181 | struct ext4_dir_entry_2 **res_dir, int *err); | 181 | const struct qstr *d_name, |
182 | struct ext4_dir_entry_2 **res_dir, | ||
183 | int *err); | ||
182 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 184 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
183 | struct inode *inode); | 185 | struct inode *inode); |
184 | 186 | ||
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) | |||
207 | entry->block = cpu_to_le32(value); | 209 | entry->block = cpu_to_le32(value); |
208 | } | 210 | } |
209 | 211 | ||
210 | static inline unsigned dx_get_hash (struct dx_entry *entry) | 212 | static inline unsigned dx_get_hash(struct dx_entry *entry) |
211 | { | 213 | { |
212 | return le32_to_cpu(entry->hash); | 214 | return le32_to_cpu(entry->hash); |
213 | } | 215 | } |
214 | 216 | ||
215 | static inline void dx_set_hash (struct dx_entry *entry, unsigned value) | 217 | static inline void dx_set_hash(struct dx_entry *entry, unsigned value) |
216 | { | 218 | { |
217 | entry->hash = cpu_to_le32(value); | 219 | entry->hash = cpu_to_le32(value); |
218 | } | 220 | } |
219 | 221 | ||
220 | static inline unsigned dx_get_count (struct dx_entry *entries) | 222 | static inline unsigned dx_get_count(struct dx_entry *entries) |
221 | { | 223 | { |
222 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); | 224 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); |
223 | } | 225 | } |
224 | 226 | ||
225 | static inline unsigned dx_get_limit (struct dx_entry *entries) | 227 | static inline unsigned dx_get_limit(struct dx_entry *entries) |
226 | { | 228 | { |
227 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); | 229 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); |
228 | } | 230 | } |
229 | 231 | ||
230 | static inline void dx_set_count (struct dx_entry *entries, unsigned value) | 232 | static inline void dx_set_count(struct dx_entry *entries, unsigned value) |
231 | { | 233 | { |
232 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); | 234 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); |
233 | } | 235 | } |
234 | 236 | ||
235 | static inline void dx_set_limit (struct dx_entry *entries, unsigned value) | 237 | static inline void dx_set_limit(struct dx_entry *entries, unsigned value) |
236 | { | 238 | { |
237 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); | 239 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); |
238 | } | 240 | } |
239 | 241 | ||
240 | static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | 242 | static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) |
241 | { | 243 | { |
242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 244 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
243 | EXT4_DIR_REC_LEN(2) - infosize; | 245 | EXT4_DIR_REC_LEN(2) - infosize; |
244 | return entry_space / sizeof(struct dx_entry); | 246 | return entry_space / sizeof(struct dx_entry); |
245 | } | 247 | } |
246 | 248 | ||
247 | static inline unsigned dx_node_limit (struct inode *dir) | 249 | static inline unsigned dx_node_limit(struct inode *dir) |
248 | { | 250 | { |
249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 251 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
250 | return entry_space / sizeof(struct dx_entry); | 252 | return entry_space / sizeof(struct dx_entry); |
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) | |||
254 | * Debug | 256 | * Debug |
255 | */ | 257 | */ |
256 | #ifdef DX_DEBUG | 258 | #ifdef DX_DEBUG |
257 | static void dx_show_index (char * label, struct dx_entry *entries) | 259 | static void dx_show_index(char * label, struct dx_entry *entries) |
258 | { | 260 | { |
259 | int i, n = dx_get_count (entries); | 261 | int i, n = dx_get_count (entries); |
260 | printk("%s index ", label); | 262 | printk(KERN_DEBUG "%s index ", label); |
261 | for (i = 0; i < n; i++) { | 263 | for (i = 0; i < n; i++) { |
262 | printk("%x->%lu ", i? dx_get_hash(entries + i) : | 264 | printk("%x->%lu ", i ? dx_get_hash(entries + i) : |
263 | 0, (unsigned long)dx_get_block(entries + i)); | 265 | 0, (unsigned long)dx_get_block(entries + i)); |
264 | } | 266 | } |
265 | printk("\n"); | 267 | printk("\n"); |
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
306 | struct dx_entry *entries, int levels) | 308 | struct dx_entry *entries, int levels) |
307 | { | 309 | { |
308 | unsigned blocksize = dir->i_sb->s_blocksize; | 310 | unsigned blocksize = dir->i_sb->s_blocksize; |
309 | unsigned count = dx_get_count (entries), names = 0, space = 0, i; | 311 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
310 | unsigned bcount = 0; | 312 | unsigned bcount = 0; |
311 | struct buffer_head *bh; | 313 | struct buffer_head *bh; |
312 | int err; | 314 | int err; |
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
325 | names += stats.names; | 327 | names += stats.names; |
326 | space += stats.space; | 328 | space += stats.space; |
327 | bcount += stats.bcount; | 329 | bcount += stats.bcount; |
328 | brelse (bh); | 330 | brelse(bh); |
329 | } | 331 | } |
330 | if (bcount) | 332 | if (bcount) |
331 | printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", | 333 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
332 | names, space/bcount,(space/bcount)*100/blocksize); | 334 | levels ? "" : " ", names, space/bcount, |
335 | (space/bcount)*100/blocksize); | ||
333 | return (struct stats) { names, space, bcount}; | 336 | return (struct stats) { names, space, bcount}; |
334 | } | 337 | } |
335 | #endif /* DX_DEBUG */ | 338 | #endif /* DX_DEBUG */ |
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
344 | * back to userspace. | 347 | * back to userspace. |
345 | */ | 348 | */ |
346 | static struct dx_frame * | 349 | static struct dx_frame * |
347 | dx_probe(struct dentry *dentry, struct inode *dir, | 350 | dx_probe(const struct qstr *d_name, struct inode *dir, |
348 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 351 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
349 | { | 352 | { |
350 | unsigned count, indirect; | 353 | unsigned count, indirect; |
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
355 | u32 hash; | 358 | u32 hash; |
356 | 359 | ||
357 | frame->bh = NULL; | 360 | frame->bh = NULL; |
358 | if (dentry) | ||
359 | dir = dentry->d_parent->d_inode; | ||
360 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 361 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) |
361 | goto fail; | 362 | goto fail; |
362 | root = (struct dx_root *) bh->b_data; | 363 | root = (struct dx_root *) bh->b_data; |
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
372 | } | 373 | } |
373 | hinfo->hash_version = root->info.hash_version; | 374 | hinfo->hash_version = root->info.hash_version; |
374 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 375 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
375 | if (dentry) | 376 | if (d_name) |
376 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 377 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
377 | hash = hinfo->hash; | 378 | hash = hinfo->hash; |
378 | 379 | ||
379 | if (root->info.unused_flags & 1) { | 380 | if (root->info.unused_flags & 1) { |
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
406 | goto fail; | 407 | goto fail; |
407 | } | 408 | } |
408 | 409 | ||
409 | dxtrace (printk("Look up %x", hash)); | 410 | dxtrace(printk("Look up %x", hash)); |
410 | while (1) | 411 | while (1) |
411 | { | 412 | { |
412 | count = dx_get_count(entries); | 413 | count = dx_get_count(entries); |
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
555 | 0, &err))) | 556 | 0, &err))) |
556 | return err; /* Failure */ | 557 | return err; /* Failure */ |
557 | p++; | 558 | p++; |
558 | brelse (p->bh); | 559 | brelse(p->bh); |
559 | p->bh = bh; | 560 | p->bh = bh; |
560 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; | 561 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; |
561 | } | 562 | } |
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
593 | /* On error, skip the f_pos to the next block. */ | 594 | /* On error, skip the f_pos to the next block. */ |
594 | dir_file->f_pos = (dir_file->f_pos | | 595 | dir_file->f_pos = (dir_file->f_pos | |
595 | (dir->i_sb->s_blocksize - 1)) + 1; | 596 | (dir->i_sb->s_blocksize - 1)) + 1; |
596 | brelse (bh); | 597 | brelse(bh); |
597 | return count; | 598 | return count; |
598 | } | 599 | } |
599 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 600 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
635 | int ret, err; | 636 | int ret, err; |
636 | __u32 hashval; | 637 | __u32 hashval; |
637 | 638 | ||
638 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 639 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
639 | start_minor_hash)); | 640 | start_hash, start_minor_hash)); |
640 | dir = dir_file->f_path.dentry->d_inode; | 641 | dir = dir_file->f_path.dentry->d_inode; |
641 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 642 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
642 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 643 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
648 | } | 649 | } |
649 | hinfo.hash = start_hash; | 650 | hinfo.hash = start_hash; |
650 | hinfo.minor_hash = 0; | 651 | hinfo.minor_hash = 0; |
651 | frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); | 652 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); |
652 | if (!frame) | 653 | if (!frame) |
653 | return err; | 654 | return err; |
654 | 655 | ||
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
694 | break; | 695 | break; |
695 | } | 696 | } |
696 | dx_release(frames); | 697 | dx_release(frames); |
697 | dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", | 698 | dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " |
698 | count, *next_hash)); | 699 | "next hash: %x\n", count, *next_hash)); |
699 | return count; | 700 | return count; |
700 | errout: | 701 | errout: |
701 | dx_release(frames); | 702 | dx_release(frames); |
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, | |||
802 | /* | 803 | /* |
803 | * Returns 0 if not found, -1 on failure, and 1 on success | 804 | * Returns 0 if not found, -1 on failure, and 1 on success |
804 | */ | 805 | */ |
805 | static inline int search_dirblock(struct buffer_head * bh, | 806 | static inline int search_dirblock(struct buffer_head *bh, |
806 | struct inode *dir, | 807 | struct inode *dir, |
807 | struct dentry *dentry, | 808 | const struct qstr *d_name, |
808 | unsigned long offset, | 809 | unsigned long offset, |
809 | struct ext4_dir_entry_2 ** res_dir) | 810 | struct ext4_dir_entry_2 ** res_dir) |
810 | { | 811 | { |
811 | struct ext4_dir_entry_2 * de; | 812 | struct ext4_dir_entry_2 * de; |
812 | char * dlimit; | 813 | char * dlimit; |
813 | int de_len; | 814 | int de_len; |
814 | const char *name = dentry->d_name.name; | 815 | const char *name = d_name->name; |
815 | int namelen = dentry->d_name.len; | 816 | int namelen = d_name->len; |
816 | 817 | ||
817 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 818 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
818 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 819 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
851 | * The returned buffer_head has ->b_count elevated. The caller is expected | 852 | * The returned buffer_head has ->b_count elevated. The caller is expected |
852 | * to brelse() it when appropriate. | 853 | * to brelse() it when appropriate. |
853 | */ | 854 | */ |
854 | static struct buffer_head * ext4_find_entry (struct dentry *dentry, | 855 | static struct buffer_head * ext4_find_entry (struct inode *dir, |
856 | const struct qstr *d_name, | ||
855 | struct ext4_dir_entry_2 ** res_dir) | 857 | struct ext4_dir_entry_2 ** res_dir) |
856 | { | 858 | { |
857 | struct super_block * sb; | 859 | struct super_block *sb; |
858 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 860 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
859 | struct buffer_head * bh, *ret = NULL; | 861 | struct buffer_head *bh, *ret = NULL; |
860 | ext4_lblk_t start, block, b; | 862 | ext4_lblk_t start, block, b; |
861 | int ra_max = 0; /* Number of bh's in the readahead | 863 | int ra_max = 0; /* Number of bh's in the readahead |
862 | buffer, bh_use[] */ | 864 | buffer, bh_use[] */ |
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
865 | int num = 0; | 867 | int num = 0; |
866 | ext4_lblk_t nblocks; | 868 | ext4_lblk_t nblocks; |
867 | int i, err; | 869 | int i, err; |
868 | struct inode *dir = dentry->d_parent->d_inode; | ||
869 | int namelen; | 870 | int namelen; |
870 | 871 | ||
871 | *res_dir = NULL; | 872 | *res_dir = NULL; |
872 | sb = dir->i_sb; | 873 | sb = dir->i_sb; |
873 | namelen = dentry->d_name.len; | 874 | namelen = d_name->len; |
874 | if (namelen > EXT4_NAME_LEN) | 875 | if (namelen > EXT4_NAME_LEN) |
875 | return NULL; | 876 | return NULL; |
876 | if (is_dx(dir)) { | 877 | if (is_dx(dir)) { |
877 | bh = ext4_dx_find_entry(dentry, res_dir, &err); | 878 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
878 | /* | 879 | /* |
879 | * On success, or if the error was file not found, | 880 | * On success, or if the error was file not found, |
880 | * return. Otherwise, fall back to doing a search the | 881 | * return. Otherwise, fall back to doing a search the |
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
882 | */ | 883 | */ |
883 | if (bh || (err != ERR_BAD_DX_DIR)) | 884 | if (bh || (err != ERR_BAD_DX_DIR)) |
884 | return bh; | 885 | return bh; |
885 | dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); | 886 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
887 | "falling back\n")); | ||
886 | } | 888 | } |
887 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); | 889 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); |
888 | start = EXT4_I(dir)->i_dir_start_lookup; | 890 | start = EXT4_I(dir)->i_dir_start_lookup; |
@@ -926,7 +928,7 @@ restart: | |||
926 | brelse(bh); | 928 | brelse(bh); |
927 | goto next; | 929 | goto next; |
928 | } | 930 | } |
929 | i = search_dirblock(bh, dir, dentry, | 931 | i = search_dirblock(bh, dir, d_name, |
930 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 932 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
931 | if (i == 1) { | 933 | if (i == 1) { |
932 | EXT4_I(dir)->i_dir_start_lookup = block; | 934 | EXT4_I(dir)->i_dir_start_lookup = block; |
@@ -956,11 +958,11 @@ restart: | |||
956 | cleanup_and_exit: | 958 | cleanup_and_exit: |
957 | /* Clean up the read-ahead blocks */ | 959 | /* Clean up the read-ahead blocks */ |
958 | for (; ra_ptr < ra_max; ra_ptr++) | 960 | for (; ra_ptr < ra_max; ra_ptr++) |
959 | brelse (bh_use[ra_ptr]); | 961 | brelse(bh_use[ra_ptr]); |
960 | return ret; | 962 | return ret; |
961 | } | 963 | } |
962 | 964 | ||
963 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 965 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
964 | struct ext4_dir_entry_2 **res_dir, int *err) | 966 | struct ext4_dir_entry_2 **res_dir, int *err) |
965 | { | 967 | { |
966 | struct super_block * sb; | 968 | struct super_block * sb; |
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
971 | struct buffer_head *bh; | 973 | struct buffer_head *bh; |
972 | ext4_lblk_t block; | 974 | ext4_lblk_t block; |
973 | int retval; | 975 | int retval; |
974 | int namelen = dentry->d_name.len; | 976 | int namelen = d_name->len; |
975 | const u8 *name = dentry->d_name.name; | 977 | const u8 *name = d_name->name; |
976 | struct inode *dir = dentry->d_parent->d_inode; | ||
977 | 978 | ||
978 | sb = dir->i_sb; | 979 | sb = dir->i_sb; |
979 | /* NFS may look up ".." - look at dx_root directory block */ | 980 | /* NFS may look up ".." - look at dx_root directory block */ |
980 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 981 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ |
981 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
982 | return NULL; | 983 | return NULL; |
983 | } else { | 984 | } else { |
984 | frame = frames; | 985 | frame = frames; |
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1010 | return bh; | 1011 | return bh; |
1011 | } | 1012 | } |
1012 | } | 1013 | } |
1013 | brelse (bh); | 1014 | brelse(bh); |
1014 | /* Check to see if we should continue to search */ | 1015 | /* Check to see if we should continue to search */ |
1015 | retval = ext4_htree_next_block(dir, hash, frame, | 1016 | retval = ext4_htree_next_block(dir, hash, frame, |
1016 | frames, NULL); | 1017 | frames, NULL); |
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
1025 | 1026 | ||
1026 | *err = -ENOENT; | 1027 | *err = -ENOENT; |
1027 | errout: | 1028 | errout: |
1028 | dxtrace(printk("%s not found\n", name)); | 1029 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); |
1029 | dx_release (frames); | 1030 | dx_release (frames); |
1030 | return NULL; | 1031 | return NULL; |
1031 | } | 1032 | } |
1032 | 1033 | ||
1033 | static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 1034 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
1034 | { | 1035 | { |
1035 | struct inode * inode; | 1036 | struct inode *inode; |
1036 | struct ext4_dir_entry_2 * de; | 1037 | struct ext4_dir_entry_2 *de; |
1037 | struct buffer_head * bh; | 1038 | struct buffer_head *bh; |
1038 | 1039 | ||
1039 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1040 | if (dentry->d_name.len > EXT4_NAME_LEN) |
1040 | return ERR_PTR(-ENAMETOOLONG); | 1041 | return ERR_PTR(-ENAMETOOLONG); |
1041 | 1042 | ||
1042 | bh = ext4_find_entry(dentry, &de); | 1043 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
1043 | inode = NULL; | 1044 | inode = NULL; |
1044 | if (bh) { | 1045 | if (bh) { |
1045 | unsigned long ino = le32_to_cpu(de->inode); | 1046 | unsigned long ino = le32_to_cpu(de->inode); |
1046 | brelse (bh); | 1047 | brelse(bh); |
1047 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1048 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1048 | ext4_error(dir->i_sb, "ext4_lookup", | 1049 | ext4_error(dir->i_sb, "ext4_lookup", |
1049 | "bad inode number: %lu", ino); | 1050 | "bad inode number: %lu", ino); |
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1062 | unsigned long ino; | 1063 | unsigned long ino; |
1063 | struct dentry *parent; | 1064 | struct dentry *parent; |
1064 | struct inode *inode; | 1065 | struct inode *inode; |
1065 | struct dentry dotdot; | 1066 | static const struct qstr dotdot = { |
1067 | .name = "..", | ||
1068 | .len = 2, | ||
1069 | }; | ||
1066 | struct ext4_dir_entry_2 * de; | 1070 | struct ext4_dir_entry_2 * de; |
1067 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
1068 | 1072 | ||
1069 | dotdot.d_name.name = ".."; | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1070 | dotdot.d_name.len = 2; | ||
1071 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
1072 | |||
1073 | bh = ext4_find_entry(&dotdot, &de); | ||
1074 | inode = NULL; | 1074 | inode = NULL; |
1075 | if (!bh) | 1075 | if (!bh) |
1076 | return ERR_PTR(-ENOENT); | 1076 | return ERR_PTR(-ENOENT); |
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1201 | 1201 | ||
1202 | /* create map in the end of data2 block */ | 1202 | /* create map in the end of data2 block */ |
1203 | map = (struct dx_map_entry *) (data2 + blocksize); | 1203 | map = (struct dx_map_entry *) (data2 + blocksize); |
1204 | count = dx_make_map ((struct ext4_dir_entry_2 *) data1, | 1204 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, |
1205 | blocksize, hinfo, map); | 1205 | blocksize, hinfo, map); |
1206 | map -= count; | 1206 | map -= count; |
1207 | dx_sort_map (map, count); | 1207 | dx_sort_map(map, count); |
1208 | /* Split the existing block in the middle, size-wise */ | 1208 | /* Split the existing block in the middle, size-wise */ |
1209 | size = 0; | 1209 | size = 0; |
1210 | move = 0; | 1210 | move = 0; |
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1225 | 1225 | ||
1226 | /* Fancy dance to stay within two buffers */ | 1226 | /* Fancy dance to stay within two buffers */ |
1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
1228 | de = dx_pack_dirents(data1,blocksize); | 1228 | de = dx_pack_dirents(data1, blocksize); |
1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); | 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); | 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1237 | swap(*bh, bh2); | 1237 | swap(*bh, bh2); |
1238 | de = de2; | 1238 | de = de2; |
1239 | } | 1239 | } |
1240 | dx_insert_block (frame, hash2 + continued, newblock); | 1240 | dx_insert_block(frame, hash2 + continued, newblock); |
1241 | err = ext4_journal_dirty_metadata (handle, bh2); | 1241 | err = ext4_journal_dirty_metadata(handle, bh2); |
1242 | if (err) | 1242 | if (err) |
1243 | goto journal_error; | 1243 | goto journal_error; |
1244 | err = ext4_journal_dirty_metadata (handle, frame->bh); | 1244 | err = ext4_journal_dirty_metadata(handle, frame->bh); |
1245 | if (err) | 1245 | if (err) |
1246 | goto journal_error; | 1246 | goto journal_error; |
1247 | brelse (bh2); | 1247 | brelse(bh2); |
1248 | dxtrace(dx_show_index ("frame", frame->entries)); | 1248 | dxtrace(dx_show_index("frame", frame->entries)); |
1249 | return de; | 1249 | return de; |
1250 | 1250 | ||
1251 | journal_error: | 1251 | journal_error: |
@@ -1271,7 +1271,7 @@ errout: | |||
1271 | */ | 1271 | */ |
1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
1273 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, |
1274 | struct buffer_head * bh) | 1274 | struct buffer_head *bh) |
1275 | { | 1275 | { |
1276 | struct inode *dir = dentry->d_parent->d_inode; | 1276 | struct inode *dir = dentry->d_parent->d_inode; |
1277 | const char *name = dentry->d_name.name; | 1277 | const char *name = dentry->d_name.name; |
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1288 | while ((char *) de <= top) { | 1288 | while ((char *) de <= top) { |
1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
1290 | bh, offset)) { | 1290 | bh, offset)) { |
1291 | brelse (bh); | 1291 | brelse(bh); |
1292 | return -EIO; | 1292 | return -EIO; |
1293 | } | 1293 | } |
1294 | if (ext4_match (namelen, name, de)) { | 1294 | if (ext4_match(namelen, name, de)) { |
1295 | brelse (bh); | 1295 | brelse(bh); |
1296 | return -EEXIST; | 1296 | return -EEXIST; |
1297 | } | 1297 | } |
1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1329 | } else | 1329 | } else |
1330 | de->inode = 0; | 1330 | de->inode = 0; |
1331 | de->name_len = namelen; | 1331 | de->name_len = namelen; |
1332 | memcpy (de->name, name, namelen); | 1332 | memcpy(de->name, name, namelen); |
1333 | /* | 1333 | /* |
1334 | * XXX shouldn't update any times until successful | 1334 | * XXX shouldn't update any times until successful |
1335 | * completion of syscall, but too many callers depend | 1335 | * completion of syscall, but too many callers depend |
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1377 | struct fake_dirent *fde; | 1377 | struct fake_dirent *fde; |
1378 | 1378 | ||
1379 | blocksize = dir->i_sb->s_blocksize; | 1379 | blocksize = dir->i_sb->s_blocksize; |
1380 | dxtrace(printk("Creating index\n")); | 1380 | dxtrace(printk(KERN_DEBUG "Creating index\n")); |
1381 | retval = ext4_journal_get_write_access(handle, bh); | 1381 | retval = ext4_journal_get_write_access(handle, bh); |
1382 | if (retval) { | 1382 | if (retval) { |
1383 | ext4_std_error(dir->i_sb, retval); | 1383 | ext4_std_error(dir->i_sb, retval); |
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1386 | } | 1386 | } |
1387 | root = (struct dx_root *) bh->b_data; | 1387 | root = (struct dx_root *) bh->b_data; |
1388 | 1388 | ||
1389 | bh2 = ext4_append (handle, dir, &block, &retval); | 1389 | bh2 = ext4_append(handle, dir, &block, &retval); |
1390 | if (!(bh2)) { | 1390 | if (!(bh2)) { |
1391 | brelse(bh); | 1391 | brelse(bh); |
1392 | return retval; | 1392 | return retval; |
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1412 | root->info.info_length = sizeof(root->info); | 1412 | root->info.info_length = sizeof(root->info); |
1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
1414 | entries = root->entries; | 1414 | entries = root->entries; |
1415 | dx_set_block (entries, 1); | 1415 | dx_set_block(entries, 1); |
1416 | dx_set_count (entries, 1); | 1416 | dx_set_count(entries, 1); |
1417 | dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); | 1417 | dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); |
1418 | 1418 | ||
1419 | /* Initialize as for dx_probe */ | 1419 | /* Initialize as for dx_probe */ |
1420 | hinfo.hash_version = root->info.hash_version; | 1420 | hinfo.hash_version = root->info.hash_version; |
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1443 | * may not sleep between calling this and putting something into | 1443 | * may not sleep between calling this and putting something into |
1444 | * the entry, as someone else might have used it while you slept. | 1444 | * the entry, as someone else might have used it while you slept. |
1445 | */ | 1445 | */ |
1446 | static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | 1446 | static int ext4_add_entry(handle_t *handle, struct dentry *dentry, |
1447 | struct inode *inode) | 1447 | struct inode *inode) |
1448 | { | 1448 | { |
1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
1450 | unsigned long offset; | 1450 | unsigned long offset; |
1451 | struct buffer_head * bh; | 1451 | struct buffer_head *bh; |
1452 | struct ext4_dir_entry_2 *de; | 1452 | struct ext4_dir_entry_2 *de; |
1453 | struct super_block * sb; | 1453 | struct super_block *sb; |
1454 | int retval; | 1454 | int retval; |
1455 | int dx_fallback=0; | 1455 | int dx_fallback=0; |
1456 | unsigned blocksize; | 1456 | unsigned blocksize; |
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1500 | struct dx_frame frames[2], *frame; | 1500 | struct dx_frame frames[2], *frame; |
1501 | struct dx_entry *entries, *at; | 1501 | struct dx_entry *entries, *at; |
1502 | struct dx_hash_info hinfo; | 1502 | struct dx_hash_info hinfo; |
1503 | struct buffer_head * bh; | 1503 | struct buffer_head *bh; |
1504 | struct inode *dir = dentry->d_parent->d_inode; | 1504 | struct inode *dir = dentry->d_parent->d_inode; |
1505 | struct super_block * sb = dir->i_sb; | 1505 | struct super_block *sb = dir->i_sb; |
1506 | struct ext4_dir_entry_2 *de; | 1506 | struct ext4_dir_entry_2 *de; |
1507 | int err; | 1507 | int err; |
1508 | 1508 | ||
1509 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1509 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
1510 | if (!frame) | 1510 | if (!frame) |
1511 | return err; | 1511 | return err; |
1512 | entries = frame->entries; | 1512 | entries = frame->entries; |
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | /* Block full, should compress but for now just split */ | 1529 | /* Block full, should compress but for now just split */ |
1530 | dxtrace(printk("using %u of %u node entries\n", | 1530 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
1531 | dx_get_count(entries), dx_get_limit(entries))); | 1531 | dx_get_count(entries), dx_get_limit(entries))); |
1532 | /* Need to split index? */ | 1532 | /* Need to split index? */ |
1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1559 | if (levels) { | 1559 | if (levels) { |
1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; | 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; |
1561 | unsigned hash2 = dx_get_hash(entries + icount1); | 1561 | unsigned hash2 = dx_get_hash(entries + icount1); |
1562 | dxtrace(printk("Split index %i/%i\n", icount1, icount2)); | 1562 | dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", |
1563 | icount1, icount2)); | ||
1563 | 1564 | ||
1564 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ | 1565 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ |
1565 | err = ext4_journal_get_write_access(handle, | 1566 | err = ext4_journal_get_write_access(handle, |
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1567 | if (err) | 1568 | if (err) |
1568 | goto journal_error; | 1569 | goto journal_error; |
1569 | 1570 | ||
1570 | memcpy ((char *) entries2, (char *) (entries + icount1), | 1571 | memcpy((char *) entries2, (char *) (entries + icount1), |
1571 | icount2 * sizeof(struct dx_entry)); | 1572 | icount2 * sizeof(struct dx_entry)); |
1572 | dx_set_count (entries, icount1); | 1573 | dx_set_count(entries, icount1); |
1573 | dx_set_count (entries2, icount2); | 1574 | dx_set_count(entries2, icount2); |
1574 | dx_set_limit (entries2, dx_node_limit(dir)); | 1575 | dx_set_limit(entries2, dx_node_limit(dir)); |
1575 | 1576 | ||
1576 | /* Which index block gets the new entry? */ | 1577 | /* Which index block gets the new entry? */ |
1577 | if (at - entries >= icount1) { | 1578 | if (at - entries >= icount1) { |
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1579 | frame->entries = entries = entries2; | 1580 | frame->entries = entries = entries2; |
1580 | swap(frame->bh, bh2); | 1581 | swap(frame->bh, bh2); |
1581 | } | 1582 | } |
1582 | dx_insert_block (frames + 0, hash2, newblock); | 1583 | dx_insert_block(frames + 0, hash2, newblock); |
1583 | dxtrace(dx_show_index ("node", frames[1].entries)); | 1584 | dxtrace(dx_show_index("node", frames[1].entries)); |
1584 | dxtrace(dx_show_index ("node", | 1585 | dxtrace(dx_show_index("node", |
1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1586 | ((struct dx_node *) bh2->b_data)->entries)); |
1586 | err = ext4_journal_dirty_metadata(handle, bh2); | 1587 | err = ext4_journal_dirty_metadata(handle, bh2); |
1587 | if (err) | 1588 | if (err) |
1588 | goto journal_error; | 1589 | goto journal_error; |
1589 | brelse (bh2); | 1590 | brelse (bh2); |
1590 | } else { | 1591 | } else { |
1591 | dxtrace(printk("Creating second level index...\n")); | 1592 | dxtrace(printk(KERN_DEBUG |
1593 | "Creating second level index...\n")); | ||
1592 | memcpy((char *) entries2, (char *) entries, | 1594 | memcpy((char *) entries2, (char *) entries, |
1593 | icount * sizeof(struct dx_entry)); | 1595 | icount * sizeof(struct dx_entry)); |
1594 | dx_set_limit(entries2, dx_node_limit(dir)); | 1596 | dx_set_limit(entries2, dx_node_limit(dir)); |
@@ -1630,12 +1632,12 @@ cleanup: | |||
1630 | * ext4_delete_entry deletes a directory entry by merging it with the | 1632 | * ext4_delete_entry deletes a directory entry by merging it with the |
1631 | * previous entry | 1633 | * previous entry |
1632 | */ | 1634 | */ |
1633 | static int ext4_delete_entry (handle_t *handle, | 1635 | static int ext4_delete_entry(handle_t *handle, |
1634 | struct inode * dir, | 1636 | struct inode *dir, |
1635 | struct ext4_dir_entry_2 * de_del, | 1637 | struct ext4_dir_entry_2 *de_del, |
1636 | struct buffer_head * bh) | 1638 | struct buffer_head *bh) |
1637 | { | 1639 | { |
1638 | struct ext4_dir_entry_2 * de, * pde; | 1640 | struct ext4_dir_entry_2 *de, *pde; |
1639 | int i; | 1641 | int i; |
1640 | 1642 | ||
1641 | i = 0; | 1643 | i = 0; |
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, | |||
1716 | * If the create succeeds, we fill in the inode information | 1718 | * If the create succeeds, we fill in the inode information |
1717 | * with d_instantiate(). | 1719 | * with d_instantiate(). |
1718 | */ | 1720 | */ |
1719 | static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, | 1721 | static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, |
1720 | struct nameidata *nd) | 1722 | struct nameidata *nd) |
1721 | { | 1723 | { |
1722 | handle_t *handle; | 1724 | handle_t *handle; |
1723 | struct inode * inode; | 1725 | struct inode *inode; |
1724 | int err, retries = 0; | 1726 | int err, retries = 0; |
1725 | 1727 | ||
1726 | retry: | 1728 | retry: |
@@ -1747,8 +1749,8 @@ retry: | |||
1747 | return err; | 1749 | return err; |
1748 | } | 1750 | } |
1749 | 1751 | ||
1750 | static int ext4_mknod (struct inode * dir, struct dentry *dentry, | 1752 | static int ext4_mknod(struct inode *dir, struct dentry *dentry, |
1751 | int mode, dev_t rdev) | 1753 | int mode, dev_t rdev) |
1752 | { | 1754 | { |
1753 | handle_t *handle; | 1755 | handle_t *handle; |
1754 | struct inode *inode; | 1756 | struct inode *inode; |
@@ -1767,11 +1769,11 @@ retry: | |||
1767 | if (IS_DIRSYNC(dir)) | 1769 | if (IS_DIRSYNC(dir)) |
1768 | handle->h_sync = 1; | 1770 | handle->h_sync = 1; |
1769 | 1771 | ||
1770 | inode = ext4_new_inode (handle, dir, mode); | 1772 | inode = ext4_new_inode(handle, dir, mode); |
1771 | err = PTR_ERR(inode); | 1773 | err = PTR_ERR(inode); |
1772 | if (!IS_ERR(inode)) { | 1774 | if (!IS_ERR(inode)) { |
1773 | init_special_inode(inode, inode->i_mode, rdev); | 1775 | init_special_inode(inode, inode->i_mode, rdev); |
1774 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1776 | #ifdef CONFIG_EXT4_FS_XATTR |
1775 | inode->i_op = &ext4_special_inode_operations; | 1777 | inode->i_op = &ext4_special_inode_operations; |
1776 | #endif | 1778 | #endif |
1777 | err = ext4_add_nondir(handle, dentry, inode); | 1779 | err = ext4_add_nondir(handle, dentry, inode); |
@@ -1782,12 +1784,12 @@ retry: | |||
1782 | return err; | 1784 | return err; |
1783 | } | 1785 | } |
1784 | 1786 | ||
1785 | static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) | 1787 | static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
1786 | { | 1788 | { |
1787 | handle_t *handle; | 1789 | handle_t *handle; |
1788 | struct inode * inode; | 1790 | struct inode *inode; |
1789 | struct buffer_head * dir_block; | 1791 | struct buffer_head *dir_block; |
1790 | struct ext4_dir_entry_2 * de; | 1792 | struct ext4_dir_entry_2 *de; |
1791 | int err, retries = 0; | 1793 | int err, retries = 0; |
1792 | 1794 | ||
1793 | if (EXT4_DIR_LINK_MAX(dir)) | 1795 | if (EXT4_DIR_LINK_MAX(dir)) |
@@ -1803,7 +1805,7 @@ retry: | |||
1803 | if (IS_DIRSYNC(dir)) | 1805 | if (IS_DIRSYNC(dir)) |
1804 | handle->h_sync = 1; | 1806 | handle->h_sync = 1; |
1805 | 1807 | ||
1806 | inode = ext4_new_inode (handle, dir, S_IFDIR | mode); | 1808 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); |
1807 | err = PTR_ERR(inode); | 1809 | err = PTR_ERR(inode); |
1808 | if (IS_ERR(inode)) | 1810 | if (IS_ERR(inode)) |
1809 | goto out_stop; | 1811 | goto out_stop; |
@@ -1811,7 +1813,7 @@ retry: | |||
1811 | inode->i_op = &ext4_dir_inode_operations; | 1813 | inode->i_op = &ext4_dir_inode_operations; |
1812 | inode->i_fop = &ext4_dir_operations; | 1814 | inode->i_fop = &ext4_dir_operations; |
1813 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 1815 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
1814 | dir_block = ext4_bread (handle, inode, 0, 1, &err); | 1816 | dir_block = ext4_bread(handle, inode, 0, 1, &err); |
1815 | if (!dir_block) | 1817 | if (!dir_block) |
1816 | goto out_clear_inode; | 1818 | goto out_clear_inode; |
1817 | BUFFER_TRACE(dir_block, "get_write_access"); | 1819 | BUFFER_TRACE(dir_block, "get_write_access"); |
@@ -1820,26 +1822,26 @@ retry: | |||
1820 | de->inode = cpu_to_le32(inode->i_ino); | 1822 | de->inode = cpu_to_le32(inode->i_ino); |
1821 | de->name_len = 1; | 1823 | de->name_len = 1; |
1822 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); | 1824 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
1823 | strcpy (de->name, "."); | 1825 | strcpy(de->name, "."); |
1824 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1826 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1825 | de = ext4_next_entry(de); | 1827 | de = ext4_next_entry(de); |
1826 | de->inode = cpu_to_le32(dir->i_ino); | 1828 | de->inode = cpu_to_le32(dir->i_ino); |
1827 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - | 1829 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
1828 | EXT4_DIR_REC_LEN(1)); | 1830 | EXT4_DIR_REC_LEN(1)); |
1829 | de->name_len = 2; | 1831 | de->name_len = 2; |
1830 | strcpy (de->name, ".."); | 1832 | strcpy(de->name, ".."); |
1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1833 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1832 | inode->i_nlink = 2; | 1834 | inode->i_nlink = 2; |
1833 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); | 1835 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); |
1834 | ext4_journal_dirty_metadata(handle, dir_block); | 1836 | ext4_journal_dirty_metadata(handle, dir_block); |
1835 | brelse (dir_block); | 1837 | brelse(dir_block); |
1836 | ext4_mark_inode_dirty(handle, inode); | 1838 | ext4_mark_inode_dirty(handle, inode); |
1837 | err = ext4_add_entry (handle, dentry, inode); | 1839 | err = ext4_add_entry(handle, dentry, inode); |
1838 | if (err) { | 1840 | if (err) { |
1839 | out_clear_inode: | 1841 | out_clear_inode: |
1840 | clear_nlink(inode); | 1842 | clear_nlink(inode); |
1841 | ext4_mark_inode_dirty(handle, inode); | 1843 | ext4_mark_inode_dirty(handle, inode); |
1842 | iput (inode); | 1844 | iput(inode); |
1843 | goto out_stop; | 1845 | goto out_stop; |
1844 | } | 1846 | } |
1845 | ext4_inc_count(handle, dir); | 1847 | ext4_inc_count(handle, dir); |
@@ -1856,17 +1858,17 @@ out_stop: | |||
1856 | /* | 1858 | /* |
1857 | * routine to check that the specified directory is empty (for rmdir) | 1859 | * routine to check that the specified directory is empty (for rmdir) |
1858 | */ | 1860 | */ |
1859 | static int empty_dir (struct inode * inode) | 1861 | static int empty_dir(struct inode *inode) |
1860 | { | 1862 | { |
1861 | unsigned long offset; | 1863 | unsigned long offset; |
1862 | struct buffer_head * bh; | 1864 | struct buffer_head *bh; |
1863 | struct ext4_dir_entry_2 * de, * de1; | 1865 | struct ext4_dir_entry_2 *de, *de1; |
1864 | struct super_block * sb; | 1866 | struct super_block *sb; |
1865 | int err = 0; | 1867 | int err = 0; |
1866 | 1868 | ||
1867 | sb = inode->i_sb; | 1869 | sb = inode->i_sb; |
1868 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1870 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1869 | !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { | 1871 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1870 | if (err) | 1872 | if (err) |
1871 | ext4_error(inode->i_sb, __func__, | 1873 | ext4_error(inode->i_sb, __func__, |
1872 | "error %d reading directory #%lu offset 0", | 1874 | "error %d reading directory #%lu offset 0", |
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) | |||
1881 | de1 = ext4_next_entry(de); | 1883 | de1 = ext4_next_entry(de); |
1882 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1884 | if (le32_to_cpu(de->inode) != inode->i_ino || |
1883 | !le32_to_cpu(de1->inode) || | 1885 | !le32_to_cpu(de1->inode) || |
1884 | strcmp (".", de->name) || | 1886 | strcmp(".", de->name) || |
1885 | strcmp ("..", de1->name)) { | 1887 | strcmp("..", de1->name)) { |
1886 | ext4_warning (inode->i_sb, "empty_dir", | 1888 | ext4_warning(inode->i_sb, "empty_dir", |
1887 | "bad directory (dir #%lu) - no `.' or `..'", | 1889 | "bad directory (dir #%lu) - no `.' or `..'", |
1888 | inode->i_ino); | 1890 | inode->i_ino); |
1889 | brelse (bh); | 1891 | brelse(bh); |
1890 | return 1; | 1892 | return 1; |
1891 | } | 1893 | } |
1892 | offset = ext4_rec_len_from_disk(de->rec_len) + | 1894 | offset = ext4_rec_len_from_disk(de->rec_len) + |
1893 | ext4_rec_len_from_disk(de1->rec_len); | 1895 | ext4_rec_len_from_disk(de1->rec_len); |
1894 | de = ext4_next_entry(de1); | 1896 | de = ext4_next_entry(de1); |
1895 | while (offset < inode->i_size ) { | 1897 | while (offset < inode->i_size) { |
1896 | if (!bh || | 1898 | if (!bh || |
1897 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1899 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1898 | err = 0; | 1900 | err = 0; |
1899 | brelse (bh); | 1901 | brelse(bh); |
1900 | bh = ext4_bread (NULL, inode, | 1902 | bh = ext4_bread(NULL, inode, |
1901 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1903 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
1902 | if (!bh) { | 1904 | if (!bh) { |
1903 | if (err) | 1905 | if (err) |
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) | |||
1917 | continue; | 1919 | continue; |
1918 | } | 1920 | } |
1919 | if (le32_to_cpu(de->inode)) { | 1921 | if (le32_to_cpu(de->inode)) { |
1920 | brelse (bh); | 1922 | brelse(bh); |
1921 | return 0; | 1923 | return 0; |
1922 | } | 1924 | } |
1923 | offset += ext4_rec_len_from_disk(de->rec_len); | 1925 | offset += ext4_rec_len_from_disk(de->rec_len); |
1924 | de = ext4_next_entry(de); | 1926 | de = ext4_next_entry(de); |
1925 | } | 1927 | } |
1926 | brelse (bh); | 1928 | brelse(bh); |
1927 | return 1; | 1929 | return 1; |
1928 | } | 1930 | } |
1929 | 1931 | ||
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1954 | * ->i_nlink. For, say it, character device. Not a regular file, | 1956 | * ->i_nlink. For, say it, character device. Not a regular file, |
1955 | * not a directory, not a symlink and ->i_nlink > 0. | 1957 | * not a directory, not a symlink and ->i_nlink > 0. |
1956 | */ | 1958 | */ |
1957 | J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1959 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1958 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1960 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
1959 | 1961 | ||
1960 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); | 1962 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); |
1961 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 1963 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
@@ -2069,12 +2071,12 @@ out_brelse: | |||
2069 | goto out_err; | 2071 | goto out_err; |
2070 | } | 2072 | } |
2071 | 2073 | ||
2072 | static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | 2074 | static int ext4_rmdir(struct inode *dir, struct dentry *dentry) |
2073 | { | 2075 | { |
2074 | int retval; | 2076 | int retval; |
2075 | struct inode * inode; | 2077 | struct inode *inode; |
2076 | struct buffer_head * bh; | 2078 | struct buffer_head *bh; |
2077 | struct ext4_dir_entry_2 * de; | 2079 | struct ext4_dir_entry_2 *de; |
2078 | handle_t *handle; | 2080 | handle_t *handle; |
2079 | 2081 | ||
2080 | /* Initialize quotas before so that eventual writes go in | 2082 | /* Initialize quotas before so that eventual writes go in |
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2085 | return PTR_ERR(handle); | 2087 | return PTR_ERR(handle); |
2086 | 2088 | ||
2087 | retval = -ENOENT; | 2089 | retval = -ENOENT; |
2088 | bh = ext4_find_entry (dentry, &de); | 2090 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2089 | if (!bh) | 2091 | if (!bh) |
2090 | goto end_rmdir; | 2092 | goto end_rmdir; |
2091 | 2093 | ||
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2099 | goto end_rmdir; | 2101 | goto end_rmdir; |
2100 | 2102 | ||
2101 | retval = -ENOTEMPTY; | 2103 | retval = -ENOTEMPTY; |
2102 | if (!empty_dir (inode)) | 2104 | if (!empty_dir(inode)) |
2103 | goto end_rmdir; | 2105 | goto end_rmdir; |
2104 | 2106 | ||
2105 | retval = ext4_delete_entry(handle, dir, de, bh); | 2107 | retval = ext4_delete_entry(handle, dir, de, bh); |
2106 | if (retval) | 2108 | if (retval) |
2107 | goto end_rmdir; | 2109 | goto end_rmdir; |
2108 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2110 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
2109 | ext4_warning (inode->i_sb, "ext4_rmdir", | 2111 | ext4_warning(inode->i_sb, "ext4_rmdir", |
2110 | "empty directory has too many links (%d)", | 2112 | "empty directory has too many links (%d)", |
2111 | inode->i_nlink); | 2113 | inode->i_nlink); |
2112 | inode->i_version++; | 2114 | inode->i_version++; |
2113 | clear_nlink(inode); | 2115 | clear_nlink(inode); |
2114 | /* There's no need to set i_disksize: the fact that i_nlink is | 2116 | /* There's no need to set i_disksize: the fact that i_nlink is |
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
2124 | 2126 | ||
2125 | end_rmdir: | 2127 | end_rmdir: |
2126 | ext4_journal_stop(handle); | 2128 | ext4_journal_stop(handle); |
2127 | brelse (bh); | 2129 | brelse(bh); |
2128 | return retval; | 2130 | return retval; |
2129 | } | 2131 | } |
2130 | 2132 | ||
2131 | static int ext4_unlink(struct inode * dir, struct dentry *dentry) | 2133 | static int ext4_unlink(struct inode *dir, struct dentry *dentry) |
2132 | { | 2134 | { |
2133 | int retval; | 2135 | int retval; |
2134 | struct inode * inode; | 2136 | struct inode *inode; |
2135 | struct buffer_head * bh; | 2137 | struct buffer_head *bh; |
2136 | struct ext4_dir_entry_2 * de; | 2138 | struct ext4_dir_entry_2 *de; |
2137 | handle_t *handle; | 2139 | handle_t *handle; |
2138 | 2140 | ||
2139 | /* Initialize quotas before so that eventual writes go | 2141 | /* Initialize quotas before so that eventual writes go |
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2147 | handle->h_sync = 1; | 2149 | handle->h_sync = 1; |
2148 | 2150 | ||
2149 | retval = -ENOENT; | 2151 | retval = -ENOENT; |
2150 | bh = ext4_find_entry (dentry, &de); | 2152 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
2151 | if (!bh) | 2153 | if (!bh) |
2152 | goto end_unlink; | 2154 | goto end_unlink; |
2153 | 2155 | ||
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2158 | goto end_unlink; | 2160 | goto end_unlink; |
2159 | 2161 | ||
2160 | if (!inode->i_nlink) { | 2162 | if (!inode->i_nlink) { |
2161 | ext4_warning (inode->i_sb, "ext4_unlink", | 2163 | ext4_warning(inode->i_sb, "ext4_unlink", |
2162 | "Deleting nonexistent file (%lu), %d", | 2164 | "Deleting nonexistent file (%lu), %d", |
2163 | inode->i_ino, inode->i_nlink); | 2165 | inode->i_ino, inode->i_nlink); |
2164 | inode->i_nlink = 1; | 2166 | inode->i_nlink = 1; |
2165 | } | 2167 | } |
2166 | retval = ext4_delete_entry(handle, dir, de, bh); | 2168 | retval = ext4_delete_entry(handle, dir, de, bh); |
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
2178 | 2180 | ||
2179 | end_unlink: | 2181 | end_unlink: |
2180 | ext4_journal_stop(handle); | 2182 | ext4_journal_stop(handle); |
2181 | brelse (bh); | 2183 | brelse(bh); |
2182 | return retval; | 2184 | return retval; |
2183 | } | 2185 | } |
2184 | 2186 | ||
2185 | static int ext4_symlink (struct inode * dir, | 2187 | static int ext4_symlink(struct inode *dir, |
2186 | struct dentry *dentry, const char * symname) | 2188 | struct dentry *dentry, const char *symname) |
2187 | { | 2189 | { |
2188 | handle_t *handle; | 2190 | handle_t *handle; |
2189 | struct inode * inode; | 2191 | struct inode *inode; |
2190 | int l, err, retries = 0; | 2192 | int l, err, retries = 0; |
2191 | 2193 | ||
2192 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
@@ -2203,12 +2205,12 @@ retry: | |||
2203 | if (IS_DIRSYNC(dir)) | 2205 | if (IS_DIRSYNC(dir)) |
2204 | handle->h_sync = 1; | 2206 | handle->h_sync = 1; |
2205 | 2207 | ||
2206 | inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); | 2208 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); |
2207 | err = PTR_ERR(inode); | 2209 | err = PTR_ERR(inode); |
2208 | if (IS_ERR(inode)) | 2210 | if (IS_ERR(inode)) |
2209 | goto out_stop; | 2211 | goto out_stop; |
2210 | 2212 | ||
2211 | if (l > sizeof (EXT4_I(inode)->i_data)) { | 2213 | if (l > sizeof(EXT4_I(inode)->i_data)) { |
2212 | inode->i_op = &ext4_symlink_inode_operations; | 2214 | inode->i_op = &ext4_symlink_inode_operations; |
2213 | ext4_set_aops(inode); | 2215 | ext4_set_aops(inode); |
2214 | /* | 2216 | /* |
@@ -2221,14 +2223,14 @@ retry: | |||
2221 | if (err) { | 2223 | if (err) { |
2222 | clear_nlink(inode); | 2224 | clear_nlink(inode); |
2223 | ext4_mark_inode_dirty(handle, inode); | 2225 | ext4_mark_inode_dirty(handle, inode); |
2224 | iput (inode); | 2226 | iput(inode); |
2225 | goto out_stop; | 2227 | goto out_stop; |
2226 | } | 2228 | } |
2227 | } else { | 2229 | } else { |
2228 | /* clear the extent format for fast symlink */ | 2230 | /* clear the extent format for fast symlink */ |
2229 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2231 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; |
2230 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2232 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2231 | memcpy((char*)&EXT4_I(inode)->i_data,symname,l); | 2233 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2232 | inode->i_size = l-1; | 2234 | inode->i_size = l-1; |
2233 | } | 2235 | } |
2234 | EXT4_I(inode)->i_disksize = inode->i_size; | 2236 | EXT4_I(inode)->i_disksize = inode->i_size; |
@@ -2240,8 +2242,8 @@ out_stop: | |||
2240 | return err; | 2242 | return err; |
2241 | } | 2243 | } |
2242 | 2244 | ||
2243 | static int ext4_link (struct dentry * old_dentry, | 2245 | static int ext4_link(struct dentry *old_dentry, |
2244 | struct inode * dir, struct dentry *dentry) | 2246 | struct inode *dir, struct dentry *dentry) |
2245 | { | 2247 | { |
2246 | handle_t *handle; | 2248 | handle_t *handle; |
2247 | struct inode *inode = old_dentry->d_inode; | 2249 | struct inode *inode = old_dentry->d_inode; |
@@ -2284,13 +2286,13 @@ retry: | |||
2284 | * Anybody can rename anything with this: the permission checks are left to the | 2286 | * Anybody can rename anything with this: the permission checks are left to the |
2285 | * higher-level routines. | 2287 | * higher-level routines. |
2286 | */ | 2288 | */ |
2287 | static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | 2289 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
2288 | struct inode * new_dir,struct dentry *new_dentry) | 2290 | struct inode *new_dir, struct dentry *new_dentry) |
2289 | { | 2291 | { |
2290 | handle_t *handle; | 2292 | handle_t *handle; |
2291 | struct inode * old_inode, * new_inode; | 2293 | struct inode *old_inode, *new_inode; |
2292 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2294 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
2293 | struct ext4_dir_entry_2 * old_de, * new_de; | 2295 | struct ext4_dir_entry_2 *old_de, *new_de; |
2294 | int retval; | 2296 | int retval; |
2295 | 2297 | ||
2296 | old_bh = new_bh = dir_bh = NULL; | 2298 | old_bh = new_bh = dir_bh = NULL; |
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2308 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2310 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
2309 | handle->h_sync = 1; | 2311 | handle->h_sync = 1; |
2310 | 2312 | ||
2311 | old_bh = ext4_find_entry (old_dentry, &old_de); | 2313 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); |
2312 | /* | 2314 | /* |
2313 | * Check for inode number is _not_ due to possible IO errors. | 2315 | * Check for inode number is _not_ due to possible IO errors. |
2314 | * We might rmdir the source, keep it as pwd of some process | 2316 | * We might rmdir the source, keep it as pwd of some process |
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2321 | goto end_rename; | 2323 | goto end_rename; |
2322 | 2324 | ||
2323 | new_inode = new_dentry->d_inode; | 2325 | new_inode = new_dentry->d_inode; |
2324 | new_bh = ext4_find_entry (new_dentry, &new_de); | 2326 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); |
2325 | if (new_bh) { | 2327 | if (new_bh) { |
2326 | if (!new_inode) { | 2328 | if (!new_inode) { |
2327 | brelse (new_bh); | 2329 | brelse(new_bh); |
2328 | new_bh = NULL; | 2330 | new_bh = NULL; |
2329 | } | 2331 | } |
2330 | } | 2332 | } |
2331 | if (S_ISDIR(old_inode->i_mode)) { | 2333 | if (S_ISDIR(old_inode->i_mode)) { |
2332 | if (new_inode) { | 2334 | if (new_inode) { |
2333 | retval = -ENOTEMPTY; | 2335 | retval = -ENOTEMPTY; |
2334 | if (!empty_dir (new_inode)) | 2336 | if (!empty_dir(new_inode)) |
2335 | goto end_rename; | 2337 | goto end_rename; |
2336 | } | 2338 | } |
2337 | retval = -EIO; | 2339 | retval = -EIO; |
2338 | dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); | 2340 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
2339 | if (!dir_bh) | 2341 | if (!dir_bh) |
2340 | goto end_rename; | 2342 | goto end_rename; |
2341 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) | 2343 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) |
2342 | goto end_rename; | 2344 | goto end_rename; |
2343 | retval = -EMLINK; | 2345 | retval = -EMLINK; |
2344 | if (!new_inode && new_dir!=old_dir && | 2346 | if (!new_inode && new_dir != old_dir && |
2345 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2347 | new_dir->i_nlink >= EXT4_LINK_MAX) |
2346 | goto end_rename; | 2348 | goto end_rename; |
2347 | } | 2349 | } |
2348 | if (!new_bh) { | 2350 | if (!new_bh) { |
2349 | retval = ext4_add_entry (handle, new_dentry, old_inode); | 2351 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
2350 | if (retval) | 2352 | if (retval) |
2351 | goto end_rename; | 2353 | goto end_rename; |
2352 | } else { | 2354 | } else { |
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2388 | struct buffer_head *old_bh2; | 2390 | struct buffer_head *old_bh2; |
2389 | struct ext4_dir_entry_2 *old_de2; | 2391 | struct ext4_dir_entry_2 *old_de2; |
2390 | 2392 | ||
2391 | old_bh2 = ext4_find_entry(old_dentry, &old_de2); | 2393 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); |
2392 | if (old_bh2) { | 2394 | if (old_bh2) { |
2393 | retval = ext4_delete_entry(handle, old_dir, | 2395 | retval = ext4_delete_entry(handle, old_dir, |
2394 | old_de2, old_bh2); | 2396 | old_de2, old_bh2); |
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
2433 | retval = 0; | 2435 | retval = 0; |
2434 | 2436 | ||
2435 | end_rename: | 2437 | end_rename: |
2436 | brelse (dir_bh); | 2438 | brelse(dir_bh); |
2437 | brelse (old_bh); | 2439 | brelse(old_bh); |
2438 | brelse (new_bh); | 2440 | brelse(new_bh); |
2439 | ext4_journal_stop(handle); | 2441 | ext4_journal_stop(handle); |
2440 | return retval; | 2442 | return retval; |
2441 | } | 2443 | } |
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2454 | .mknod = ext4_mknod, | 2456 | .mknod = ext4_mknod, |
2455 | .rename = ext4_rename, | 2457 | .rename = ext4_rename, |
2456 | .setattr = ext4_setattr, | 2458 | .setattr = ext4_setattr, |
2457 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2459 | #ifdef CONFIG_EXT4_FS_XATTR |
2458 | .setxattr = generic_setxattr, | 2460 | .setxattr = generic_setxattr, |
2459 | .getxattr = generic_getxattr, | 2461 | .getxattr = generic_getxattr, |
2460 | .listxattr = ext4_listxattr, | 2462 | .listxattr = ext4_listxattr, |
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2465 | 2467 | ||
2466 | const struct inode_operations ext4_special_inode_operations = { | 2468 | const struct inode_operations ext4_special_inode_operations = { |
2467 | .setattr = ext4_setattr, | 2469 | .setattr = ext4_setattr, |
2468 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2470 | #ifdef CONFIG_EXT4_FS_XATTR |
2469 | .setxattr = generic_setxattr, | 2471 | .setxattr = generic_setxattr, |
2470 | .getxattr = generic_getxattr, | 2472 | .getxattr = generic_getxattr, |
2471 | .listxattr = ext4_listxattr, | 2473 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b6ec1843a015 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", | 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", |
417 | gdb_num); | 417 | gdb_num); |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * If we are not using the primary superblock/GDT copy don't resize, | 420 | * If we are not using the primary superblock/GDT copy don't resize, |
421 | * because the user tools have no way of handling this. Probably a | 421 | * because the user tools have no way of handling this. Probably a |
422 | * bad time to do it anyways. | 422 | * bad time to do it anyways. |
423 | */ | 423 | */ |
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
773 | 773 | ||
774 | if (reserved_gdb || gdb_off == 0) { | 774 | if (reserved_gdb || gdb_off == 0) { |
775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
776 | EXT4_FEATURE_COMPAT_RESIZE_INODE)){ | 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
777 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
777 | ext4_warning(sb, __func__, | 778 | ext4_warning(sb, __func__, |
778 | "No reserved GDT blocks, can't resize"); | 779 | "No reserved GDT blocks, can't resize"); |
779 | return -EPERM; | 780 | return -EPERM; |
@@ -869,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
869 | * We can allocate memory for mb_alloc based on the new group | 870 | * We can allocate memory for mb_alloc based on the new group |
870 | * descriptor | 871 | * descriptor |
871 | */ | 872 | */ |
872 | if (test_opt(sb, MBALLOC)) { | 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | 874 | if (err) |
874 | if (err) | 875 | goto exit_journal; |
875 | goto exit_journal; | 876 | |
876 | } | ||
877 | /* | 877 | /* |
878 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
879 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
@@ -928,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
928 | percpu_counter_add(&sbi->s_freeinodes_counter, | 928 | percpu_counter_add(&sbi->s_freeinodes_counter, |
929 | EXT4_INODES_PER_GROUP(sb)); | 929 | EXT4_INODES_PER_GROUP(sb)); |
930 | 930 | ||
931 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
932 | ext4_group_t flex_group; | ||
933 | flex_group = ext4_flex_group(sbi, input->group); | ||
934 | sbi->s_flex_groups[flex_group].free_blocks += | ||
935 | input->free_blocks_count; | ||
936 | sbi->s_flex_groups[flex_group].free_inodes += | ||
937 | EXT4_INODES_PER_GROUP(sb); | ||
938 | } | ||
939 | |||
931 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); | 940 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); |
932 | sb->s_dirt = 1; | 941 | sb->s_dirt = 1; |
933 | 942 | ||
@@ -963,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
963 | ext4_group_t o_groups_count; | 972 | ext4_group_t o_groups_count; |
964 | ext4_grpblk_t last; | 973 | ext4_grpblk_t last; |
965 | ext4_grpblk_t add; | 974 | ext4_grpblk_t add; |
966 | struct buffer_head * bh; | 975 | struct buffer_head *bh; |
967 | handle_t *handle; | 976 | handle_t *handle; |
968 | int err; | 977 | int err; |
969 | unsigned long freed_blocks; | 978 | unsigned long freed_blocks; |
@@ -1076,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1076 | /* | 1085 | /* |
1077 | * Mark mballoc pages as not up to date so that they will be updated | 1086 | * Mark mballoc pages as not up to date so that they will be updated |
1078 | * next time they are loaded by ext4_mb_load_buddy. | 1087 | * next time they are loaded by ext4_mb_load_buddy. |
1088 | * | ||
1089 | * XXX Bad, Bad, BAD!!! We should not be overloading the | ||
1090 | * Uptodate flag, particularly on thte bitmap bh, as way of | ||
1091 | * hinting to ext4_mb_load_buddy() that it needs to be | ||
1092 | * overloaded. A user could take a LVM snapshot, then do an | ||
1093 | * on-line fsck, and clear the uptodate flag, and this would | ||
1094 | * not be a bug in userspace, but a bug in the kernel. FIXME!!! | ||
1079 | */ | 1095 | */ |
1080 | if (test_opt(sb, MBALLOC)) { | 1096 | { |
1081 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1097 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1082 | struct inode *inode = sbi->s_buddy_cache; | 1098 | struct inode *inode = sbi->s_buddy_cache; |
1083 | int blocks_per_page; | 1099 | int blocks_per_page; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..9b2b2bc4ec17 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/proc_fs.h> | ||
38 | #include <linux/marker.h> | ||
37 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
38 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
@@ -45,6 +47,8 @@ | |||
45 | #include "namei.h" | 47 | #include "namei.h" |
46 | #include "group.h" | 48 | #include "group.h" |
47 | 49 | ||
50 | struct proc_dir_entry *ext4_proc_root; | ||
51 | |||
48 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 52 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
49 | unsigned long journal_devnum); | 53 | unsigned long journal_devnum); |
50 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, | 54 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, |
@@ -370,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
370 | */ | 374 | */ |
371 | } | 375 | } |
372 | 376 | ||
373 | int ext4_update_compat_feature(handle_t *handle, | ||
374 | struct super_block *sb, __u32 compat) | ||
375 | { | ||
376 | int err = 0; | ||
377 | if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { | ||
378 | err = ext4_journal_get_write_access(handle, | ||
379 | EXT4_SB(sb)->s_sbh); | ||
380 | if (err) | ||
381 | return err; | ||
382 | EXT4_SET_COMPAT_FEATURE(sb, compat); | ||
383 | sb->s_dirt = 1; | ||
384 | handle->h_sync = 1; | ||
385 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
386 | "call ext4_journal_dirty_met adata"); | ||
387 | err = ext4_journal_dirty_metadata(handle, | ||
388 | EXT4_SB(sb)->s_sbh); | ||
389 | } | ||
390 | return err; | ||
391 | } | ||
392 | |||
393 | int ext4_update_rocompat_feature(handle_t *handle, | ||
394 | struct super_block *sb, __u32 rocompat) | ||
395 | { | ||
396 | int err = 0; | ||
397 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { | ||
398 | err = ext4_journal_get_write_access(handle, | ||
399 | EXT4_SB(sb)->s_sbh); | ||
400 | if (err) | ||
401 | return err; | ||
402 | EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); | ||
403 | sb->s_dirt = 1; | ||
404 | handle->h_sync = 1; | ||
405 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
406 | "call ext4_journal_dirty_met adata"); | ||
407 | err = ext4_journal_dirty_metadata(handle, | ||
408 | EXT4_SB(sb)->s_sbh); | ||
409 | } | ||
410 | return err; | ||
411 | } | ||
412 | |||
413 | int ext4_update_incompat_feature(handle_t *handle, | ||
414 | struct super_block *sb, __u32 incompat) | ||
415 | { | ||
416 | int err = 0; | ||
417 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { | ||
418 | err = ext4_journal_get_write_access(handle, | ||
419 | EXT4_SB(sb)->s_sbh); | ||
420 | if (err) | ||
421 | return err; | ||
422 | EXT4_SET_INCOMPAT_FEATURE(sb, incompat); | ||
423 | sb->s_dirt = 1; | ||
424 | handle->h_sync = 1; | ||
425 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
426 | "call ext4_journal_dirty_met adata"); | ||
427 | err = ext4_journal_dirty_metadata(handle, | ||
428 | EXT4_SB(sb)->s_sbh); | ||
429 | } | ||
430 | return err; | ||
431 | } | ||
432 | |||
433 | /* | 377 | /* |
434 | * Open the external journal device | 378 | * Open the external journal device |
435 | */ | 379 | */ |
@@ -503,15 +447,18 @@ static void ext4_put_super(struct super_block *sb) | |||
503 | ext4_mb_release(sb); | 447 | ext4_mb_release(sb); |
504 | ext4_ext_release(sb); | 448 | ext4_ext_release(sb); |
505 | ext4_xattr_put_super(sb); | 449 | ext4_xattr_put_super(sb); |
506 | jbd2_journal_destroy(sbi->s_journal); | 450 | if (jbd2_journal_destroy(sbi->s_journal) < 0) |
451 | ext4_abort(sb, __func__, "Couldn't clean up the journal"); | ||
507 | sbi->s_journal = NULL; | 452 | sbi->s_journal = NULL; |
508 | if (!(sb->s_flags & MS_RDONLY)) { | 453 | if (!(sb->s_flags & MS_RDONLY)) { |
509 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 454 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
510 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 455 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
511 | BUFFER_TRACE(sbi->s_sbh, "marking dirty"); | ||
512 | mark_buffer_dirty(sbi->s_sbh); | ||
513 | ext4_commit_super(sb, es, 1); | 456 | ext4_commit_super(sb, es, 1); |
514 | } | 457 | } |
458 | if (sbi->s_proc) { | ||
459 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
460 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
461 | } | ||
515 | 462 | ||
516 | for (i = 0; i < sbi->s_gdb_count; i++) | 463 | for (i = 0; i < sbi->s_gdb_count; i++) |
517 | brelse(sbi->s_group_desc[i]); | 464 | brelse(sbi->s_group_desc[i]); |
@@ -520,6 +467,7 @@ static void ext4_put_super(struct super_block *sb) | |||
520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 467 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 468 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 469 | percpu_counter_destroy(&sbi->s_dirs_counter); |
470 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
523 | brelse(sbi->s_sbh); | 471 | brelse(sbi->s_sbh); |
524 | #ifdef CONFIG_QUOTA | 472 | #ifdef CONFIG_QUOTA |
525 | for (i = 0; i < MAXQUOTAS; i++) | 473 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -562,12 +510,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
562 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 510 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
563 | if (!ei) | 511 | if (!ei) |
564 | return NULL; | 512 | return NULL; |
565 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 513 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
566 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 514 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
567 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 515 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
568 | #endif | 516 | #endif |
569 | ei->i_block_alloc_info = NULL; | ||
570 | ei->vfs_inode.i_version = 1; | 517 | ei->vfs_inode.i_version = 1; |
518 | ei->vfs_inode.i_data.writeback_index = 0; | ||
571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 519 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 520 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
573 | spin_lock_init(&ei->i_prealloc_lock); | 521 | spin_lock_init(&ei->i_prealloc_lock); |
@@ -598,7 +546,7 @@ static void init_once(void *foo) | |||
598 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 546 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
599 | 547 | ||
600 | INIT_LIST_HEAD(&ei->i_orphan); | 548 | INIT_LIST_HEAD(&ei->i_orphan); |
601 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 549 | #ifdef CONFIG_EXT4_FS_XATTR |
602 | init_rwsem(&ei->xattr_sem); | 550 | init_rwsem(&ei->xattr_sem); |
603 | #endif | 551 | #endif |
604 | init_rwsem(&ei->i_data_sem); | 552 | init_rwsem(&ei->i_data_sem); |
@@ -624,8 +572,7 @@ static void destroy_inodecache(void) | |||
624 | 572 | ||
625 | static void ext4_clear_inode(struct inode *inode) | 573 | static void ext4_clear_inode(struct inode *inode) |
626 | { | 574 | { |
627 | struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; | 575 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
628 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | ||
629 | if (EXT4_I(inode)->i_acl && | 576 | if (EXT4_I(inode)->i_acl && |
630 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { | 577 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { |
631 | posix_acl_release(EXT4_I(inode)->i_acl); | 578 | posix_acl_release(EXT4_I(inode)->i_acl); |
@@ -637,10 +584,7 @@ static void ext4_clear_inode(struct inode *inode) | |||
637 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; | 584 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; |
638 | } | 585 | } |
639 | #endif | 586 | #endif |
640 | ext4_discard_reservation(inode); | 587 | ext4_discard_preallocations(inode); |
641 | EXT4_I(inode)->i_block_alloc_info = NULL; | ||
642 | if (unlikely(rsv)) | ||
643 | kfree(rsv); | ||
644 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 588 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
645 | &EXT4_I(inode)->jinode); | 589 | &EXT4_I(inode)->jinode); |
646 | } | 590 | } |
@@ -653,7 +597,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
653 | 597 | ||
654 | if (sbi->s_jquota_fmt) | 598 | if (sbi->s_jquota_fmt) |
655 | seq_printf(seq, ",jqfmt=%s", | 599 | seq_printf(seq, ",jqfmt=%s", |
656 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | 600 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); |
657 | 601 | ||
658 | if (sbi->s_qf_names[USRQUOTA]) | 602 | if (sbi->s_qf_names[USRQUOTA]) |
659 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 603 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
@@ -717,7 +661,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
717 | seq_puts(seq, ",debug"); | 661 | seq_puts(seq, ",debug"); |
718 | if (test_opt(sb, OLDALLOC)) | 662 | if (test_opt(sb, OLDALLOC)) |
719 | seq_puts(seq, ",oldalloc"); | 663 | seq_puts(seq, ",oldalloc"); |
720 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 664 | #ifdef CONFIG_EXT4_FS_XATTR |
721 | if (test_opt(sb, XATTR_USER) && | 665 | if (test_opt(sb, XATTR_USER) && |
722 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | 666 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) |
723 | seq_puts(seq, ",user_xattr"); | 667 | seq_puts(seq, ",user_xattr"); |
@@ -726,7 +670,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
726 | seq_puts(seq, ",nouser_xattr"); | 670 | seq_puts(seq, ",nouser_xattr"); |
727 | } | 671 | } |
728 | #endif | 672 | #endif |
729 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 673 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
730 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 674 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
731 | seq_puts(seq, ",acl"); | 675 | seq_puts(seq, ",acl"); |
732 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 676 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
@@ -751,8 +695,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
751 | seq_puts(seq, ",nobh"); | 695 | seq_puts(seq, ",nobh"); |
752 | if (!test_opt(sb, EXTENTS)) | 696 | if (!test_opt(sb, EXTENTS)) |
753 | seq_puts(seq, ",noextents"); | 697 | seq_puts(seq, ",noextents"); |
754 | if (!test_opt(sb, MBALLOC)) | ||
755 | seq_puts(seq, ",nomballoc"); | ||
756 | if (test_opt(sb, I_VERSION)) | 698 | if (test_opt(sb, I_VERSION)) |
757 | seq_puts(seq, ",i_version"); | 699 | seq_puts(seq, ",i_version"); |
758 | if (!test_opt(sb, DELALLOC)) | 700 | if (!test_opt(sb, DELALLOC)) |
@@ -772,6 +714,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
772 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 714 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
773 | seq_puts(seq, ",data=writeback"); | 715 | seq_puts(seq, ",data=writeback"); |
774 | 716 | ||
717 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
718 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
719 | sbi->s_inode_readahead_blks); | ||
720 | |||
721 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
722 | seq_puts(seq, ",data_err=abort"); | ||
723 | |||
775 | ext4_show_quota_options(seq, sb); | 724 | ext4_show_quota_options(seq, sb); |
776 | return 0; | 725 | return 0; |
777 | } | 726 | } |
@@ -821,7 +770,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
821 | } | 770 | } |
822 | 771 | ||
823 | #ifdef CONFIG_QUOTA | 772 | #ifdef CONFIG_QUOTA |
824 | #define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") | 773 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
825 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 774 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
826 | 775 | ||
827 | static int ext4_dquot_initialize(struct inode *inode, int type); | 776 | static int ext4_dquot_initialize(struct inode *inode, int type); |
@@ -895,20 +844,22 @@ static const struct export_operations ext4_export_ops = { | |||
895 | enum { | 844 | enum { |
896 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 845 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
897 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 846 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
898 | Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, | 847 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, |
899 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 848 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
900 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 849 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
901 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 850 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
902 | Opt_journal_checksum, Opt_journal_async_commit, | 851 | Opt_journal_checksum, Opt_journal_async_commit, |
903 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 852 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
853 | Opt_data_err_abort, Opt_data_err_ignore, | ||
904 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 854 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
905 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 855 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
906 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 856 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
907 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 857 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
908 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 858 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
859 | Opt_inode_readahead_blks | ||
909 | }; | 860 | }; |
910 | 861 | ||
911 | static match_table_t tokens = { | 862 | static const match_table_t tokens = { |
912 | {Opt_bsd_df, "bsddf"}, | 863 | {Opt_bsd_df, "bsddf"}, |
913 | {Opt_minix_df, "minixdf"}, | 864 | {Opt_minix_df, "minixdf"}, |
914 | {Opt_grpid, "grpid"}, | 865 | {Opt_grpid, "grpid"}, |
@@ -922,8 +873,6 @@ static match_table_t tokens = { | |||
922 | {Opt_err_panic, "errors=panic"}, | 873 | {Opt_err_panic, "errors=panic"}, |
923 | {Opt_err_ro, "errors=remount-ro"}, | 874 | {Opt_err_ro, "errors=remount-ro"}, |
924 | {Opt_nouid32, "nouid32"}, | 875 | {Opt_nouid32, "nouid32"}, |
925 | {Opt_nocheck, "nocheck"}, | ||
926 | {Opt_nocheck, "check=none"}, | ||
927 | {Opt_debug, "debug"}, | 876 | {Opt_debug, "debug"}, |
928 | {Opt_oldalloc, "oldalloc"}, | 877 | {Opt_oldalloc, "oldalloc"}, |
929 | {Opt_orlov, "orlov"}, | 878 | {Opt_orlov, "orlov"}, |
@@ -946,6 +895,8 @@ static match_table_t tokens = { | |||
946 | {Opt_data_journal, "data=journal"}, | 895 | {Opt_data_journal, "data=journal"}, |
947 | {Opt_data_ordered, "data=ordered"}, | 896 | {Opt_data_ordered, "data=ordered"}, |
948 | {Opt_data_writeback, "data=writeback"}, | 897 | {Opt_data_writeback, "data=writeback"}, |
898 | {Opt_data_err_abort, "data_err=abort"}, | ||
899 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
949 | {Opt_offusrjquota, "usrjquota="}, | 900 | {Opt_offusrjquota, "usrjquota="}, |
950 | {Opt_usrjquota, "usrjquota=%s"}, | 901 | {Opt_usrjquota, "usrjquota=%s"}, |
951 | {Opt_offgrpjquota, "grpjquota="}, | 902 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -960,12 +911,11 @@ static match_table_t tokens = { | |||
960 | {Opt_extents, "extents"}, | 911 | {Opt_extents, "extents"}, |
961 | {Opt_noextents, "noextents"}, | 912 | {Opt_noextents, "noextents"}, |
962 | {Opt_i_version, "i_version"}, | 913 | {Opt_i_version, "i_version"}, |
963 | {Opt_mballoc, "mballoc"}, | ||
964 | {Opt_nomballoc, "nomballoc"}, | ||
965 | {Opt_stripe, "stripe=%u"}, | 914 | {Opt_stripe, "stripe=%u"}, |
966 | {Opt_resize, "resize"}, | 915 | {Opt_resize, "resize"}, |
967 | {Opt_delalloc, "delalloc"}, | 916 | {Opt_delalloc, "delalloc"}, |
968 | {Opt_nodelalloc, "nodelalloc"}, | 917 | {Opt_nodelalloc, "nodelalloc"}, |
918 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | ||
969 | {Opt_err, NULL}, | 919 | {Opt_err, NULL}, |
970 | }; | 920 | }; |
971 | 921 | ||
@@ -980,7 +930,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
980 | /*todo: use simple_strtoll with >32bit ext4 */ | 930 | /*todo: use simple_strtoll with >32bit ext4 */ |
981 | sb_block = simple_strtoul(options, &options, 0); | 931 | sb_block = simple_strtoul(options, &options, 0); |
982 | if (*options && *options != ',') { | 932 | if (*options && *options != ',') { |
983 | printk("EXT4-fs: Invalid sb specification: %s\n", | 933 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
984 | (char *) *data); | 934 | (char *) *data); |
985 | return 1; | 935 | return 1; |
986 | } | 936 | } |
@@ -1059,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
1059 | case Opt_nouid32: | 1009 | case Opt_nouid32: |
1060 | set_opt(sbi->s_mount_opt, NO_UID32); | 1010 | set_opt(sbi->s_mount_opt, NO_UID32); |
1061 | break; | 1011 | break; |
1062 | case Opt_nocheck: | ||
1063 | clear_opt(sbi->s_mount_opt, CHECK); | ||
1064 | break; | ||
1065 | case Opt_debug: | 1012 | case Opt_debug: |
1066 | set_opt(sbi->s_mount_opt, DEBUG); | 1013 | set_opt(sbi->s_mount_opt, DEBUG); |
1067 | break; | 1014 | break; |
@@ -1071,7 +1018,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1071 | case Opt_orlov: | 1018 | case Opt_orlov: |
1072 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1019 | clear_opt(sbi->s_mount_opt, OLDALLOC); |
1073 | break; | 1020 | break; |
1074 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1021 | #ifdef CONFIG_EXT4_FS_XATTR |
1075 | case Opt_user_xattr: | 1022 | case Opt_user_xattr: |
1076 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1023 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1077 | break; | 1024 | break; |
@@ -1081,10 +1028,11 @@ static int parse_options(char *options, struct super_block *sb, | |||
1081 | #else | 1028 | #else |
1082 | case Opt_user_xattr: | 1029 | case Opt_user_xattr: |
1083 | case Opt_nouser_xattr: | 1030 | case Opt_nouser_xattr: |
1084 | printk("EXT4 (no)user_xattr options not supported\n"); | 1031 | printk(KERN_ERR "EXT4 (no)user_xattr options " |
1032 | "not supported\n"); | ||
1085 | break; | 1033 | break; |
1086 | #endif | 1034 | #endif |
1087 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1035 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1088 | case Opt_acl: | 1036 | case Opt_acl: |
1089 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1037 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1090 | break; | 1038 | break; |
@@ -1094,7 +1042,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
1094 | #else | 1042 | #else |
1095 | case Opt_acl: | 1043 | case Opt_acl: |
1096 | case Opt_noacl: | 1044 | case Opt_noacl: |
1097 | printk("EXT4 (no)acl options not supported\n"); | 1045 | printk(KERN_ERR "EXT4 (no)acl options " |
1046 | "not supported\n"); | ||
1098 | break; | 1047 | break; |
1099 | #endif | 1048 | #endif |
1100 | case Opt_reservation: | 1049 | case Opt_reservation: |
@@ -1177,6 +1126,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
1177 | sbi->s_mount_opt |= data_opt; | 1126 | sbi->s_mount_opt |= data_opt; |
1178 | } | 1127 | } |
1179 | break; | 1128 | break; |
1129 | case Opt_data_err_abort: | ||
1130 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1131 | break; | ||
1132 | case Opt_data_err_ignore: | ||
1133 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
1134 | break; | ||
1180 | #ifdef CONFIG_QUOTA | 1135 | #ifdef CONFIG_QUOTA |
1181 | case Opt_usrjquota: | 1136 | case Opt_usrjquota: |
1182 | qtype = USRQUOTA; | 1137 | qtype = USRQUOTA; |
@@ -1188,8 +1143,8 @@ set_qf_name: | |||
1188 | sb_any_quota_suspended(sb)) && | 1143 | sb_any_quota_suspended(sb)) && |
1189 | !sbi->s_qf_names[qtype]) { | 1144 | !sbi->s_qf_names[qtype]) { |
1190 | printk(KERN_ERR | 1145 | printk(KERN_ERR |
1191 | "EXT4-fs: Cannot change journaled " | 1146 | "EXT4-fs: Cannot change journaled " |
1192 | "quota options when quota turned on.\n"); | 1147 | "quota options when quota turned on.\n"); |
1193 | return 0; | 1148 | return 0; |
1194 | } | 1149 | } |
1195 | qname = match_strdup(&args[0]); | 1150 | qname = match_strdup(&args[0]); |
@@ -1356,12 +1311,6 @@ set_qf_format: | |||
1356 | case Opt_nodelalloc: | 1311 | case Opt_nodelalloc: |
1357 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1312 | clear_opt(sbi->s_mount_opt, DELALLOC); |
1358 | break; | 1313 | break; |
1359 | case Opt_mballoc: | ||
1360 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1361 | break; | ||
1362 | case Opt_nomballoc: | ||
1363 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
1364 | break; | ||
1365 | case Opt_stripe: | 1314 | case Opt_stripe: |
1366 | if (match_int(&args[0], &option)) | 1315 | if (match_int(&args[0], &option)) |
1367 | return 0; | 1316 | return 0; |
@@ -1372,6 +1321,13 @@ set_qf_format: | |||
1372 | case Opt_delalloc: | 1321 | case Opt_delalloc: |
1373 | set_opt(sbi->s_mount_opt, DELALLOC); | 1322 | set_opt(sbi->s_mount_opt, DELALLOC); |
1374 | break; | 1323 | break; |
1324 | case Opt_inode_readahead_blks: | ||
1325 | if (match_int(&args[0], &option)) | ||
1326 | return 0; | ||
1327 | if (option < 0 || option > (1 << 30)) | ||
1328 | return 0; | ||
1329 | sbi->s_inode_readahead_blks = option; | ||
1330 | break; | ||
1375 | default: | 1331 | default: |
1376 | printk(KERN_ERR | 1332 | printk(KERN_ERR |
1377 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1333 | "EXT4-fs: Unrecognized mount option \"%s\" " |
@@ -1472,15 +1428,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1472 | EXT4_INODES_PER_GROUP(sb), | 1428 | EXT4_INODES_PER_GROUP(sb), |
1473 | sbi->s_mount_opt); | 1429 | sbi->s_mount_opt); |
1474 | 1430 | ||
1475 | printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); | 1431 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", |
1476 | if (EXT4_SB(sb)->s_journal->j_inode == NULL) { | 1432 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
1477 | char b[BDEVNAME_SIZE]; | 1433 | "external", EXT4_SB(sb)->s_journal->j_devname); |
1478 | |||
1479 | printk("external journal on %s\n", | ||
1480 | bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); | ||
1481 | } else { | ||
1482 | printk("internal journal\n"); | ||
1483 | } | ||
1484 | return res; | 1434 | return res; |
1485 | } | 1435 | } |
1486 | 1436 | ||
@@ -1503,8 +1453,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1503 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1453 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1504 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1454 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1505 | 1455 | ||
1506 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | 1456 | /* We allocate both existing and potentially added groups */ |
1507 | groups_per_flex; | 1457 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
1458 | ((sbi->s_es->s_reserved_gdt_blocks +1 ) << | ||
1459 | EXT4_DESC_PER_BLOCK_BITS(sb))) / | ||
1460 | groups_per_flex; | ||
1508 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1461 | sbi->s_flex_groups = kzalloc(flex_group_count * |
1509 | sizeof(struct flex_groups), GFP_KERNEL); | 1462 | sizeof(struct flex_groups), GFP_KERNEL); |
1510 | if (sbi->s_flex_groups == NULL) { | 1463 | if (sbi->s_flex_groups == NULL) { |
@@ -1583,7 +1536,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1583 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1536 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1584 | flexbg_flag = 1; | 1537 | flexbg_flag = 1; |
1585 | 1538 | ||
1586 | ext4_debug ("Checking group descriptors"); | 1539 | ext4_debug("Checking group descriptors"); |
1587 | 1540 | ||
1588 | for (i = 0; i < sbi->s_groups_count; i++) { | 1541 | for (i = 0; i < sbi->s_groups_count; i++) { |
1589 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1542 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -1598,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1598 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1551 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1599 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1552 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1600 | "Block bitmap for group %lu not in group " | 1553 | "Block bitmap for group %lu not in group " |
1601 | "(block %llu)!", i, block_bitmap); | 1554 | "(block %llu)!\n", i, block_bitmap); |
1602 | return 0; | 1555 | return 0; |
1603 | } | 1556 | } |
1604 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1557 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
1605 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1558 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
1606 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1559 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1607 | "Inode bitmap for group %lu not in group " | 1560 | "Inode bitmap for group %lu not in group " |
1608 | "(block %llu)!", i, inode_bitmap); | 1561 | "(block %llu)!\n", i, inode_bitmap); |
1609 | return 0; | 1562 | return 0; |
1610 | } | 1563 | } |
1611 | inode_table = ext4_inode_table(sb, gdp); | 1564 | inode_table = ext4_inode_table(sb, gdp); |
@@ -1613,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1613 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1566 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
1614 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1567 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1615 | "Inode table for group %lu not in group " | 1568 | "Inode table for group %lu not in group " |
1616 | "(block %llu)!", i, inode_table); | 1569 | "(block %llu)!\n", i, inode_table); |
1617 | return 0; | 1570 | return 0; |
1618 | } | 1571 | } |
1619 | spin_lock(sb_bgl_lock(sbi, i)); | 1572 | spin_lock(sb_bgl_lock(sbi, i)); |
@@ -1622,8 +1575,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1622 | "Checksum for group %lu failed (%u!=%u)\n", | 1575 | "Checksum for group %lu failed (%u!=%u)\n", |
1623 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1576 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1624 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1577 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1625 | if (!(sb->s_flags & MS_RDONLY)) | 1578 | if (!(sb->s_flags & MS_RDONLY)) { |
1579 | spin_unlock(sb_bgl_lock(sbi, i)); | ||
1626 | return 0; | 1580 | return 0; |
1581 | } | ||
1627 | } | 1582 | } |
1628 | spin_unlock(sb_bgl_lock(sbi, i)); | 1583 | spin_unlock(sb_bgl_lock(sbi, i)); |
1629 | if (!flexbg_flag) | 1584 | if (!flexbg_flag) |
@@ -1713,9 +1668,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1713 | DQUOT_INIT(inode); | 1668 | DQUOT_INIT(inode); |
1714 | if (inode->i_nlink) { | 1669 | if (inode->i_nlink) { |
1715 | printk(KERN_DEBUG | 1670 | printk(KERN_DEBUG |
1716 | "%s: truncating inode %lu to %Ld bytes\n", | 1671 | "%s: truncating inode %lu to %lld bytes\n", |
1717 | __func__, inode->i_ino, inode->i_size); | 1672 | __func__, inode->i_ino, inode->i_size); |
1718 | jbd_debug(2, "truncating inode %lu to %Ld bytes\n", | 1673 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
1719 | inode->i_ino, inode->i_size); | 1674 | inode->i_ino, inode->i_size); |
1720 | ext4_truncate(inode); | 1675 | ext4_truncate(inode); |
1721 | nr_truncates++; | 1676 | nr_truncates++; |
@@ -1756,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1756 | * | 1711 | * |
1757 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 1712 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
1758 | */ | 1713 | */ |
1759 | static loff_t ext4_max_size(int blkbits) | 1714 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
1760 | { | 1715 | { |
1761 | loff_t res; | 1716 | loff_t res; |
1762 | loff_t upper_limit = MAX_LFS_FILESIZE; | 1717 | loff_t upper_limit = MAX_LFS_FILESIZE; |
1763 | 1718 | ||
1764 | /* small i_blocks in vfs inode? */ | 1719 | /* small i_blocks in vfs inode? */ |
1765 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1720 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1766 | /* | 1721 | /* |
1767 | * CONFIG_LSF is not enabled implies the inode | 1722 | * CONFIG_LSF is not enabled implies the inode |
1768 | * i_block represent total blocks in 512 bytes | 1723 | * i_block represent total blocks in 512 bytes |
@@ -1792,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits) | |||
1792 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. | 1747 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
1793 | * We need to be 1 filesystem block less than the 2^48 sector limit. | 1748 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
1794 | */ | 1749 | */ |
1795 | static loff_t ext4_max_bitmap_size(int bits) | 1750 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) |
1796 | { | 1751 | { |
1797 | loff_t res = EXT4_NDIR_BLOCKS; | 1752 | loff_t res = EXT4_NDIR_BLOCKS; |
1798 | int meta_blocks; | 1753 | int meta_blocks; |
@@ -1805,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits) | |||
1805 | * total number of 512 bytes blocks of the file | 1760 | * total number of 512 bytes blocks of the file |
1806 | */ | 1761 | */ |
1807 | 1762 | ||
1808 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1763 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1809 | /* | 1764 | /* |
1810 | * CONFIG_LSF is not enabled implies the inode | 1765 | * !has_huge_files or CONFIG_LSF is not enabled |
1811 | * i_block represent total blocks in 512 bytes | 1766 | * implies the inode i_block represent total blocks in |
1812 | * 32 == size of vfs inode i_blocks * 8 | 1767 | * 512 bytes 32 == size of vfs inode i_blocks * 8 |
1813 | */ | 1768 | */ |
1814 | upper_limit = (1LL << 32) - 1; | 1769 | upper_limit = (1LL << 32) - 1; |
1815 | 1770 | ||
@@ -1913,11 +1868,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1913 | unsigned long journal_devnum = 0; | 1868 | unsigned long journal_devnum = 0; |
1914 | unsigned long def_mount_opts; | 1869 | unsigned long def_mount_opts; |
1915 | struct inode *root; | 1870 | struct inode *root; |
1871 | char *cp; | ||
1916 | int ret = -EINVAL; | 1872 | int ret = -EINVAL; |
1917 | int blocksize; | 1873 | int blocksize; |
1918 | int db_count; | 1874 | int db_count; |
1919 | int i; | 1875 | int i; |
1920 | int needs_recovery; | 1876 | int needs_recovery, has_huge_files; |
1921 | __le32 features; | 1877 | __le32 features; |
1922 | __u64 blocks_count; | 1878 | __u64 blocks_count; |
1923 | int err; | 1879 | int err; |
@@ -1929,10 +1885,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1929 | sbi->s_mount_opt = 0; | 1885 | sbi->s_mount_opt = 0; |
1930 | sbi->s_resuid = EXT4_DEF_RESUID; | 1886 | sbi->s_resuid = EXT4_DEF_RESUID; |
1931 | sbi->s_resgid = EXT4_DEF_RESGID; | 1887 | sbi->s_resgid = EXT4_DEF_RESGID; |
1888 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | ||
1932 | sbi->s_sb_block = sb_block; | 1889 | sbi->s_sb_block = sb_block; |
1933 | 1890 | ||
1934 | unlock_kernel(); | 1891 | unlock_kernel(); |
1935 | 1892 | ||
1893 | /* Cleanup superblock name */ | ||
1894 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | ||
1895 | *cp = '!'; | ||
1896 | |||
1936 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 1897 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
1937 | if (!blocksize) { | 1898 | if (!blocksize) { |
1938 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 1899 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); |
@@ -1972,11 +1933,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1972 | set_opt(sbi->s_mount_opt, GRPID); | 1933 | set_opt(sbi->s_mount_opt, GRPID); |
1973 | if (def_mount_opts & EXT4_DEFM_UID16) | 1934 | if (def_mount_opts & EXT4_DEFM_UID16) |
1974 | set_opt(sbi->s_mount_opt, NO_UID32); | 1935 | set_opt(sbi->s_mount_opt, NO_UID32); |
1975 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1936 | #ifdef CONFIG_EXT4_FS_XATTR |
1976 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 1937 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) |
1977 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1938 | set_opt(sbi->s_mount_opt, XATTR_USER); |
1978 | #endif | 1939 | #endif |
1979 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1940 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1980 | if (def_mount_opts & EXT4_DEFM_ACL) | 1941 | if (def_mount_opts & EXT4_DEFM_ACL) |
1981 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1942 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
1982 | #endif | 1943 | #endif |
@@ -2011,11 +1972,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2011 | ext4_warning(sb, __func__, | 1972 | ext4_warning(sb, __func__, |
2012 | "extents feature not enabled on this filesystem, " | 1973 | "extents feature not enabled on this filesystem, " |
2013 | "use tune2fs.\n"); | 1974 | "use tune2fs.\n"); |
2014 | /* | ||
2015 | * turn on mballoc code by default in ext4 filesystem | ||
2016 | * Use -o nomballoc to turn it off | ||
2017 | */ | ||
2018 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
2019 | 1975 | ||
2020 | /* | 1976 | /* |
2021 | * enable delayed allocation by default | 1977 | * enable delayed allocation by default |
@@ -2040,16 +1996,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2040 | "running e2fsck is recommended\n"); | 1996 | "running e2fsck is recommended\n"); |
2041 | 1997 | ||
2042 | /* | 1998 | /* |
2043 | * Since ext4 is still considered development code, we require | ||
2044 | * that the TEST_FILESYS flag in s->flags be set. | ||
2045 | */ | ||
2046 | if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { | ||
2047 | printk(KERN_WARNING "EXT4-fs: %s: not marked " | ||
2048 | "OK to use with test code.\n", sb->s_id); | ||
2049 | goto failed_mount; | ||
2050 | } | ||
2051 | |||
2052 | /* | ||
2053 | * Check feature flags regardless of the revision level, since we | 1999 | * Check feature flags regardless of the revision level, since we |
2054 | * previously didn't change the revision level when setting the flags, | 2000 | * previously didn't change the revision level when setting the flags, |
2055 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2001 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
@@ -2068,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2068 | sb->s_id, le32_to_cpu(features)); | 2014 | sb->s_id, le32_to_cpu(features)); |
2069 | goto failed_mount; | 2015 | goto failed_mount; |
2070 | } | 2016 | } |
2071 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 2017 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2018 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2019 | if (has_huge_files) { | ||
2072 | /* | 2020 | /* |
2073 | * Large file size enabled file system can only be | 2021 | * Large file size enabled file system can only be |
2074 | * mount if kernel is build with CONFIG_LSF | 2022 | * mount if kernel is build with CONFIG_LSF |
@@ -2118,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2118 | } | 2066 | } |
2119 | } | 2067 | } |
2120 | 2068 | ||
2121 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); | 2069 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2122 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); | 2070 | has_huge_files); |
2071 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | ||
2123 | 2072 | ||
2124 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 2073 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
2125 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; | 2074 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; |
@@ -2218,6 +2167,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2218 | goto failed_mount; | 2167 | goto failed_mount; |
2219 | } | 2168 | } |
2220 | 2169 | ||
2170 | #ifdef CONFIG_PROC_FS | ||
2171 | if (ext4_proc_root) | ||
2172 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | ||
2173 | |||
2174 | if (sbi->s_proc) | ||
2175 | proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, | ||
2176 | &ext4_ui_proc_fops, | ||
2177 | &sbi->s_inode_readahead_blks); | ||
2178 | #endif | ||
2179 | |||
2221 | bgl_lock_init(&sbi->s_blockgroup_lock); | 2180 | bgl_lock_init(&sbi->s_blockgroup_lock); |
2222 | 2181 | ||
2223 | for (i = 0; i < db_count; i++) { | 2182 | for (i = 0; i < db_count; i++) { |
@@ -2256,24 +2215,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2256 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2215 | err = percpu_counter_init(&sbi->s_dirs_counter, |
2257 | ext4_count_dirs(sb)); | 2216 | ext4_count_dirs(sb)); |
2258 | } | 2217 | } |
2218 | if (!err) { | ||
2219 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2220 | } | ||
2259 | if (err) { | 2221 | if (err) { |
2260 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2222 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); |
2261 | goto failed_mount3; | 2223 | goto failed_mount3; |
2262 | } | 2224 | } |
2263 | 2225 | ||
2264 | /* per fileystem reservation list head & lock */ | ||
2265 | spin_lock_init(&sbi->s_rsv_window_lock); | ||
2266 | sbi->s_rsv_window_root = RB_ROOT; | ||
2267 | /* Add a single, static dummy reservation to the start of the | ||
2268 | * reservation window list --- it gives us a placeholder for | ||
2269 | * append-at-start-of-list which makes the allocation logic | ||
2270 | * _much_ simpler. */ | ||
2271 | sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2272 | sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
2273 | sbi->s_rsv_window_head.rsv_alloc_hit = 0; | ||
2274 | sbi->s_rsv_window_head.rsv_goal_size = 0; | ||
2275 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||
2276 | |||
2277 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2226 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2278 | 2227 | ||
2279 | /* | 2228 | /* |
@@ -2443,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2443 | "available.\n"); | 2392 | "available.\n"); |
2444 | } | 2393 | } |
2445 | 2394 | ||
2395 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2396 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2397 | "requested data journaling mode\n"); | ||
2398 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2399 | } else if (test_opt(sb, DELALLOC)) | ||
2400 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2401 | |||
2402 | ext4_ext_init(sb); | ||
2403 | err = ext4_mb_init(sb, needs_recovery); | ||
2404 | if (err) { | ||
2405 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2406 | err); | ||
2407 | goto failed_mount4; | ||
2408 | } | ||
2409 | |||
2446 | /* | 2410 | /* |
2447 | * akpm: core read_super() calls in here with the superblock locked. | 2411 | * akpm: core read_super() calls in here with the superblock locked. |
2448 | * That deadlocks, because orphan cleanup needs to lock the superblock | 2412 | * That deadlocks, because orphan cleanup needs to lock the superblock |
@@ -2462,16 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2462 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2426 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
2463 | "writeback"); | 2427 | "writeback"); |
2464 | 2428 | ||
2465 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2466 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2467 | "requested data journaling mode\n"); | ||
2468 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2469 | } else if (test_opt(sb, DELALLOC)) | ||
2470 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2471 | |||
2472 | ext4_ext_init(sb); | ||
2473 | ext4_mb_init(sb, needs_recovery); | ||
2474 | |||
2475 | lock_kernel(); | 2429 | lock_kernel(); |
2476 | return 0; | 2430 | return 0; |
2477 | 2431 | ||
@@ -2488,11 +2442,16 @@ failed_mount3: | |||
2488 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2442 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
2489 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2443 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
2490 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2444 | percpu_counter_destroy(&sbi->s_dirs_counter); |
2445 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
2491 | failed_mount2: | 2446 | failed_mount2: |
2492 | for (i = 0; i < db_count; i++) | 2447 | for (i = 0; i < db_count; i++) |
2493 | brelse(sbi->s_group_desc[i]); | 2448 | brelse(sbi->s_group_desc[i]); |
2494 | kfree(sbi->s_group_desc); | 2449 | kfree(sbi->s_group_desc); |
2495 | failed_mount: | 2450 | failed_mount: |
2451 | if (sbi->s_proc) { | ||
2452 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
2453 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
2454 | } | ||
2496 | #ifdef CONFIG_QUOTA | 2455 | #ifdef CONFIG_QUOTA |
2497 | for (i = 0; i < MAXQUOTAS; i++) | 2456 | for (i = 0; i < MAXQUOTAS; i++) |
2498 | kfree(sbi->s_qf_names[i]); | 2457 | kfree(sbi->s_qf_names[i]); |
@@ -2526,6 +2485,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
2526 | journal->j_flags |= JBD2_BARRIER; | 2485 | journal->j_flags |= JBD2_BARRIER; |
2527 | else | 2486 | else |
2528 | journal->j_flags &= ~JBD2_BARRIER; | 2487 | journal->j_flags &= ~JBD2_BARRIER; |
2488 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
2489 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2490 | else | ||
2491 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | ||
2529 | spin_unlock(&journal->j_state_lock); | 2492 | spin_unlock(&journal->j_state_lock); |
2530 | } | 2493 | } |
2531 | 2494 | ||
@@ -2551,7 +2514,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
2551 | return NULL; | 2514 | return NULL; |
2552 | } | 2515 | } |
2553 | 2516 | ||
2554 | jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", | 2517 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
2555 | journal_inode, journal_inode->i_size); | 2518 | journal_inode, journal_inode->i_size); |
2556 | if (!S_ISREG(journal_inode->i_mode)) { | 2519 | if (!S_ISREG(journal_inode->i_mode)) { |
2557 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2520 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); |
@@ -2714,6 +2677,11 @@ static int ext4_load_journal(struct super_block *sb, | |||
2714 | return -EINVAL; | 2677 | return -EINVAL; |
2715 | } | 2678 | } |
2716 | 2679 | ||
2680 | if (journal->j_flags & JBD2_BARRIER) | ||
2681 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | ||
2682 | else | ||
2683 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | ||
2684 | |||
2717 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2685 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
2718 | err = jbd2_journal_update_format(journal); | 2686 | err = jbd2_journal_update_format(journal); |
2719 | if (err) { | 2687 | if (err) { |
@@ -2798,13 +2766,34 @@ static void ext4_commit_super(struct super_block *sb, | |||
2798 | 2766 | ||
2799 | if (!sbh) | 2767 | if (!sbh) |
2800 | return; | 2768 | return; |
2769 | if (buffer_write_io_error(sbh)) { | ||
2770 | /* | ||
2771 | * Oh, dear. A previous attempt to write the | ||
2772 | * superblock failed. This could happen because the | ||
2773 | * USB device was yanked out. Or it could happen to | ||
2774 | * be a transient write error and maybe the block will | ||
2775 | * be remapped. Nothing we can do but to retry the | ||
2776 | * write and hope for the best. | ||
2777 | */ | ||
2778 | printk(KERN_ERR "ext4: previous I/O error to " | ||
2779 | "superblock detected for %s.\n", sb->s_id); | ||
2780 | clear_buffer_write_io_error(sbh); | ||
2781 | set_buffer_uptodate(sbh); | ||
2782 | } | ||
2801 | es->s_wtime = cpu_to_le32(get_seconds()); | 2783 | es->s_wtime = cpu_to_le32(get_seconds()); |
2802 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2784 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); |
2803 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2785 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); |
2804 | BUFFER_TRACE(sbh, "marking dirty"); | 2786 | BUFFER_TRACE(sbh, "marking dirty"); |
2805 | mark_buffer_dirty(sbh); | 2787 | mark_buffer_dirty(sbh); |
2806 | if (sync) | 2788 | if (sync) { |
2807 | sync_dirty_buffer(sbh); | 2789 | sync_dirty_buffer(sbh); |
2790 | if (buffer_write_io_error(sbh)) { | ||
2791 | printk(KERN_ERR "ext4: I/O error while writing " | ||
2792 | "superblock for %s.\n", sb->s_id); | ||
2793 | clear_buffer_write_io_error(sbh); | ||
2794 | set_buffer_uptodate(sbh); | ||
2795 | } | ||
2796 | } | ||
2808 | } | 2797 | } |
2809 | 2798 | ||
2810 | 2799 | ||
@@ -2819,7 +2808,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2819 | journal_t *journal = EXT4_SB(sb)->s_journal; | 2808 | journal_t *journal = EXT4_SB(sb)->s_journal; |
2820 | 2809 | ||
2821 | jbd2_journal_lock_updates(journal); | 2810 | jbd2_journal_lock_updates(journal); |
2822 | jbd2_journal_flush(journal); | 2811 | if (jbd2_journal_flush(journal) < 0) |
2812 | goto out; | ||
2813 | |||
2823 | lock_super(sb); | 2814 | lock_super(sb); |
2824 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2815 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
2825 | sb->s_flags & MS_RDONLY) { | 2816 | sb->s_flags & MS_RDONLY) { |
@@ -2828,6 +2819,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
2828 | ext4_commit_super(sb, es, 1); | 2819 | ext4_commit_super(sb, es, 1); |
2829 | } | 2820 | } |
2830 | unlock_super(sb); | 2821 | unlock_super(sb); |
2822 | |||
2823 | out: | ||
2831 | jbd2_journal_unlock_updates(journal); | 2824 | jbd2_journal_unlock_updates(journal); |
2832 | } | 2825 | } |
2833 | 2826 | ||
@@ -2906,6 +2899,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
2906 | { | 2899 | { |
2907 | tid_t target; | 2900 | tid_t target; |
2908 | 2901 | ||
2902 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | ||
2909 | sb->s_dirt = 0; | 2903 | sb->s_dirt = 0; |
2910 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 2904 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
2911 | if (wait) | 2905 | if (wait) |
@@ -2927,7 +2921,13 @@ static void ext4_write_super_lockfs(struct super_block *sb) | |||
2927 | 2921 | ||
2928 | /* Now we set up the journal barrier. */ | 2922 | /* Now we set up the journal barrier. */ |
2929 | jbd2_journal_lock_updates(journal); | 2923 | jbd2_journal_lock_updates(journal); |
2930 | jbd2_journal_flush(journal); | 2924 | |
2925 | /* | ||
2926 | * We don't want to clear needs_recovery flag when we failed | ||
2927 | * to flush the journal. | ||
2928 | */ | ||
2929 | if (jbd2_journal_flush(journal) < 0) | ||
2930 | return; | ||
2931 | 2931 | ||
2932 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 2932 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
2933 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2933 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
@@ -3161,7 +3161,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
3161 | buf->f_type = EXT4_SUPER_MAGIC; | 3161 | buf->f_type = EXT4_SUPER_MAGIC; |
3162 | buf->f_bsize = sb->s_blocksize; | 3162 | buf->f_bsize = sb->s_blocksize; |
3163 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3163 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
3164 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); | 3164 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
3165 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | ||
3165 | ext4_free_blocks_count_set(es, buf->f_bfree); | 3166 | ext4_free_blocks_count_set(es, buf->f_bfree); |
3166 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3167 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
3167 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3168 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
@@ -3366,8 +3367,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3366 | * otherwise be livelocked... | 3367 | * otherwise be livelocked... |
3367 | */ | 3368 | */ |
3368 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 3369 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
3369 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 3370 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
3370 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3371 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3372 | if (err) { | ||
3373 | path_put(&nd.path); | ||
3374 | return err; | ||
3375 | } | ||
3371 | } | 3376 | } |
3372 | 3377 | ||
3373 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 3378 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); |
@@ -3431,7 +3436,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
3431 | handle_t *handle = journal_current_handle(); | 3436 | handle_t *handle = journal_current_handle(); |
3432 | 3437 | ||
3433 | if (!handle) { | 3438 | if (!handle) { |
3434 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" | 3439 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" |
3435 | " cancelled because transaction is not started.\n", | 3440 | " cancelled because transaction is not started.\n", |
3436 | (unsigned long long)off, (unsigned long long)len); | 3441 | (unsigned long long)off, (unsigned long long)len); |
3437 | return -EIO; | 3442 | return -EIO; |
@@ -3492,18 +3497,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, | |||
3492 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3497 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); |
3493 | } | 3498 | } |
3494 | 3499 | ||
3500 | #ifdef CONFIG_PROC_FS | ||
3501 | static int ext4_ui_proc_show(struct seq_file *m, void *v) | ||
3502 | { | ||
3503 | unsigned int *p = m->private; | ||
3504 | |||
3505 | seq_printf(m, "%u\n", *p); | ||
3506 | return 0; | ||
3507 | } | ||
3508 | |||
3509 | static int ext4_ui_proc_open(struct inode *inode, struct file *file) | ||
3510 | { | ||
3511 | return single_open(file, ext4_ui_proc_show, PDE(inode)->data); | ||
3512 | } | ||
3513 | |||
3514 | static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, | ||
3515 | size_t cnt, loff_t *ppos) | ||
3516 | { | ||
3517 | unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; | ||
3518 | char str[32]; | ||
3519 | unsigned long value; | ||
3520 | |||
3521 | if (cnt >= sizeof(str)) | ||
3522 | return -EINVAL; | ||
3523 | if (copy_from_user(str, buf, cnt)) | ||
3524 | return -EFAULT; | ||
3525 | value = simple_strtol(str, NULL, 0); | ||
3526 | if (value < 0) | ||
3527 | return -ERANGE; | ||
3528 | *p = value; | ||
3529 | return cnt; | ||
3530 | } | ||
3531 | |||
3532 | const struct file_operations ext4_ui_proc_fops = { | ||
3533 | .owner = THIS_MODULE, | ||
3534 | .open = ext4_ui_proc_open, | ||
3535 | .read = seq_read, | ||
3536 | .llseek = seq_lseek, | ||
3537 | .release = single_release, | ||
3538 | .write = ext4_ui_proc_write, | ||
3539 | }; | ||
3540 | #endif | ||
3541 | |||
3542 | static struct file_system_type ext4_fs_type = { | ||
3543 | .owner = THIS_MODULE, | ||
3544 | .name = "ext4", | ||
3545 | .get_sb = ext4_get_sb, | ||
3546 | .kill_sb = kill_block_super, | ||
3547 | .fs_flags = FS_REQUIRES_DEV, | ||
3548 | }; | ||
3549 | |||
3550 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3551 | static int ext4dev_get_sb(struct file_system_type *fs_type, | ||
3552 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
3553 | { | ||
3554 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | ||
3555 | "to mount using ext4\n"); | ||
3556 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | ||
3557 | "will go away by 2.6.31\n"); | ||
3558 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | ||
3559 | } | ||
3560 | |||
3495 | static struct file_system_type ext4dev_fs_type = { | 3561 | static struct file_system_type ext4dev_fs_type = { |
3496 | .owner = THIS_MODULE, | 3562 | .owner = THIS_MODULE, |
3497 | .name = "ext4dev", | 3563 | .name = "ext4dev", |
3498 | .get_sb = ext4_get_sb, | 3564 | .get_sb = ext4dev_get_sb, |
3499 | .kill_sb = kill_block_super, | 3565 | .kill_sb = kill_block_super, |
3500 | .fs_flags = FS_REQUIRES_DEV, | 3566 | .fs_flags = FS_REQUIRES_DEV, |
3501 | }; | 3567 | }; |
3568 | MODULE_ALIAS("ext4dev"); | ||
3569 | #endif | ||
3502 | 3570 | ||
3503 | static int __init init_ext4_fs(void) | 3571 | static int __init init_ext4_fs(void) |
3504 | { | 3572 | { |
3505 | int err; | 3573 | int err; |
3506 | 3574 | ||
3575 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
3507 | err = init_ext4_mballoc(); | 3576 | err = init_ext4_mballoc(); |
3508 | if (err) | 3577 | if (err) |
3509 | return err; | 3578 | return err; |
@@ -3514,9 +3583,16 @@ static int __init init_ext4_fs(void) | |||
3514 | err = init_inodecache(); | 3583 | err = init_inodecache(); |
3515 | if (err) | 3584 | if (err) |
3516 | goto out1; | 3585 | goto out1; |
3517 | err = register_filesystem(&ext4dev_fs_type); | 3586 | err = register_filesystem(&ext4_fs_type); |
3518 | if (err) | 3587 | if (err) |
3519 | goto out; | 3588 | goto out; |
3589 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3590 | err = register_filesystem(&ext4dev_fs_type); | ||
3591 | if (err) { | ||
3592 | unregister_filesystem(&ext4_fs_type); | ||
3593 | goto out; | ||
3594 | } | ||
3595 | #endif | ||
3520 | return 0; | 3596 | return 0; |
3521 | out: | 3597 | out: |
3522 | destroy_inodecache(); | 3598 | destroy_inodecache(); |
@@ -3529,10 +3605,14 @@ out2: | |||
3529 | 3605 | ||
3530 | static void __exit exit_ext4_fs(void) | 3606 | static void __exit exit_ext4_fs(void) |
3531 | { | 3607 | { |
3608 | unregister_filesystem(&ext4_fs_type); | ||
3609 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
3532 | unregister_filesystem(&ext4dev_fs_type); | 3610 | unregister_filesystem(&ext4dev_fs_type); |
3611 | #endif | ||
3533 | destroy_inodecache(); | 3612 | destroy_inodecache(); |
3534 | exit_ext4_xattr(); | 3613 | exit_ext4_xattr(); |
3535 | exit_ext4_mballoc(); | 3614 | exit_ext4_mballoc(); |
3615 | remove_proc_entry("fs/ext4", NULL); | ||
3536 | } | 3616 | } |
3537 | 3617 | ||
3538 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3618 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc01..00740cb32be3 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -23,10 +23,10 @@ | |||
23 | #include "ext4.h" | 23 | #include "ext4.h" |
24 | #include "xattr.h" | 24 | #include "xattr.h" |
25 | 25 | ||
26 | static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
27 | { | 27 | { |
28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
29 | nd_set_link(nd, (char*)ei->i_data); | 29 | nd_set_link(nd, (char *) ei->i_data); |
30 | return NULL; | 30 | return NULL; |
31 | } | 31 | } |
32 | 32 | ||
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 37 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 38 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 39 | .getxattr = generic_getxattr, |
40 | .listxattr = ext4_listxattr, | 40 | .listxattr = ext4_listxattr, |
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 46 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 47 | .follow_link = ext4_follow_link, |
48 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 48 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 49 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 50 | .getxattr = generic_getxattr, |
51 | .listxattr = ext4_listxattr, | 51 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b4893..80626d516fee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; | |||
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, | 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, |
105 | #endif | 105 | #endif |
106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, | 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, |
107 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 107 | #ifdef CONFIG_EXT4_FS_SECURITY |
108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, | 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, |
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
116 | &ext4_xattr_acl_access_handler, | 116 | &ext4_xattr_acl_access_handler, |
117 | &ext4_xattr_acl_default_handler, | 117 | &ext4_xattr_acl_default_handler, |
118 | #endif | 118 | #endif |
119 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 119 | #ifdef CONFIG_EXT4_FS_SECURITY |
120 | &ext4_xattr_security_handler, | 120 | &ext4_xattr_security_handler, |
121 | #endif | 121 | #endif |
122 | NULL | 122 | NULL |
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
959 | struct ext4_xattr_block_find bs = { | 959 | struct ext4_xattr_block_find bs = { |
960 | .s = { .not_found = -ENODATA, }, | 960 | .s = { .not_found = -ENODATA, }, |
961 | }; | 961 | }; |
962 | unsigned long no_expand; | ||
962 | int error; | 963 | int error; |
963 | 964 | ||
964 | if (!name) | 965 | if (!name) |
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
966 | if (strlen(name) > 255) | 967 | if (strlen(name) > 255) |
967 | return -ERANGE; | 968 | return -ERANGE; |
968 | down_write(&EXT4_I(inode)->xattr_sem); | 969 | down_write(&EXT4_I(inode)->xattr_sem); |
970 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | ||
971 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | ||
972 | |||
969 | error = ext4_get_inode_loc(inode, &is.iloc); | 973 | error = ext4_get_inode_loc(inode, &is.iloc); |
970 | if (error) | 974 | if (error) |
971 | goto cleanup; | 975 | goto cleanup; |
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
1042 | cleanup: | 1046 | cleanup: |
1043 | brelse(is.iloc.bh); | 1047 | brelse(is.iloc.bh); |
1044 | brelse(bs.bh); | 1048 | brelse(bs.bh); |
1049 | if (no_expand == 0) | ||
1050 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | ||
1045 | up_write(&EXT4_I(inode)->xattr_sem); | 1051 | up_write(&EXT4_I(inode)->xattr_sem); |
1046 | return error; | 1052 | return error; |
1047 | } | 1053 | } |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb9..8ede88b18c29 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -51,8 +51,8 @@ struct ext4_xattr_entry { | |||
51 | (((name_len) + EXT4_XATTR_ROUND + \ | 51 | (((name_len) + EXT4_XATTR_ROUND + \ |
52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) | 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) |
53 | #define EXT4_XATTR_NEXT(entry) \ | 53 | #define EXT4_XATTR_NEXT(entry) \ |
54 | ( (struct ext4_xattr_entry *)( \ | 54 | ((struct ext4_xattr_entry *)( \ |
55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) | 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) |
56 | #define EXT4_XATTR_SIZE(size) \ | 56 | #define EXT4_XATTR_SIZE(size) \ |
57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) | 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) |
58 | 58 | ||
@@ -63,7 +63,7 @@ struct ext4_xattr_entry { | |||
63 | EXT4_I(inode)->i_extra_isize)) | 63 | EXT4_I(inode)->i_extra_isize)) |
64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) | 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) |
65 | 65 | ||
66 | # ifdef CONFIG_EXT4DEV_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern struct xattr_handler ext4_xattr_trusted_handler; |
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); | |||
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4DEV_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
93 | static inline int | 93 | static inline int |
94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, | 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, |
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
141 | 141 | ||
142 | #define ext4_xattr_handlers NULL | 142 | #define ext4_xattr_handlers NULL |
143 | 143 | ||
144 | # endif /* CONFIG_EXT4DEV_FS_XATTR */ | 144 | # endif /* CONFIG_EXT4_FS_XATTR */ |
145 | 145 | ||
146 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
148 | struct inode *dir); | 148 | struct inode *dir); |
149 | #else | 149 | #else |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c4af7e..fb98b3d847ed 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/msdos_fs.h> | 8 | #include <linux/msdos_fs.h> |
9 | #include <linux/blkdev.h> | ||
9 | 10 | ||
10 | struct fatent_operations { | 11 | struct fatent_operations { |
11 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); | 12 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); |
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
535 | struct fat_entry fatent; | 536 | struct fat_entry fatent; |
536 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; | 537 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; |
537 | int i, err, nr_bhs; | 538 | int i, err, nr_bhs; |
539 | int first_cl = cluster; | ||
538 | 540 | ||
539 | nr_bhs = 0; | 541 | nr_bhs = 0; |
540 | fatent_init(&fatent); | 542 | fatent_init(&fatent); |
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
551 | goto error; | 553 | goto error; |
552 | } | 554 | } |
553 | 555 | ||
556 | /* | ||
557 | * Issue discard for the sectors we no longer care about, | ||
558 | * batching contiguous clusters into one request | ||
559 | */ | ||
560 | if (cluster != fatent.entry + 1) { | ||
561 | int nr_clus = fatent.entry - first_cl + 1; | ||
562 | |||
563 | sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), | ||
564 | nr_clus * sbi->sec_per_clus); | ||
565 | first_cl = cluster; | ||
566 | } | ||
567 | |||
554 | ops->ent_put(&fatent, FAT_ENT_FREE); | 568 | ops->ent_put(&fatent, FAT_ENT_FREE); |
555 | if (sbi->free_clusters != -1) { | 569 | if (sbi->free_clusters != -1) { |
556 | sbi->free_clusters++; | 570 | sbi->free_clusters++; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 80ff3381fa21..d12cdf2a0406 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -855,7 +855,7 @@ enum { | |||
855 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, | 855 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, |
856 | }; | 856 | }; |
857 | 857 | ||
858 | static match_table_t fat_tokens = { | 858 | static const match_table_t fat_tokens = { |
859 | {Opt_check_r, "check=relaxed"}, | 859 | {Opt_check_r, "check=relaxed"}, |
860 | {Opt_check_s, "check=strict"}, | 860 | {Opt_check_s, "check=strict"}, |
861 | {Opt_check_n, "check=normal"}, | 861 | {Opt_check_n, "check=normal"}, |
@@ -890,14 +890,14 @@ static match_table_t fat_tokens = { | |||
890 | {Opt_tz_utc, "tz=UTC"}, | 890 | {Opt_tz_utc, "tz=UTC"}, |
891 | {Opt_err, NULL}, | 891 | {Opt_err, NULL}, |
892 | }; | 892 | }; |
893 | static match_table_t msdos_tokens = { | 893 | static const match_table_t msdos_tokens = { |
894 | {Opt_nodots, "nodots"}, | 894 | {Opt_nodots, "nodots"}, |
895 | {Opt_nodots, "dotsOK=no"}, | 895 | {Opt_nodots, "dotsOK=no"}, |
896 | {Opt_dots, "dots"}, | 896 | {Opt_dots, "dots"}, |
897 | {Opt_dots, "dotsOK=yes"}, | 897 | {Opt_dots, "dotsOK=yes"}, |
898 | {Opt_err, NULL} | 898 | {Opt_err, NULL} |
899 | }; | 899 | }; |
900 | static match_table_t vfat_tokens = { | 900 | static const match_table_t vfat_tokens = { |
901 | {Opt_charset, "iocharset=%s"}, | 901 | {Opt_charset, "iocharset=%s"}, |
902 | {Opt_shortname_lower, "shortname=lower"}, | 902 | {Opt_shortname_lower, "shortname=lower"}, |
903 | {Opt_shortname_win95, "shortname=win95"}, | 903 | {Opt_shortname_win95, "shortname=win95"}, |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 25adfc3c693a..d0ff0b8cf309 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * pages against inodes. ie: data writeback. Writeout of the | 8 | * pages against inodes. ie: data writeback. Writeout of the |
9 | * inode itself is not handled here. | 9 | * inode itself is not handled here. |
10 | * | 10 | * |
11 | * 10Apr2002 akpm@zip.com.au | 11 | * 10Apr2002 Andrew Morton |
12 | * Split out of fs/inode.c | 12 | * Split out of fs/inode.c |
13 | * Additions for address_space-based writeback | 13 | * Additions for address_space-based writeback |
14 | */ | 14 | */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d2249f174e20..6a84388cacff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -354,7 +354,7 @@ enum { | |||
354 | OPT_ERR | 354 | OPT_ERR |
355 | }; | 355 | }; |
356 | 356 | ||
357 | static match_table_t tokens = { | 357 | static const match_table_t tokens = { |
358 | {OPT_FD, "fd=%u"}, | 358 | {OPT_FD, "fd=%u"}, |
359 | {OPT_ROOTMODE, "rootmode=%o"}, | 359 | {OPT_ROOTMODE, "rootmode=%o"}, |
360 | {OPT_USER_ID, "user_id=%u"}, | 360 | {OPT_USER_ID, "user_id=%u"}, |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e546616..c962283d4e7f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1266 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
1267 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
1268 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
1269 | delay = gl->gl_ops->go_min_hold_time; | ||
1268 | 1270 | ||
1269 | spin_lock(&gl->gl_spin); | 1271 | spin_lock(&gl->gl_spin); |
1270 | handle_callback(gl, state, 1, delay); | 1272 | handle_callback(gl, state, 1, delay); |
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
1578 | *p++ = 'a'; | 1580 | *p++ = 'a'; |
1579 | if (flags & GL_EXACT) | 1581 | if (flags & GL_EXACT) |
1580 | *p++ = 'E'; | 1582 | *p++ = 'E'; |
1581 | if (flags & GL_ATIME) | ||
1582 | *p++ = 'a'; | ||
1583 | if (flags & GL_NOCACHE) | 1583 | if (flags & GL_NOCACHE) |
1584 | *p++ = 'c'; | 1584 | *p++ = 'c'; |
1585 | if (test_bit(HIF_HOLDER, &iflags)) | 1585 | if (test_bit(HIF_HOLDER, &iflags)) |
@@ -1816,15 +1816,17 @@ restart: | |||
1816 | if (gl) { | 1816 | if (gl) { |
1817 | gi->gl = hlist_entry(gl->gl_list.next, | 1817 | gi->gl = hlist_entry(gl->gl_list.next, |
1818 | struct gfs2_glock, gl_list); | 1818 | struct gfs2_glock, gl_list); |
1819 | if (gi->gl) | 1819 | } else { |
1820 | gfs2_glock_hold(gi->gl); | 1820 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, |
1821 | struct gfs2_glock, gl_list); | ||
1821 | } | 1822 | } |
1823 | if (gi->gl) | ||
1824 | gfs2_glock_hold(gi->gl); | ||
1822 | read_unlock(gl_lock_addr(gi->hash)); | 1825 | read_unlock(gl_lock_addr(gi->hash)); |
1823 | if (gl) | 1826 | if (gl) |
1824 | gfs2_glock_put(gl); | 1827 | gfs2_glock_put(gl); |
1825 | if (gl && gi->gl == NULL) | ||
1826 | gi->hash++; | ||
1827 | while (gi->gl == NULL) { | 1828 | while (gi->gl == NULL) { |
1829 | gi->hash++; | ||
1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1830 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
1829 | return 1; | 1831 | return 1; |
1830 | read_lock(gl_lock_addr(gi->hash)); | 1832 | read_lock(gl_lock_addr(gi->hash)); |
@@ -1833,7 +1835,6 @@ restart: | |||
1833 | if (gi->gl) | 1835 | if (gi->gl) |
1834 | gfs2_glock_hold(gi->gl); | 1836 | gfs2_glock_hold(gi->gl); |
1835 | read_unlock(gl_lock_addr(gi->hash)); | 1837 | read_unlock(gl_lock_addr(gi->hash)); |
1836 | gi->hash++; | ||
1837 | } | 1838 | } |
1838 | 1839 | ||
1839 | if (gi->sdp != gi->gl->gl_sbd) | 1840 | if (gi->sdp != gi->gl->gl_sbd) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 971d92af70fc..695c6b193611 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -24,7 +24,6 @@ | |||
24 | #define GL_ASYNC 0x00000040 | 24 | #define GL_ASYNC 0x00000040 |
25 | #define GL_EXACT 0x00000080 | 25 | #define GL_EXACT 0x00000080 |
26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
27 | #define GL_ATIME 0x00000200 | ||
28 | #define GL_NOCACHE 0x00000400 | 27 | #define GL_NOCACHE 0x00000400 |
29 | 28 | ||
30 | #define GLR_TRYFAILED 13 | 29 | #define GLR_TRYFAILED 13 |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a5c462..f566ec1b4e8e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host { | |||
386 | #define GFS2_DATA_ORDERED 2 | 386 | #define GFS2_DATA_ORDERED 2 |
387 | 387 | ||
388 | struct gfs2_args { | 388 | struct gfs2_args { |
389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
392 | int ar_spectator; /* Don't get a journal because we're always RO */ | 392 | unsigned int ar_spectator:1; /* Don't get a journal */ |
393 | int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ | 393 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ |
394 | int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ | 394 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
395 | int ar_localcaching; /* Local-style caching (dangerous on multihost) */ | 395 | unsigned int ar_localcaching:1; /* Local caching */ |
396 | int ar_debug; /* Oops on errors instead of trying to be graceful */ | 396 | unsigned int ar_debug:1; /* Oops on errors */ |
397 | int ar_upgrade; /* Upgrade ondisk/multihost format */ | 397 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ |
398 | unsigned int ar_num_glockd; /* Number of glockd threads */ | 398 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
399 | int ar_posix_acl; /* Enable posix acls */ | 399 | unsigned int ar_quota:2; /* off/account/on */ |
400 | int ar_quota; /* off/account/on */ | 400 | unsigned int ar_suiddir:1; /* suiddir support */ |
401 | int ar_suiddir; /* suiddir support */ | 401 | unsigned int ar_data:2; /* ordered/writeback */ |
402 | int ar_data; /* ordered/writeback */ | 402 | unsigned int ar_meta:1; /* mount metafs */ |
403 | unsigned int ar_num_glockd; /* Number of glockd threads */ | ||
403 | }; | 404 | }; |
404 | 405 | ||
405 | struct gfs2_tune { | 406 | struct gfs2_tune { |
@@ -419,7 +420,6 @@ struct gfs2_tune { | |||
419 | unsigned int gt_quota_scale_den; /* Denominator */ | 420 | unsigned int gt_quota_scale_den; /* Denominator */ |
420 | unsigned int gt_quota_cache_secs; | 421 | unsigned int gt_quota_cache_secs; |
421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 422 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | ||
423 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
425 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
@@ -432,7 +432,7 @@ enum { | |||
432 | SDF_JOURNAL_CHECKED = 0, | 432 | SDF_JOURNAL_CHECKED = 0, |
433 | SDF_JOURNAL_LIVE = 1, | 433 | SDF_JOURNAL_LIVE = 1, |
434 | SDF_SHUTDOWN = 2, | 434 | SDF_SHUTDOWN = 2, |
435 | SDF_NOATIME = 3, | 435 | SDF_NOBARRIERS = 3, |
436 | }; | 436 | }; |
437 | 437 | ||
438 | #define GFS2_FSNAME_LEN 256 | 438 | #define GFS2_FSNAME_LEN 256 |
@@ -461,7 +461,6 @@ struct gfs2_sb_host { | |||
461 | 461 | ||
462 | struct gfs2_sbd { | 462 | struct gfs2_sbd { |
463 | struct super_block *sd_vfs; | 463 | struct super_block *sd_vfs; |
464 | struct super_block *sd_vfs_meta; | ||
465 | struct kobject sd_kobj; | 464 | struct kobject sd_kobj; |
466 | unsigned long sd_flags; /* SDF_... */ | 465 | unsigned long sd_flags; /* SDF_... */ |
467 | struct gfs2_sb_host sd_sb; | 466 | struct gfs2_sb_host sd_sb; |
@@ -499,7 +498,9 @@ struct gfs2_sbd { | |||
499 | 498 | ||
500 | /* Inode Stuff */ | 499 | /* Inode Stuff */ |
501 | 500 | ||
502 | struct inode *sd_master_dir; | 501 | struct dentry *sd_master_dir; |
502 | struct dentry *sd_root_dir; | ||
503 | |||
503 | struct inode *sd_jindex; | 504 | struct inode *sd_jindex; |
504 | struct inode *sd_inum_inode; | 505 | struct inode *sd_inum_inode; |
505 | struct inode *sd_statfs_inode; | 506 | struct inode *sd_statfs_inode; |
@@ -634,7 +635,6 @@ struct gfs2_sbd { | |||
634 | /* Debugging crud */ | 635 | /* Debugging crud */ |
635 | 636 | ||
636 | unsigned long sd_last_warning; | 637 | unsigned long sd_last_warning; |
637 | struct vfsmount *sd_gfs2mnt; | ||
638 | struct dentry *debugfs_dir; /* debugfs directory */ | 638 | struct dentry *debugfs_dir; /* debugfs directory */ |
639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ |
640 | }; | 640 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8b0806a32948..7cee695fa441 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/crc32.h> | 18 | #include <linux/crc32.h> |
19 | #include <linux/lm_interface.h> | 19 | #include <linux/lm_interface.h> |
20 | #include <linux/security.h> | 20 | #include <linux/security.h> |
21 | #include <linux/time.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
249 | { | 250 | { |
250 | struct gfs2_dinode_host *di = &ip->i_di; | 251 | struct gfs2_dinode_host *di = &ip->i_di; |
251 | const struct gfs2_dinode *str = buf; | 252 | const struct gfs2_dinode *str = buf; |
253 | struct timespec atime; | ||
252 | u16 height, depth; | 254 | u16 height, depth; |
253 | 255 | ||
254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) | 256 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
275 | di->di_size = be64_to_cpu(str->di_size); | 277 | di->di_size = be64_to_cpu(str->di_size); |
276 | i_size_write(&ip->i_inode, di->di_size); | 278 | i_size_write(&ip->i_inode, di->di_size); |
277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 279 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 280 | atime.tv_sec = be64_to_cpu(str->di_atime); |
279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 281 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
282 | if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) | ||
283 | ip->i_inode.i_atime = atime; | ||
280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 284 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
281 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); | 285 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); |
282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 286 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1033 | 1037 | ||
1034 | if (bh) | 1038 | if (bh) |
1035 | brelse(bh); | 1039 | brelse(bh); |
1036 | if (!inode) | ||
1037 | return ERR_PTR(-ENOMEM); | ||
1038 | return inode; | 1040 | return inode; |
1039 | 1041 | ||
1040 | fail_gunlock2: | 1042 | fail_gunlock2: |
1041 | gfs2_glock_dq_uninit(ghs + 1); | 1043 | gfs2_glock_dq_uninit(ghs + 1); |
1042 | if (inode) | 1044 | if (inode && !IS_ERR(inode)) |
1043 | iput(inode); | 1045 | iput(inode); |
1044 | fail_gunlock: | 1046 | fail_gunlock: |
1045 | gfs2_glock_dq(ghs); | 1047 | gfs2_glock_dq(ghs); |
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
1140 | return 0; | 1142 | return 0; |
1141 | } | 1143 | } |
1142 | 1144 | ||
1143 | /* | ||
1144 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
1145 | * @this: move this | ||
1146 | * @to: to here | ||
1147 | * | ||
1148 | * Follow @to back to the root and make sure we don't encounter @this | ||
1149 | * Assumes we already hold the rename lock. | ||
1150 | * | ||
1151 | * Returns: errno | ||
1152 | */ | ||
1153 | |||
1154 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
1155 | { | ||
1156 | struct inode *dir = &to->i_inode; | ||
1157 | struct super_block *sb = dir->i_sb; | ||
1158 | struct inode *tmp; | ||
1159 | struct qstr dotdot; | ||
1160 | int error = 0; | ||
1161 | |||
1162 | gfs2_str2qstr(&dotdot, ".."); | ||
1163 | |||
1164 | igrab(dir); | ||
1165 | |||
1166 | for (;;) { | ||
1167 | if (dir == &this->i_inode) { | ||
1168 | error = -EINVAL; | ||
1169 | break; | ||
1170 | } | ||
1171 | if (dir == sb->s_root->d_inode) { | ||
1172 | error = 0; | ||
1173 | break; | ||
1174 | } | ||
1175 | |||
1176 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
1177 | if (IS_ERR(tmp)) { | ||
1178 | error = PTR_ERR(tmp); | ||
1179 | break; | ||
1180 | } | ||
1181 | |||
1182 | iput(dir); | ||
1183 | dir = tmp; | ||
1184 | } | ||
1185 | |||
1186 | iput(dir); | ||
1187 | |||
1188 | return error; | ||
1189 | } | ||
1190 | |||
1191 | /** | 1145 | /** |
1192 | * gfs2_readlinki - return the contents of a symlink | 1146 | * gfs2_readlinki - return the contents of a symlink |
1193 | * @ip: the symlink's inode | 1147 | * @ip: the symlink's inode |
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
1207 | unsigned int x; | 1161 | unsigned int x; |
1208 | int error; | 1162 | int error; |
1209 | 1163 | ||
1210 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 1164 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
1211 | error = gfs2_glock_nq_atime(&i_gh); | 1165 | error = gfs2_glock_nq(&i_gh); |
1212 | if (error) { | 1166 | if (error) { |
1213 | gfs2_holder_uninit(&i_gh); | 1167 | gfs2_holder_uninit(&i_gh); |
1214 | return error; | 1168 | return error; |
@@ -1243,101 +1197,6 @@ out: | |||
1243 | return error; | 1197 | return error; |
1244 | } | 1198 | } |
1245 | 1199 | ||
1246 | /** | ||
1247 | * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and | ||
1248 | * conditionally update the inode's atime | ||
1249 | * @gh: the holder to acquire | ||
1250 | * | ||
1251 | * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap | ||
1252 | * Update if the difference between the current time and the inode's current | ||
1253 | * atime is greater than an interval specified at mount. | ||
1254 | * | ||
1255 | * Returns: errno | ||
1256 | */ | ||
1257 | |||
1258 | int gfs2_glock_nq_atime(struct gfs2_holder *gh) | ||
1259 | { | ||
1260 | struct gfs2_glock *gl = gh->gh_gl; | ||
1261 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1262 | struct gfs2_inode *ip = gl->gl_object; | ||
1263 | s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); | ||
1264 | unsigned int state; | ||
1265 | int flags; | ||
1266 | int error; | ||
1267 | struct timespec tv = CURRENT_TIME; | ||
1268 | |||
1269 | if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || | ||
1270 | gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || | ||
1271 | gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) | ||
1272 | return -EINVAL; | ||
1273 | |||
1274 | state = gh->gh_state; | ||
1275 | flags = gh->gh_flags; | ||
1276 | |||
1277 | error = gfs2_glock_nq(gh); | ||
1278 | if (error) | ||
1279 | return error; | ||
1280 | |||
1281 | if (test_bit(SDF_NOATIME, &sdp->sd_flags) || | ||
1282 | (sdp->sd_vfs->s_flags & MS_RDONLY)) | ||
1283 | return 0; | ||
1284 | |||
1285 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1286 | gfs2_glock_dq(gh); | ||
1287 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, | ||
1288 | gh); | ||
1289 | error = gfs2_glock_nq(gh); | ||
1290 | if (error) | ||
1291 | return error; | ||
1292 | |||
1293 | /* Verify that atime hasn't been updated while we were | ||
1294 | trying to get exclusive lock. */ | ||
1295 | |||
1296 | tv = CURRENT_TIME; | ||
1297 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
1298 | struct buffer_head *dibh; | ||
1299 | struct gfs2_dinode *di; | ||
1300 | |||
1301 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
1302 | if (error == -EROFS) | ||
1303 | return 0; | ||
1304 | if (error) | ||
1305 | goto fail; | ||
1306 | |||
1307 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1308 | if (error) | ||
1309 | goto fail_end_trans; | ||
1310 | |||
1311 | ip->i_inode.i_atime = tv; | ||
1312 | |||
1313 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1314 | di = (struct gfs2_dinode *)dibh->b_data; | ||
1315 | di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | ||
1316 | di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); | ||
1317 | brelse(dibh); | ||
1318 | |||
1319 | gfs2_trans_end(sdp); | ||
1320 | } | ||
1321 | |||
1322 | /* If someone else has asked for the glock, | ||
1323 | unlock and let them have it. Then reacquire | ||
1324 | in the original state. */ | ||
1325 | if (gfs2_glock_is_blocking(gl)) { | ||
1326 | gfs2_glock_dq(gh); | ||
1327 | gfs2_holder_reinit(state, flags, gh); | ||
1328 | return gfs2_glock_nq(gh); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | return 0; | ||
1333 | |||
1334 | fail_end_trans: | ||
1335 | gfs2_trans_end(sdp); | ||
1336 | fail: | ||
1337 | gfs2_glock_dq(gh); | ||
1338 | return error; | ||
1339 | } | ||
1340 | |||
1341 | static int | 1200 | static int |
1342 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1201 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
1343 | { | 1202 | { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 58f9607d6a86..2d43f69610a0 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
92 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
93 | int gfs2_permission(struct inode *inode, int mask); | 93 | int gfs2_permission(struct inode *inode, int mask); |
94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 94 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
97 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 95 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
98 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 96 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
99 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 97 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 09d78c216f48..0c4cbe6c8285 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data, | |||
144 | 144 | ||
145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), | 145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), |
146 | &ls->dlm_lockspace, | 146 | &ls->dlm_lockspace, |
147 | DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), | 147 | DLM_LSFL_FS | DLM_LSFL_NEWEXCL | |
148 | (nodir ? DLM_LSFL_NODIR : 0), | ||
148 | GDLM_LVB_SIZE); | 149 | GDLM_LVB_SIZE); |
149 | if (error) { | 150 | if (error) { |
150 | log_error("dlm_new_lockspace error %d", error); | 151 | log_error("dlm_new_lockspace error %d", error); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6c6af9f5e3ab..ad305854bdc6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
20 | #include <linux/freezer.h> | 20 | #include <linux/freezer.h> |
21 | #include <linux/bio.h> | ||
21 | 22 | ||
22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
23 | #include "incore.h" | 24 | #include "incore.h" |
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
584 | memset(bh->b_data, 0, bh->b_size); | 585 | memset(bh->b_data, 0, bh->b_size); |
585 | set_buffer_uptodate(bh); | 586 | set_buffer_uptodate(bh); |
586 | clear_buffer_dirty(bh); | 587 | clear_buffer_dirty(bh); |
587 | unlock_buffer(bh); | ||
588 | 588 | ||
589 | gfs2_ail1_empty(sdp, 0); | 589 | gfs2_ail1_empty(sdp, 0); |
590 | tail = current_tail(sdp); | 590 | tail = current_tail(sdp); |
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); | 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); |
602 | lh->lh_hash = cpu_to_be32(hash); | 602 | lh->lh_hash = cpu_to_be32(hash); |
603 | 603 | ||
604 | set_buffer_dirty(bh); | 604 | bh->b_end_io = end_buffer_write_sync; |
605 | if (sync_dirty_buffer(bh)) | 605 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
606 | goto skip_barrier; | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | ||
609 | wait_on_buffer(bh); | ||
610 | if (buffer_eopnotsupp(bh)) { | ||
611 | clear_buffer_eopnotsupp(bh); | ||
612 | set_buffer_uptodate(bh); | ||
613 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
614 | lock_buffer(bh); | ||
615 | skip_barrier: | ||
616 | get_bh(bh); | ||
617 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | ||
618 | wait_on_buffer(bh); | ||
619 | } | ||
620 | if (!buffer_uptodate(bh)) | ||
606 | gfs2_io_error_bh(sdp, bh); | 621 | gfs2_io_error_bh(sdp, bh); |
607 | brelse(bh); | 622 | brelse(bh); |
608 | 623 | ||
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f958..f96eb90a2cfa 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
@@ -42,10 +42,11 @@ enum { | |||
42 | Opt_nosuiddir, | 42 | Opt_nosuiddir, |
43 | Opt_data_writeback, | 43 | Opt_data_writeback, |
44 | Opt_data_ordered, | 44 | Opt_data_ordered, |
45 | Opt_meta, | ||
45 | Opt_err, | 46 | Opt_err, |
46 | }; | 47 | }; |
47 | 48 | ||
48 | static match_table_t tokens = { | 49 | static const match_table_t tokens = { |
49 | {Opt_lockproto, "lockproto=%s"}, | 50 | {Opt_lockproto, "lockproto=%s"}, |
50 | {Opt_locktable, "locktable=%s"}, | 51 | {Opt_locktable, "locktable=%s"}, |
51 | {Opt_hostdata, "hostdata=%s"}, | 52 | {Opt_hostdata, "hostdata=%s"}, |
@@ -66,6 +67,7 @@ static match_table_t tokens = { | |||
66 | {Opt_nosuiddir, "nosuiddir"}, | 67 | {Opt_nosuiddir, "nosuiddir"}, |
67 | {Opt_data_writeback, "data=writeback"}, | 68 | {Opt_data_writeback, "data=writeback"}, |
68 | {Opt_data_ordered, "data=ordered"}, | 69 | {Opt_data_ordered, "data=ordered"}, |
70 | {Opt_meta, "meta"}, | ||
69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
70 | }; | 72 | }; |
71 | 73 | ||
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
239 | case Opt_data_ordered: | 241 | case Opt_data_ordered: |
240 | args->ar_data = GFS2_DATA_ORDERED; | 242 | args->ar_data = GFS2_DATA_ORDERED; |
241 | break; | 243 | break; |
244 | case Opt_meta: | ||
245 | if (remount && args->ar_meta != 1) | ||
246 | goto cant_remount; | ||
247 | args->ar_meta = 1; | ||
248 | break; | ||
242 | case Opt_err: | 249 | case Opt_err: |
243 | default: | 250 | default: |
244 | fs_info(sdp, "unknown option: %s\n", o); | 251 | fs_info(sdp, "unknown option: %s\n", o); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117a..27563816e1c5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page) | |||
512 | int error; | 512 | int error; |
513 | 513 | ||
514 | unlock_page(page); | 514 | unlock_page(page); |
515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
516 | error = gfs2_glock_nq_atime(&gh); | 516 | error = gfs2_glock_nq(&gh); |
517 | if (unlikely(error)) | 517 | if (unlikely(error)) |
518 | goto out; | 518 | goto out; |
519 | error = AOP_TRUNCATED_PAGE; | 519 | error = AOP_TRUNCATED_PAGE; |
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
594 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
595 | int ret; | 595 | int ret; |
596 | 596 | ||
597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
598 | ret = gfs2_glock_nq_atime(&gh); | 598 | ret = gfs2_glock_nq(&gh); |
599 | if (unlikely(ret)) | 599 | if (unlikely(ret)) |
600 | goto out_uninit; | 600 | goto out_uninit; |
601 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
636 | unsigned to = from + len; | 636 | unsigned to = from + len; |
637 | struct page *page; | 637 | struct page *page; |
638 | 638 | ||
639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); | 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
640 | error = gfs2_glock_nq_atime(&ip->i_gh); | 640 | error = gfs2_glock_nq(&ip->i_gh); |
641 | if (unlikely(error)) | 641 | if (unlikely(error)) |
642 | goto out_uninit; | 642 | goto out_uninit; |
643 | 643 | ||
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
975 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
976 | return 0; | 976 | return 0; |
977 | 977 | ||
978 | if (offset > i_size_read(&ip->i_inode)) | 978 | if (offset >= i_size_read(&ip->i_inode)) |
979 | return 0; | 979 | return 0; |
980 | return 1; | 980 | return 1; |
981 | } | 981 | } |
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1000 | * unfortunately have the option of only flushing a range like | 1000 | * unfortunately have the option of only flushing a range like |
1001 | * the VFS does. | 1001 | * the VFS does. |
1002 | */ | 1002 | */ |
1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); | 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); |
1004 | rv = gfs2_glock_nq_atime(&gh); | 1004 | rv = gfs2_glock_nq(&gh); |
1005 | if (rv) | 1005 | if (rv) |
1006 | return rv; | 1006 | return rv; |
1007 | rv = gfs2_ok_for_dio(ip, rw, offset); | 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e9a366d4411c..3a747f8e2188 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
89 | u64 offset = file->f_pos; | 89 | u64 offset = file->f_pos; |
90 | int error; | 90 | int error; |
91 | 91 | ||
92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
93 | error = gfs2_glock_nq_atime(&d_gh); | 93 | error = gfs2_glock_nq(&d_gh); |
94 | if (error) { | 94 | if (error) { |
95 | gfs2_holder_uninit(&d_gh); | 95 | gfs2_holder_uninit(&d_gh); |
96 | return error; | 96 | return error; |
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
153 | int error; | 153 | int error; |
154 | u32 fsflags; | 154 | u32 fsflags; |
155 | 155 | ||
156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
157 | error = gfs2_glock_nq_atime(&gh); | 157 | error = gfs2_glock_nq(&gh); |
158 | if (error) | 158 | if (error) |
159 | return error; | 159 | return error; |
160 | 160 | ||
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
351 | struct gfs2_alloc *al; | 351 | struct gfs2_alloc *al; |
352 | int ret; | 352 | int ret; |
353 | 353 | ||
354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
355 | ret = gfs2_glock_nq_atime(&gh); | 355 | ret = gfs2_glock_nq(&gh); |
356 | if (ret) | 356 | if (ret) |
357 | goto out; | 357 | goto out; |
358 | 358 | ||
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
434 | struct gfs2_holder i_gh; | 434 | struct gfs2_holder i_gh; |
435 | int error; | 435 | int error; |
436 | 436 | ||
437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
438 | error = gfs2_glock_nq_atime(&i_gh); | 438 | error = gfs2_glock_nq(&i_gh); |
439 | if (error) { | 439 | if (error) { |
440 | gfs2_holder_uninit(&i_gh); | 440 | gfs2_holder_uninit(&i_gh); |
441 | return error; | 441 | return error; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d6490633..b117fcf2c4f5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -40,6 +40,44 @@ | |||
40 | #define DO 0 | 40 | #define DO 0 |
41 | #define UNDO 1 | 41 | #define UNDO 1 |
42 | 42 | ||
43 | static const u32 gfs2_old_fs_formats[] = { | ||
44 | 0 | ||
45 | }; | ||
46 | |||
47 | static const u32 gfs2_old_multihost_formats[] = { | ||
48 | 0 | ||
49 | }; | ||
50 | |||
51 | /** | ||
52 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
53 | * @gt: tune | ||
54 | * | ||
55 | */ | ||
56 | |||
57 | static void gfs2_tune_init(struct gfs2_tune *gt) | ||
58 | { | ||
59 | spin_lock_init(>->gt_spin); | ||
60 | |||
61 | gt->gt_demote_secs = 300; | ||
62 | gt->gt_incore_log_blocks = 1024; | ||
63 | gt->gt_log_flush_secs = 60; | ||
64 | gt->gt_recoverd_secs = 60; | ||
65 | gt->gt_logd_secs = 1; | ||
66 | gt->gt_quotad_secs = 5; | ||
67 | gt->gt_quota_simul_sync = 64; | ||
68 | gt->gt_quota_warn_period = 10; | ||
69 | gt->gt_quota_scale_num = 1; | ||
70 | gt->gt_quota_scale_den = 1; | ||
71 | gt->gt_quota_cache_secs = 300; | ||
72 | gt->gt_quota_quantum = 60; | ||
73 | gt->gt_new_files_jdata = 0; | ||
74 | gt->gt_max_readahead = 1 << 18; | ||
75 | gt->gt_stall_secs = 600; | ||
76 | gt->gt_complain_secs = 10; | ||
77 | gt->gt_statfs_quantum = 30; | ||
78 | gt->gt_statfs_slow = 0; | ||
79 | } | ||
80 | |||
43 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 81 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
44 | { | 82 | { |
45 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
96 | return sdp; | 134 | return sdp; |
97 | } | 135 | } |
98 | 136 | ||
99 | static void init_vfs(struct super_block *sb, unsigned noatime) | 137 | |
138 | /** | ||
139 | * gfs2_check_sb - Check superblock | ||
140 | * @sdp: the filesystem | ||
141 | * @sb: The superblock | ||
142 | * @silent: Don't print a message if the check fails | ||
143 | * | ||
144 | * Checks the version code of the FS is one that we understand how to | ||
145 | * read and that the sizes of the various on-disk structures have not | ||
146 | * changed. | ||
147 | */ | ||
148 | |||
149 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
100 | { | 150 | { |
101 | struct gfs2_sbd *sdp = sb->s_fs_info; | 151 | unsigned int x; |
102 | 152 | ||
103 | sb->s_magic = GFS2_MAGIC; | 153 | if (sb->sb_magic != GFS2_MAGIC || |
104 | sb->s_op = &gfs2_super_ops; | 154 | sb->sb_type != GFS2_METATYPE_SB) { |
105 | sb->s_export_op = &gfs2_export_ops; | 155 | if (!silent) |
106 | sb->s_time_gran = 1; | 156 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); |
107 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 157 | return -EINVAL; |
158 | } | ||
159 | |||
160 | /* If format numbers match exactly, we're done. */ | ||
161 | |||
162 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
163 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
164 | return 0; | ||
165 | |||
166 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
167 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
168 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
169 | break; | ||
170 | |||
171 | if (!gfs2_old_fs_formats[x]) { | ||
172 | printk(KERN_WARNING | ||
173 | "GFS2: code version (%u, %u) is incompatible " | ||
174 | "with ondisk format (%u, %u)\n", | ||
175 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
176 | sb->sb_fs_format, sb->sb_multihost_format); | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: I don't know how to upgrade this FS\n"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
184 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
185 | if (gfs2_old_multihost_formats[x] == | ||
186 | sb->sb_multihost_format) | ||
187 | break; | ||
188 | |||
189 | if (!gfs2_old_multihost_formats[x]) { | ||
190 | printk(KERN_WARNING | ||
191 | "GFS2: code version (%u, %u) is incompatible " | ||
192 | "with ondisk format (%u, %u)\n", | ||
193 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
194 | sb->sb_fs_format, sb->sb_multihost_format); | ||
195 | printk(KERN_WARNING | ||
196 | "GFS2: I don't know how to upgrade this FS\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | if (!sdp->sd_args.ar_upgrade) { | ||
202 | printk(KERN_WARNING | ||
203 | "GFS2: code version (%u, %u) is incompatible " | ||
204 | "with ondisk format (%u, %u)\n", | ||
205 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
206 | sb->sb_fs_format, sb->sb_multihost_format); | ||
207 | printk(KERN_INFO | ||
208 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
209 | "the FS\n"); | ||
210 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static void end_bio_io_page(struct bio *bio, int error) | ||
218 | { | ||
219 | struct page *page = bio->bi_private; | ||
108 | 220 | ||
109 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | 221 | if (!error) |
110 | set_bit(noatime, &sdp->sd_flags); | 222 | SetPageUptodate(page); |
223 | else | ||
224 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
225 | unlock_page(page); | ||
226 | } | ||
227 | |||
228 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
229 | { | ||
230 | const struct gfs2_sb *str = buf; | ||
231 | |||
232 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
233 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
234 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
235 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
236 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
237 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
238 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
239 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
240 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
241 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
242 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
243 | |||
244 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
245 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * gfs2_read_super - Read the gfs2 super block from disk | ||
250 | * @sdp: The GFS2 super block | ||
251 | * @sector: The location of the super block | ||
252 | * @error: The error code to return | ||
253 | * | ||
254 | * This uses the bio functions to read the super block from disk | ||
255 | * because we want to be 100% sure that we never read cached data. | ||
256 | * A super block is read twice only during each GFS2 mount and is | ||
257 | * never written to by the filesystem. The first time its read no | ||
258 | * locks are held, and the only details which are looked at are those | ||
259 | * relating to the locking protocol. Once locking is up and working, | ||
260 | * the sb is read again under the lock to establish the location of | ||
261 | * the master directory (contains pointers to journals etc) and the | ||
262 | * root directory. | ||
263 | * | ||
264 | * Returns: 0 on success or error | ||
265 | */ | ||
266 | |||
267 | static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
268 | { | ||
269 | struct super_block *sb = sdp->sd_vfs; | ||
270 | struct gfs2_sb *p; | ||
271 | struct page *page; | ||
272 | struct bio *bio; | ||
273 | |||
274 | page = alloc_page(GFP_NOFS); | ||
275 | if (unlikely(!page)) | ||
276 | return -ENOBUFS; | ||
277 | |||
278 | ClearPageUptodate(page); | ||
279 | ClearPageDirty(page); | ||
280 | lock_page(page); | ||
281 | |||
282 | bio = bio_alloc(GFP_NOFS, 1); | ||
283 | if (unlikely(!bio)) { | ||
284 | __free_page(page); | ||
285 | return -ENOBUFS; | ||
286 | } | ||
111 | 287 | ||
112 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | 288 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
113 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | 289 | bio->bi_bdev = sb->s_bdev; |
290 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
291 | |||
292 | bio->bi_end_io = end_bio_io_page; | ||
293 | bio->bi_private = page; | ||
294 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
295 | wait_on_page_locked(page); | ||
296 | bio_put(bio); | ||
297 | if (!PageUptodate(page)) { | ||
298 | __free_page(page); | ||
299 | return -EIO; | ||
300 | } | ||
301 | p = kmap(page); | ||
302 | gfs2_sb_in(&sdp->sd_sb, p); | ||
303 | kunmap(page); | ||
304 | __free_page(page); | ||
305 | return 0; | ||
306 | } | ||
307 | /** | ||
308 | * gfs2_read_sb - Read super block | ||
309 | * @sdp: The GFS2 superblock | ||
310 | * @gl: the glock for the superblock (assumed to be held) | ||
311 | * @silent: Don't print message if mount fails | ||
312 | * | ||
313 | */ | ||
314 | |||
315 | static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
316 | { | ||
317 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
318 | u32 tmp_blocks; | ||
319 | unsigned int x; | ||
320 | int error; | ||
321 | |||
322 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
323 | if (error) { | ||
324 | if (!silent) | ||
325 | fs_err(sdp, "can't read superblock\n"); | ||
326 | return error; | ||
327 | } | ||
328 | |||
329 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
330 | if (error) | ||
331 | return error; | ||
332 | |||
333 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
334 | GFS2_BASIC_BLOCK_SHIFT; | ||
335 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
336 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
337 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
338 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
339 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
340 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
341 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
342 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
343 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
344 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
345 | sizeof(struct gfs2_meta_header)) / | ||
346 | sizeof(struct gfs2_quota_change); | ||
347 | |||
348 | /* Compute maximum reservation required to add a entry to a directory */ | ||
349 | |||
350 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
351 | sdp->sd_jbsize); | ||
352 | |||
353 | ind_blocks = 0; | ||
354 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
355 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
356 | ind_blocks += tmp_blocks; | ||
357 | } | ||
358 | |||
359 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
360 | |||
361 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
362 | |||
363 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
364 | sizeof(struct gfs2_dinode); | ||
365 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
366 | for (x = 2;; x++) { | ||
367 | u64 space, d; | ||
368 | u32 m; | ||
369 | |||
370 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
371 | d = space; | ||
372 | m = do_div(d, sdp->sd_inptrs); | ||
373 | |||
374 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
375 | break; | ||
376 | sdp->sd_heightsize[x] = space; | ||
377 | } | ||
378 | sdp->sd_max_height = x; | ||
379 | sdp->sd_heightsize[x] = ~0; | ||
380 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
381 | |||
382 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
383 | sizeof(struct gfs2_dinode); | ||
384 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
385 | for (x = 2;; x++) { | ||
386 | u64 space, d; | ||
387 | u32 m; | ||
388 | |||
389 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
390 | d = space; | ||
391 | m = do_div(d, sdp->sd_inptrs); | ||
392 | |||
393 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
394 | break; | ||
395 | sdp->sd_jheightsize[x] = space; | ||
396 | } | ||
397 | sdp->sd_max_jheight = x; | ||
398 | sdp->sd_jheightsize[x] = ~0; | ||
399 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
400 | |||
401 | return 0; | ||
114 | } | 402 | } |
115 | 403 | ||
116 | static int init_names(struct gfs2_sbd *sdp, int silent) | 404 | static int init_names(struct gfs2_sbd *sdp, int silent) |
@@ -224,51 +512,59 @@ fail: | |||
224 | return error; | 512 | return error; |
225 | } | 513 | } |
226 | 514 | ||
227 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 515 | static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, |
228 | u64 no_addr) | 516 | u64 no_addr, const char *name) |
229 | { | 517 | { |
230 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | 518 | struct gfs2_sbd *sdp = sb->s_fs_info; |
519 | struct dentry *dentry; | ||
520 | struct inode *inode; | ||
521 | |||
522 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | ||
523 | if (IS_ERR(inode)) { | ||
524 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | ||
525 | return PTR_ERR(inode); | ||
526 | } | ||
527 | dentry = d_alloc_root(inode); | ||
528 | if (!dentry) { | ||
529 | fs_err(sdp, "can't alloc %s dentry\n", name); | ||
530 | iput(inode); | ||
531 | return -ENOMEM; | ||
532 | } | ||
533 | dentry->d_op = &gfs2_dops; | ||
534 | *dptr = dentry; | ||
535 | return 0; | ||
231 | } | 536 | } |
232 | 537 | ||
233 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 538 | static int init_sb(struct gfs2_sbd *sdp, int silent) |
234 | { | 539 | { |
235 | struct super_block *sb = sdp->sd_vfs; | 540 | struct super_block *sb = sdp->sd_vfs; |
236 | struct gfs2_holder sb_gh; | 541 | struct gfs2_holder sb_gh; |
237 | u64 no_addr; | 542 | u64 no_addr; |
238 | struct inode *inode; | 543 | int ret; |
239 | int error = 0; | ||
240 | 544 | ||
241 | if (undo) { | 545 | ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, |
242 | if (sb->s_root) { | 546 | LM_ST_SHARED, 0, &sb_gh); |
243 | dput(sb->s_root); | 547 | if (ret) { |
244 | sb->s_root = NULL; | 548 | fs_err(sdp, "can't acquire superblock glock: %d\n", ret); |
245 | } | 549 | return ret; |
246 | return 0; | ||
247 | } | 550 | } |
248 | 551 | ||
249 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | 552 | ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); |
250 | LM_ST_SHARED, 0, &sb_gh); | 553 | if (ret) { |
251 | if (error) { | 554 | fs_err(sdp, "can't read superblock: %d\n", ret); |
252 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
257 | if (error) { | ||
258 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
259 | goto out; | 555 | goto out; |
260 | } | 556 | } |
261 | 557 | ||
262 | /* Set up the buffer cache and SB for real */ | 558 | /* Set up the buffer cache and SB for real */ |
263 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 559 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { |
264 | error = -EINVAL; | 560 | ret = -EINVAL; |
265 | fs_err(sdp, "FS block size (%u) is too small for device " | 561 | fs_err(sdp, "FS block size (%u) is too small for device " |
266 | "block size (%u)\n", | 562 | "block size (%u)\n", |
267 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 563 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); |
268 | goto out; | 564 | goto out; |
269 | } | 565 | } |
270 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 566 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
271 | error = -EINVAL; | 567 | ret = -EINVAL; |
272 | fs_err(sdp, "FS block size (%u) is too big for machine " | 568 | fs_err(sdp, "FS block size (%u) is too big for machine " |
273 | "page size (%u)\n", | 569 | "page size (%u)\n", |
274 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | 570 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); |
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | |||
278 | 574 | ||
279 | /* Get the root inode */ | 575 | /* Get the root inode */ |
280 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; | 576 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; |
281 | if (sb->s_type == &gfs2meta_fs_type) | 577 | ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); |
282 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; | 578 | if (ret) |
283 | inode = gfs2_lookup_root(sb, no_addr); | ||
284 | if (IS_ERR(inode)) { | ||
285 | error = PTR_ERR(inode); | ||
286 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
287 | goto out; | 579 | goto out; |
288 | } | ||
289 | 580 | ||
290 | sb->s_root = d_alloc_root(inode); | 581 | /* Get the master inode */ |
291 | if (!sb->s_root) { | 582 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; |
292 | fs_err(sdp, "can't get root dentry\n"); | 583 | ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); |
293 | error = -ENOMEM; | 584 | if (ret) { |
294 | iput(inode); | 585 | dput(sdp->sd_root_dir); |
295 | } else | 586 | goto out; |
296 | sb->s_root->d_op = &gfs2_dops; | 587 | } |
297 | 588 | sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); | |
298 | out: | 589 | out: |
299 | gfs2_glock_dq_uninit(&sb_gh); | 590 | gfs2_glock_dq_uninit(&sb_gh); |
300 | return error; | 591 | return ret; |
301 | } | 592 | } |
302 | 593 | ||
303 | /** | 594 | /** |
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | |||
372 | 663 | ||
373 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 664 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
374 | { | 665 | { |
666 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
375 | struct gfs2_holder ji_gh; | 667 | struct gfs2_holder ji_gh; |
376 | struct task_struct *p; | 668 | struct task_struct *p; |
377 | struct gfs2_inode *ip; | 669 | struct gfs2_inode *ip; |
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
383 | goto fail_recoverd; | 675 | goto fail_recoverd; |
384 | } | 676 | } |
385 | 677 | ||
386 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | 678 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
387 | if (IS_ERR(sdp->sd_jindex)) { | 679 | if (IS_ERR(sdp->sd_jindex)) { |
388 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 680 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
389 | return PTR_ERR(sdp->sd_jindex); | 681 | return PTR_ERR(sdp->sd_jindex); |
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
506 | { | 798 | { |
507 | int error = 0; | 799 | int error = 0; |
508 | struct gfs2_inode *ip; | 800 | struct gfs2_inode *ip; |
509 | struct inode *inode; | 801 | struct inode *master = sdp->sd_master_dir->d_inode; |
510 | 802 | ||
511 | if (undo) | 803 | if (undo) |
512 | goto fail_qinode; | 804 | goto fail_qinode; |
513 | 805 | ||
514 | inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); | ||
515 | if (IS_ERR(inode)) { | ||
516 | error = PTR_ERR(inode); | ||
517 | fs_err(sdp, "can't read in master directory: %d\n", error); | ||
518 | goto fail; | ||
519 | } | ||
520 | sdp->sd_master_dir = inode; | ||
521 | |||
522 | error = init_journal(sdp, undo); | 806 | error = init_journal(sdp, undo); |
523 | if (error) | 807 | if (error) |
524 | goto fail_master; | 808 | goto fail; |
525 | 809 | ||
526 | /* Read in the master inode number inode */ | 810 | /* Read in the master inode number inode */ |
527 | sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); | 811 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); |
528 | if (IS_ERR(sdp->sd_inum_inode)) { | 812 | if (IS_ERR(sdp->sd_inum_inode)) { |
529 | error = PTR_ERR(sdp->sd_inum_inode); | 813 | error = PTR_ERR(sdp->sd_inum_inode); |
530 | fs_err(sdp, "can't read in inum inode: %d\n", error); | 814 | fs_err(sdp, "can't read in inum inode: %d\n", error); |
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
533 | 817 | ||
534 | 818 | ||
535 | /* Read in the master statfs inode */ | 819 | /* Read in the master statfs inode */ |
536 | sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); | 820 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
537 | if (IS_ERR(sdp->sd_statfs_inode)) { | 821 | if (IS_ERR(sdp->sd_statfs_inode)) { |
538 | error = PTR_ERR(sdp->sd_statfs_inode); | 822 | error = PTR_ERR(sdp->sd_statfs_inode); |
539 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 823 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
541 | } | 825 | } |
542 | 826 | ||
543 | /* Read in the resource index inode */ | 827 | /* Read in the resource index inode */ |
544 | sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); | 828 | sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); |
545 | if (IS_ERR(sdp->sd_rindex)) { | 829 | if (IS_ERR(sdp->sd_rindex)) { |
546 | error = PTR_ERR(sdp->sd_rindex); | 830 | error = PTR_ERR(sdp->sd_rindex); |
547 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 831 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
552 | sdp->sd_rindex_uptodate = 0; | 836 | sdp->sd_rindex_uptodate = 0; |
553 | 837 | ||
554 | /* Read in the quota inode */ | 838 | /* Read in the quota inode */ |
555 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 839 | sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); |
556 | if (IS_ERR(sdp->sd_quota_inode)) { | 840 | if (IS_ERR(sdp->sd_quota_inode)) { |
557 | error = PTR_ERR(sdp->sd_quota_inode); | 841 | error = PTR_ERR(sdp->sd_quota_inode); |
558 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 842 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
@@ -571,8 +855,6 @@ fail_inum: | |||
571 | iput(sdp->sd_inum_inode); | 855 | iput(sdp->sd_inum_inode); |
572 | fail_journal: | 856 | fail_journal: |
573 | init_journal(sdp, UNDO); | 857 | init_journal(sdp, UNDO); |
574 | fail_master: | ||
575 | iput(sdp->sd_master_dir); | ||
576 | fail: | 858 | fail: |
577 | return error; | 859 | return error; |
578 | } | 860 | } |
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
583 | char buf[30]; | 865 | char buf[30]; |
584 | int error = 0; | 866 | int error = 0; |
585 | struct gfs2_inode *ip; | 867 | struct gfs2_inode *ip; |
868 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
586 | 869 | ||
587 | if (sdp->sd_args.ar_spectator) | 870 | if (sdp->sd_args.ar_spectator) |
588 | return 0; | 871 | return 0; |
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
590 | if (undo) | 873 | if (undo) |
591 | goto fail_qc_gh; | 874 | goto fail_qc_gh; |
592 | 875 | ||
593 | pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); | 876 | pn = gfs2_lookup_simple(master, "per_node"); |
594 | if (IS_ERR(pn)) { | 877 | if (IS_ERR(pn)) { |
595 | error = PTR_ERR(pn); | 878 | error = PTR_ERR(pn); |
596 | fs_err(sdp, "can't find per_node directory: %d\n", error); | 879 | fs_err(sdp, "can't find per_node directory: %d\n", error); |
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
800 | goto fail; | 1083 | goto fail; |
801 | } | 1084 | } |
802 | 1085 | ||
803 | init_vfs(sb, SDF_NOATIME); | 1086 | sb->s_magic = GFS2_MAGIC; |
1087 | sb->s_op = &gfs2_super_ops; | ||
1088 | sb->s_export_op = &gfs2_export_ops; | ||
1089 | sb->s_time_gran = 1; | ||
1090 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
804 | 1091 | ||
805 | /* Set up the buffer cache and fill in some fake block size values | 1092 | /* Set up the buffer cache and fill in some fake block size values |
806 | to allow us to read-in the on-disk superblock. */ | 1093 | to allow us to read-in the on-disk superblock. */ |
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
828 | if (error) | 1115 | if (error) |
829 | goto fail_lm; | 1116 | goto fail_lm; |
830 | 1117 | ||
831 | error = init_sb(sdp, silent, DO); | 1118 | error = init_sb(sdp, silent); |
832 | if (error) | 1119 | if (error) |
833 | goto fail_locking; | 1120 | goto fail_locking; |
834 | 1121 | ||
@@ -869,7 +1156,11 @@ fail_per_node: | |||
869 | fail_inodes: | 1156 | fail_inodes: |
870 | init_inodes(sdp, UNDO); | 1157 | init_inodes(sdp, UNDO); |
871 | fail_sb: | 1158 | fail_sb: |
872 | init_sb(sdp, 0, UNDO); | 1159 | if (sdp->sd_root_dir) |
1160 | dput(sdp->sd_root_dir); | ||
1161 | if (sdp->sd_master_dir) | ||
1162 | dput(sdp->sd_master_dir); | ||
1163 | sb->s_root = NULL; | ||
873 | fail_locking: | 1164 | fail_locking: |
874 | init_locking(sdp, &mount_gh, UNDO); | 1165 | init_locking(sdp, &mount_gh, UNDO); |
875 | fail_lm: | 1166 | fail_lm: |
@@ -887,151 +1178,63 @@ fail: | |||
887 | } | 1178 | } |
888 | 1179 | ||
889 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | 1180 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, |
890 | const char *dev_name, void *data, struct vfsmount *mnt) | 1181 | const char *dev_name, void *data, struct vfsmount *mnt) |
891 | { | 1182 | { |
892 | struct super_block *sb; | 1183 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
893 | struct gfs2_sbd *sdp; | ||
894 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
895 | if (error) | ||
896 | goto out; | ||
897 | sb = mnt->mnt_sb; | ||
898 | sdp = sb->s_fs_info; | ||
899 | sdp->sd_gfs2mnt = mnt; | ||
900 | out: | ||
901 | return error; | ||
902 | } | 1184 | } |
903 | 1185 | ||
904 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | 1186 | static struct super_block *get_gfs2_sb(const char *dev_name) |
905 | void *data, int silent) | ||
906 | { | 1187 | { |
907 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1188 | struct super_block *sb; |
908 | struct inode *inode; | ||
909 | int error = 0; | ||
910 | |||
911 | new->s_fs_info = sdp; | ||
912 | sdp->sd_vfs_meta = sb; | ||
913 | |||
914 | init_vfs(new, SDF_NOATIME); | ||
915 | |||
916 | /* Get the master inode */ | ||
917 | inode = igrab(sdp->sd_master_dir); | ||
918 | |||
919 | new->s_root = d_alloc_root(inode); | ||
920 | if (!new->s_root) { | ||
921 | fs_err(sdp, "can't get root dentry\n"); | ||
922 | error = -ENOMEM; | ||
923 | iput(inode); | ||
924 | } else | ||
925 | new->s_root->d_op = &gfs2_dops; | ||
926 | |||
927 | return error; | ||
928 | } | ||
929 | |||
930 | static int set_bdev_super(struct super_block *s, void *data) | ||
931 | { | ||
932 | s->s_bdev = data; | ||
933 | s->s_dev = s->s_bdev->bd_dev; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static int test_bdev_super(struct super_block *s, void *data) | ||
938 | { | ||
939 | return s->s_bdev == data; | ||
940 | } | ||
941 | |||
942 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
943 | { | ||
944 | struct kstat stat; | ||
945 | struct nameidata nd; | 1189 | struct nameidata nd; |
946 | struct super_block *sb = NULL, *s; | ||
947 | int error; | 1190 | int error; |
948 | 1191 | ||
949 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | 1192 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); |
950 | if (error) { | 1193 | if (error) { |
951 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | 1194 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
952 | dev_name); | 1195 | dev_name, error); |
953 | goto out; | 1196 | return NULL; |
954 | } | ||
955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | ||
956 | |||
957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { | ||
958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
959 | (S_ISDIR(stat.mode) && | ||
960 | s == nd.path.dentry->d_inode->i_sb)) { | ||
961 | sb = s; | ||
962 | goto free_nd; | ||
963 | } | ||
964 | } | 1197 | } |
965 | 1198 | sb = nd.path.dentry->d_inode->i_sb; | |
966 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | 1199 | if (sb && (sb->s_type == &gfs2_fs_type)) |
967 | "mount point %s\n", dev_name); | 1200 | atomic_inc(&sb->s_active); |
968 | 1201 | else | |
969 | free_nd: | 1202 | sb = NULL; |
970 | path_put(&nd.path); | 1203 | path_put(&nd.path); |
971 | out: | ||
972 | return sb; | 1204 | return sb; |
973 | } | 1205 | } |
974 | 1206 | ||
975 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | 1207 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, |
976 | const char *dev_name, void *data, struct vfsmount *mnt) | 1208 | const char *dev_name, void *data, struct vfsmount *mnt) |
977 | { | 1209 | { |
978 | int error = 0; | 1210 | struct super_block *sb = NULL; |
979 | struct super_block *sb = NULL, *new; | ||
980 | struct gfs2_sbd *sdp; | 1211 | struct gfs2_sbd *sdp; |
981 | 1212 | ||
982 | sb = get_gfs2_sb(dev_name); | 1213 | sb = get_gfs2_sb(dev_name); |
983 | if (!sb) { | 1214 | if (!sb) { |
984 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1215 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
985 | error = -ENOENT; | 1216 | return -ENOENT; |
986 | goto error; | ||
987 | } | 1217 | } |
988 | sdp = sb->s_fs_info; | 1218 | sdp = sb->s_fs_info; |
989 | if (sdp->sd_vfs_meta) { | 1219 | mnt->mnt_sb = sb; |
990 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | 1220 | mnt->mnt_root = dget(sdp->sd_master_dir); |
991 | error = -EBUSY; | 1221 | return 0; |
992 | goto error; | ||
993 | } | ||
994 | down(&sb->s_bdev->bd_mount_sem); | ||
995 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
996 | up(&sb->s_bdev->bd_mount_sem); | ||
997 | if (IS_ERR(new)) { | ||
998 | error = PTR_ERR(new); | ||
999 | goto error; | ||
1000 | } | ||
1001 | new->s_flags = flags; | ||
1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
1003 | sb_set_blocksize(new, sb->s_blocksize); | ||
1004 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
1005 | if (error) { | ||
1006 | up_write(&new->s_umount); | ||
1007 | deactivate_super(new); | ||
1008 | goto error; | ||
1009 | } | ||
1010 | |||
1011 | new->s_flags |= MS_ACTIVE; | ||
1012 | |||
1013 | /* Grab a reference to the gfs2 mount point */ | ||
1014 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
1015 | return simple_set_mnt(mnt, new); | ||
1016 | error: | ||
1017 | return error; | ||
1018 | } | 1222 | } |
1019 | 1223 | ||
1020 | static void gfs2_kill_sb(struct super_block *sb) | 1224 | static void gfs2_kill_sb(struct super_block *sb) |
1021 | { | 1225 | { |
1022 | if (sb->s_fs_info) { | 1226 | struct gfs2_sbd *sdp = sb->s_fs_info; |
1023 | gfs2_delete_debugfs_file(sb->s_fs_info); | 1227 | if (sdp) { |
1024 | gfs2_meta_syncfs(sb->s_fs_info); | 1228 | gfs2_meta_syncfs(sdp); |
1229 | dput(sdp->sd_root_dir); | ||
1230 | dput(sdp->sd_master_dir); | ||
1231 | sdp->sd_root_dir = NULL; | ||
1232 | sdp->sd_master_dir = NULL; | ||
1025 | } | 1233 | } |
1234 | shrink_dcache_sb(sb); | ||
1026 | kill_block_super(sb); | 1235 | kill_block_super(sb); |
1027 | } | 1236 | if (sdp) |
1028 | 1237 | gfs2_delete_debugfs_file(sdp); | |
1029 | static void gfs2_kill_sb_meta(struct super_block *sb) | ||
1030 | { | ||
1031 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
1032 | generic_shutdown_super(sb); | ||
1033 | sdp->sd_vfs_meta = NULL; | ||
1034 | atomic_dec(&sdp->sd_gfs2mnt->mnt_count); | ||
1035 | } | 1238 | } |
1036 | 1239 | ||
1037 | struct file_system_type gfs2_fs_type = { | 1240 | struct file_system_type gfs2_fs_type = { |
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { | |||
1046 | .name = "gfs2meta", | 1249 | .name = "gfs2meta", |
1047 | .fs_flags = FS_REQUIRES_DEV, | 1250 | .fs_flags = FS_REQUIRES_DEV, |
1048 | .get_sb = gfs2_get_sb_meta, | 1251 | .get_sb = gfs2_get_sb_meta, |
1049 | .kill_sb = gfs2_kill_sb_meta, | ||
1050 | .owner = THIS_MODULE, | 1252 | .owner = THIS_MODULE, |
1051 | }; | 1253 | }; |
1052 | 1254 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f73a778..534e1e2c65ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
161 | 161 | ||
162 | error = gfs2_glock_nq_m(2, ghs); | 162 | error = gfs2_glock_nq(ghs); /* parent */ |
163 | if (error) | 163 | if (error) |
164 | goto out; | 164 | goto out_parent; |
165 | |||
166 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
167 | if (error) | ||
168 | goto out_child; | ||
165 | 169 | ||
166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); | 170 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
167 | if (error) | 171 | if (error) |
@@ -245,8 +249,10 @@ out_alloc: | |||
245 | if (alloc_required) | 249 | if (alloc_required) |
246 | gfs2_alloc_put(dip); | 250 | gfs2_alloc_put(dip); |
247 | out_gunlock: | 251 | out_gunlock: |
248 | gfs2_glock_dq_m(2, ghs); | 252 | gfs2_glock_dq(ghs + 1); |
249 | out: | 253 | out_child: |
254 | gfs2_glock_dq(ghs); | ||
255 | out_parent: | ||
250 | gfs2_holder_uninit(ghs); | 256 | gfs2_holder_uninit(ghs); |
251 | gfs2_holder_uninit(ghs + 1); | 257 | gfs2_holder_uninit(ghs + 1); |
252 | if (!error) { | 258 | if (!error) { |
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
302 | 308 | ||
303 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 309 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
304 | if (error) | 310 | if (error) |
305 | goto out_rgrp; | 311 | goto out_gunlock; |
306 | 312 | ||
307 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 313 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
308 | if (error) | 314 | if (error) |
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
316 | 322 | ||
317 | out_end_trans: | 323 | out_end_trans: |
318 | gfs2_trans_end(sdp); | 324 | gfs2_trans_end(sdp); |
325 | out_gunlock: | ||
319 | gfs2_glock_dq(ghs + 2); | 326 | gfs2_glock_dq(ghs + 2); |
320 | out_rgrp: | 327 | out_rgrp: |
321 | gfs2_holder_uninit(ghs + 2); | 328 | gfs2_holder_uninit(ghs + 2); |
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
485 | struct gfs2_holder ri_gh; | 492 | struct gfs2_holder ri_gh; |
486 | int error; | 493 | int error; |
487 | 494 | ||
488 | |||
489 | error = gfs2_rindex_hold(sdp, &ri_gh); | 495 | error = gfs2_rindex_hold(sdp, &ri_gh); |
490 | if (error) | 496 | if (error) |
491 | return error; | 497 | return error; |
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
495 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 501 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
496 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | 502 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); |
497 | 503 | ||
498 | error = gfs2_glock_nq_m(3, ghs); | 504 | error = gfs2_glock_nq(ghs); /* parent */ |
499 | if (error) | 505 | if (error) |
500 | goto out; | 506 | goto out_parent; |
507 | |||
508 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
509 | if (error) | ||
510 | goto out_child; | ||
511 | |||
512 | error = gfs2_glock_nq(ghs + 2); /* rgrp */ | ||
513 | if (error) | ||
514 | goto out_rgrp; | ||
501 | 515 | ||
502 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 516 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
503 | if (error) | 517 | if (error) |
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
523 | gfs2_trans_end(sdp); | 537 | gfs2_trans_end(sdp); |
524 | 538 | ||
525 | out_gunlock: | 539 | out_gunlock: |
526 | gfs2_glock_dq_m(3, ghs); | 540 | gfs2_glock_dq(ghs + 2); |
527 | out: | 541 | out_rgrp: |
528 | gfs2_holder_uninit(ghs); | ||
529 | gfs2_holder_uninit(ghs + 1); | ||
530 | gfs2_holder_uninit(ghs + 2); | 542 | gfs2_holder_uninit(ghs + 2); |
543 | gfs2_glock_dq(ghs + 1); | ||
544 | out_child: | ||
545 | gfs2_holder_uninit(ghs + 1); | ||
546 | gfs2_glock_dq(ghs); | ||
547 | out_parent: | ||
548 | gfs2_holder_uninit(ghs); | ||
531 | gfs2_glock_dq_uninit(&ri_gh); | 549 | gfs2_glock_dq_uninit(&ri_gh); |
532 | return error; | 550 | return error; |
533 | } | 551 | } |
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
571 | return 0; | 589 | return 0; |
572 | } | 590 | } |
573 | 591 | ||
592 | /* | ||
593 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
594 | * @this: move this | ||
595 | * @to: to here | ||
596 | * | ||
597 | * Follow @to back to the root and make sure we don't encounter @this | ||
598 | * Assumes we already hold the rename lock. | ||
599 | * | ||
600 | * Returns: errno | ||
601 | */ | ||
602 | |||
603 | static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
604 | { | ||
605 | struct inode *dir = &to->i_inode; | ||
606 | struct super_block *sb = dir->i_sb; | ||
607 | struct inode *tmp; | ||
608 | struct qstr dotdot; | ||
609 | int error = 0; | ||
610 | |||
611 | gfs2_str2qstr(&dotdot, ".."); | ||
612 | |||
613 | igrab(dir); | ||
614 | |||
615 | for (;;) { | ||
616 | if (dir == &this->i_inode) { | ||
617 | error = -EINVAL; | ||
618 | break; | ||
619 | } | ||
620 | if (dir == sb->s_root->d_inode) { | ||
621 | error = 0; | ||
622 | break; | ||
623 | } | ||
624 | |||
625 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
626 | if (IS_ERR(tmp)) { | ||
627 | error = PTR_ERR(tmp); | ||
628 | break; | ||
629 | } | ||
630 | |||
631 | iput(dir); | ||
632 | dir = tmp; | ||
633 | } | ||
634 | |||
635 | iput(dir); | ||
636 | |||
637 | return error; | ||
638 | } | ||
639 | |||
574 | /** | 640 | /** |
575 | * gfs2_rename - Rename a file | 641 | * gfs2_rename - Rename a file |
576 | * @odir: Parent directory of old file name | 642 | * @odir: Parent directory of old file name |
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
589 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 655 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
590 | struct gfs2_inode *nip = NULL; | 656 | struct gfs2_inode *nip = NULL; |
591 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 657 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
592 | struct gfs2_holder ghs[5], r_gh; | 658 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
593 | struct gfs2_rgrpd *nrgd; | 659 | struct gfs2_rgrpd *nrgd; |
594 | unsigned int num_gh; | 660 | unsigned int num_gh; |
595 | int dir_rename = 0; | 661 | int dir_rename = 0; |
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
603 | return 0; | 669 | return 0; |
604 | } | 670 | } |
605 | 671 | ||
606 | /* Make sure we aren't trying to move a dirctory into it's subdir */ | ||
607 | |||
608 | if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { | ||
609 | dir_rename = 1; | ||
610 | 672 | ||
611 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, | 673 | if (odip != ndip) { |
612 | &r_gh); | 674 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
675 | 0, &r_gh); | ||
613 | if (error) | 676 | if (error) |
614 | goto out; | 677 | goto out; |
615 | 678 | ||
616 | error = gfs2_ok_to_move(ip, ndip); | 679 | if (S_ISDIR(ip->i_inode.i_mode)) { |
617 | if (error) | 680 | dir_rename = 1; |
618 | goto out_gunlock_r; | 681 | /* don't move a dirctory into it's subdir */ |
682 | error = gfs2_ok_to_move(ip, ndip); | ||
683 | if (error) | ||
684 | goto out_gunlock_r; | ||
685 | } | ||
619 | } | 686 | } |
620 | 687 | ||
621 | num_gh = 1; | 688 | num_gh = 1; |
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
639 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | 706 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); |
640 | } | 707 | } |
641 | 708 | ||
642 | error = gfs2_glock_nq_m(num_gh, ghs); | 709 | for (x = 0; x < num_gh; x++) { |
643 | if (error) | 710 | error = gfs2_glock_nq(ghs + x); |
644 | goto out_uninit; | 711 | if (error) |
712 | goto out_gunlock; | ||
713 | } | ||
645 | 714 | ||
646 | /* Check out the old directory */ | 715 | /* Check out the old directory */ |
647 | 716 | ||
@@ -804,12 +873,12 @@ out_alloc: | |||
804 | if (alloc_required) | 873 | if (alloc_required) |
805 | gfs2_alloc_put(ndip); | 874 | gfs2_alloc_put(ndip); |
806 | out_gunlock: | 875 | out_gunlock: |
807 | gfs2_glock_dq_m(num_gh, ghs); | 876 | while (x--) { |
808 | out_uninit: | 877 | gfs2_glock_dq(ghs + x); |
809 | for (x = 0; x < num_gh; x++) | ||
810 | gfs2_holder_uninit(ghs + x); | 878 | gfs2_holder_uninit(ghs + x); |
879 | } | ||
811 | out_gunlock_r: | 880 | out_gunlock_r: |
812 | if (dir_rename) | 881 | if (r_gh.gh_gl) |
813 | gfs2_glock_dq_uninit(&r_gh); | 882 | gfs2_glock_dq_uninit(&r_gh); |
814 | out: | 883 | out: |
815 | return error; | 884 | return error; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f7a356..d5355d9b5926 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/time.h> | ||
23 | 24 | ||
24 | #include "gfs2.h" | 25 | #include "gfs2.h" |
25 | #include "incore.h" | 26 | #include "incore.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "dir.h" | 39 | #include "dir.h" |
39 | #include "eattr.h" | 40 | #include "eattr.h" |
40 | #include "bmap.h" | 41 | #include "bmap.h" |
42 | #include "meta_io.h" | ||
41 | 43 | ||
42 | /** | 44 | /** |
43 | * gfs2_write_inode - Make sure the inode is stable on the disk | 45 | * gfs2_write_inode - Make sure the inode is stable on the disk |
@@ -50,16 +52,74 @@ | |||
50 | static int gfs2_write_inode(struct inode *inode, int sync) | 52 | static int gfs2_write_inode(struct inode *inode, int sync) |
51 | { | 53 | { |
52 | struct gfs2_inode *ip = GFS2_I(inode); | 54 | struct gfs2_inode *ip = GFS2_I(inode); |
53 | 55 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
54 | /* Check this is a "normal" inode */ | 56 | struct gfs2_holder gh; |
55 | if (test_bit(GIF_USER, &ip->i_flags)) { | 57 | struct buffer_head *bh; |
56 | if (current->flags & PF_MEMALLOC) | 58 | struct timespec atime; |
57 | return 0; | 59 | struct gfs2_dinode *di; |
58 | if (sync) | 60 | int ret = 0; |
59 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 61 | |
62 | /* Check this is a "normal" inode, etc */ | ||
63 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
64 | (current->flags & PF_MEMALLOC)) | ||
65 | return 0; | ||
66 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
67 | if (ret) | ||
68 | goto do_flush; | ||
69 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
70 | if (ret) | ||
71 | goto do_unlock; | ||
72 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
73 | if (ret == 0) { | ||
74 | di = (struct gfs2_dinode *)bh->b_data; | ||
75 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
76 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
77 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
78 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
79 | gfs2_dinode_out(ip, bh->b_data); | ||
80 | } | ||
81 | brelse(bh); | ||
60 | } | 82 | } |
83 | gfs2_trans_end(sdp); | ||
84 | do_unlock: | ||
85 | gfs2_glock_dq_uninit(&gh); | ||
86 | do_flush: | ||
87 | if (sync != 0) | ||
88 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
89 | return ret; | ||
90 | } | ||
61 | 91 | ||
62 | return 0; | 92 | /** |
93 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
94 | * @sdp: the filesystem | ||
95 | * | ||
96 | * Returns: errno | ||
97 | */ | ||
98 | |||
99 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
100 | { | ||
101 | struct gfs2_holder t_gh; | ||
102 | int error; | ||
103 | |||
104 | gfs2_quota_sync(sdp); | ||
105 | gfs2_statfs_sync(sdp); | ||
106 | |||
107 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
108 | &t_gh); | ||
109 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
110 | return error; | ||
111 | |||
112 | gfs2_meta_syncfs(sdp); | ||
113 | gfs2_log_shutdown(sdp); | ||
114 | |||
115 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
116 | |||
117 | if (t_gh.gh_gl) | ||
118 | gfs2_glock_dq_uninit(&t_gh); | ||
119 | |||
120 | gfs2_quota_cleanup(sdp); | ||
121 | |||
122 | return error; | ||
63 | } | 123 | } |
64 | 124 | ||
65 | /** | 125 | /** |
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
73 | struct gfs2_sbd *sdp = sb->s_fs_info; | 133 | struct gfs2_sbd *sdp = sb->s_fs_info; |
74 | int error; | 134 | int error; |
75 | 135 | ||
76 | if (!sdp) | ||
77 | return; | ||
78 | |||
79 | if (!strncmp(sb->s_type->name, "gfs2meta", 8)) | ||
80 | return; /* Nothing to do */ | ||
81 | |||
82 | /* Unfreeze the filesystem, if we need to */ | 136 | /* Unfreeze the filesystem, if we need to */ |
83 | 137 | ||
84 | mutex_lock(&sdp->sd_freeze_lock); | 138 | mutex_lock(&sdp->sd_freeze_lock); |
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
101 | 155 | ||
102 | /* Release stuff */ | 156 | /* Release stuff */ |
103 | 157 | ||
104 | iput(sdp->sd_master_dir); | ||
105 | iput(sdp->sd_jindex); | 158 | iput(sdp->sd_jindex); |
106 | iput(sdp->sd_inum_inode); | 159 | iput(sdp->sd_inum_inode); |
107 | iput(sdp->sd_statfs_inode); | 160 | iput(sdp->sd_statfs_inode); |
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
152 | * | 205 | * |
153 | * Flushes the log to disk. | 206 | * Flushes the log to disk. |
154 | */ | 207 | */ |
208 | |||
155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 209 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
156 | { | 210 | { |
157 | sb->s_dirt = 0; | 211 | sb->s_dirt = 0; |
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
270 | } | 324 | } |
271 | } | 325 | } |
272 | 326 | ||
273 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
274 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
275 | else | ||
276 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
277 | |||
278 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
279 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
280 | |||
281 | return error; | 327 | return error; |
282 | } | 328 | } |
283 | 329 | ||
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
295 | * inode's blocks, or alternatively pass the baton on to another | 341 | * inode's blocks, or alternatively pass the baton on to another |
296 | * node for later deallocation. | 342 | * node for later deallocation. |
297 | */ | 343 | */ |
344 | |||
298 | static void gfs2_drop_inode(struct inode *inode) | 345 | static void gfs2_drop_inode(struct inode *inode) |
299 | { | 346 | { |
300 | struct gfs2_inode *ip = GFS2_I(inode); | 347 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) | |||
333 | } | 380 | } |
334 | } | 381 | } |
335 | 382 | ||
383 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
384 | { | ||
385 | do { | ||
386 | if (d1 == d2) | ||
387 | return 1; | ||
388 | d1 = d1->d_parent; | ||
389 | } while (!IS_ROOT(d1)); | ||
390 | return 0; | ||
391 | } | ||
392 | |||
336 | /** | 393 | /** |
337 | * gfs2_show_options - Show mount options for /proc/mounts | 394 | * gfs2_show_options - Show mount options for /proc/mounts |
338 | * @s: seq_file structure | 395 | * @s: seq_file structure |
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
346 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | 403 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; |
347 | struct gfs2_args *args = &sdp->sd_args; | 404 | struct gfs2_args *args = &sdp->sd_args; |
348 | 405 | ||
406 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
407 | seq_printf(s, ",meta"); | ||
349 | if (args->ar_lockproto[0]) | 408 | if (args->ar_lockproto[0]) |
350 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 409 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
351 | if (args->ar_locktable[0]) | 410 | if (args->ar_locktable[0]) |
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
414 | * conversion on the iopen lock, but we can change that later. This | 473 | * conversion on the iopen lock, but we can change that later. This |
415 | * is safe, just less efficient. | 474 | * is safe, just less efficient. |
416 | */ | 475 | */ |
476 | |||
417 | static void gfs2_delete_inode(struct inode *inode) | 477 | static void gfs2_delete_inode(struct inode *inode) |
418 | { | 478 | { |
419 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 479 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
@@ -478,8 +538,6 @@ out: | |||
478 | clear_inode(inode); | 538 | clear_inode(inode); |
479 | } | 539 | } |
480 | 540 | ||
481 | |||
482 | |||
483 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 541 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
484 | { | 542 | { |
485 | struct gfs2_inode *ip; | 543 | struct gfs2_inode *ip; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca831991cbc2..c3ba3d9d0aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -33,313 +33,6 @@ | |||
33 | #include "trans.h" | 33 | #include "trans.h" |
34 | #include "util.h" | 34 | #include "util.h" |
35 | 35 | ||
36 | static const u32 gfs2_old_fs_formats[] = { | ||
37 | 0 | ||
38 | }; | ||
39 | |||
40 | static const u32 gfs2_old_multihost_formats[] = { | ||
41 | 0 | ||
42 | }; | ||
43 | |||
44 | /** | ||
45 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
46 | * @gt: tune | ||
47 | * | ||
48 | */ | ||
49 | |||
50 | void gfs2_tune_init(struct gfs2_tune *gt) | ||
51 | { | ||
52 | spin_lock_init(>->gt_spin); | ||
53 | |||
54 | gt->gt_demote_secs = 300; | ||
55 | gt->gt_incore_log_blocks = 1024; | ||
56 | gt->gt_log_flush_secs = 60; | ||
57 | gt->gt_recoverd_secs = 60; | ||
58 | gt->gt_logd_secs = 1; | ||
59 | gt->gt_quotad_secs = 5; | ||
60 | gt->gt_quota_simul_sync = 64; | ||
61 | gt->gt_quota_warn_period = 10; | ||
62 | gt->gt_quota_scale_num = 1; | ||
63 | gt->gt_quota_scale_den = 1; | ||
64 | gt->gt_quota_cache_secs = 300; | ||
65 | gt->gt_quota_quantum = 60; | ||
66 | gt->gt_atime_quantum = 3600; | ||
67 | gt->gt_new_files_jdata = 0; | ||
68 | gt->gt_max_readahead = 1 << 18; | ||
69 | gt->gt_stall_secs = 600; | ||
70 | gt->gt_complain_secs = 10; | ||
71 | gt->gt_statfs_quantum = 30; | ||
72 | gt->gt_statfs_slow = 0; | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * gfs2_check_sb - Check superblock | ||
77 | * @sdp: the filesystem | ||
78 | * @sb: The superblock | ||
79 | * @silent: Don't print a message if the check fails | ||
80 | * | ||
81 | * Checks the version code of the FS is one that we understand how to | ||
82 | * read and that the sizes of the various on-disk structures have not | ||
83 | * changed. | ||
84 | */ | ||
85 | |||
86 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
87 | { | ||
88 | unsigned int x; | ||
89 | |||
90 | if (sb->sb_magic != GFS2_MAGIC || | ||
91 | sb->sb_type != GFS2_METATYPE_SB) { | ||
92 | if (!silent) | ||
93 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
94 | return -EINVAL; | ||
95 | } | ||
96 | |||
97 | /* If format numbers match exactly, we're done. */ | ||
98 | |||
99 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
100 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
101 | return 0; | ||
102 | |||
103 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
104 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
105 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
106 | break; | ||
107 | |||
108 | if (!gfs2_old_fs_formats[x]) { | ||
109 | printk(KERN_WARNING | ||
110 | "GFS2: code version (%u, %u) is incompatible " | ||
111 | "with ondisk format (%u, %u)\n", | ||
112 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
113 | sb->sb_fs_format, sb->sb_multihost_format); | ||
114 | printk(KERN_WARNING | ||
115 | "GFS2: I don't know how to upgrade this FS\n"); | ||
116 | return -EINVAL; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
121 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
122 | if (gfs2_old_multihost_formats[x] == | ||
123 | sb->sb_multihost_format) | ||
124 | break; | ||
125 | |||
126 | if (!gfs2_old_multihost_formats[x]) { | ||
127 | printk(KERN_WARNING | ||
128 | "GFS2: code version (%u, %u) is incompatible " | ||
129 | "with ondisk format (%u, %u)\n", | ||
130 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
131 | sb->sb_fs_format, sb->sb_multihost_format); | ||
132 | printk(KERN_WARNING | ||
133 | "GFS2: I don't know how to upgrade this FS\n"); | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (!sdp->sd_args.ar_upgrade) { | ||
139 | printk(KERN_WARNING | ||
140 | "GFS2: code version (%u, %u) is incompatible " | ||
141 | "with ondisk format (%u, %u)\n", | ||
142 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
143 | sb->sb_fs_format, sb->sb_multihost_format); | ||
144 | printk(KERN_INFO | ||
145 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
146 | "the FS\n"); | ||
147 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | |||
155 | static void end_bio_io_page(struct bio *bio, int error) | ||
156 | { | ||
157 | struct page *page = bio->bi_private; | ||
158 | |||
159 | if (!error) | ||
160 | SetPageUptodate(page); | ||
161 | else | ||
162 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
163 | unlock_page(page); | ||
164 | } | ||
165 | |||
166 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
167 | { | ||
168 | const struct gfs2_sb *str = buf; | ||
169 | |||
170 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
171 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
172 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
173 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
174 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
175 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
176 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
177 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
178 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
179 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
180 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
181 | |||
182 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
183 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * gfs2_read_super - Read the gfs2 super block from disk | ||
188 | * @sdp: The GFS2 super block | ||
189 | * @sector: The location of the super block | ||
190 | * @error: The error code to return | ||
191 | * | ||
192 | * This uses the bio functions to read the super block from disk | ||
193 | * because we want to be 100% sure that we never read cached data. | ||
194 | * A super block is read twice only during each GFS2 mount and is | ||
195 | * never written to by the filesystem. The first time its read no | ||
196 | * locks are held, and the only details which are looked at are those | ||
197 | * relating to the locking protocol. Once locking is up and working, | ||
198 | * the sb is read again under the lock to establish the location of | ||
199 | * the master directory (contains pointers to journals etc) and the | ||
200 | * root directory. | ||
201 | * | ||
202 | * Returns: 0 on success or error | ||
203 | */ | ||
204 | |||
205 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
206 | { | ||
207 | struct super_block *sb = sdp->sd_vfs; | ||
208 | struct gfs2_sb *p; | ||
209 | struct page *page; | ||
210 | struct bio *bio; | ||
211 | |||
212 | page = alloc_page(GFP_NOFS); | ||
213 | if (unlikely(!page)) | ||
214 | return -ENOBUFS; | ||
215 | |||
216 | ClearPageUptodate(page); | ||
217 | ClearPageDirty(page); | ||
218 | lock_page(page); | ||
219 | |||
220 | bio = bio_alloc(GFP_NOFS, 1); | ||
221 | if (unlikely(!bio)) { | ||
222 | __free_page(page); | ||
223 | return -ENOBUFS; | ||
224 | } | ||
225 | |||
226 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | ||
227 | bio->bi_bdev = sb->s_bdev; | ||
228 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
229 | |||
230 | bio->bi_end_io = end_bio_io_page; | ||
231 | bio->bi_private = page; | ||
232 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
233 | wait_on_page_locked(page); | ||
234 | bio_put(bio); | ||
235 | if (!PageUptodate(page)) { | ||
236 | __free_page(page); | ||
237 | return -EIO; | ||
238 | } | ||
239 | p = kmap(page); | ||
240 | gfs2_sb_in(&sdp->sd_sb, p); | ||
241 | kunmap(page); | ||
242 | __free_page(page); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * gfs2_read_sb - Read super block | ||
248 | * @sdp: The GFS2 superblock | ||
249 | * @gl: the glock for the superblock (assumed to be held) | ||
250 | * @silent: Don't print message if mount fails | ||
251 | * | ||
252 | */ | ||
253 | |||
254 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
255 | { | ||
256 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
257 | u32 tmp_blocks; | ||
258 | unsigned int x; | ||
259 | int error; | ||
260 | |||
261 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
262 | if (error) { | ||
263 | if (!silent) | ||
264 | fs_err(sdp, "can't read superblock\n"); | ||
265 | return error; | ||
266 | } | ||
267 | |||
268 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
269 | if (error) | ||
270 | return error; | ||
271 | |||
272 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
273 | GFS2_BASIC_BLOCK_SHIFT; | ||
274 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
275 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
276 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
277 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
278 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
279 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
280 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
281 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
282 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
283 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
284 | sizeof(struct gfs2_meta_header)) / | ||
285 | sizeof(struct gfs2_quota_change); | ||
286 | |||
287 | /* Compute maximum reservation required to add a entry to a directory */ | ||
288 | |||
289 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
290 | sdp->sd_jbsize); | ||
291 | |||
292 | ind_blocks = 0; | ||
293 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
294 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
295 | ind_blocks += tmp_blocks; | ||
296 | } | ||
297 | |||
298 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
299 | |||
300 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
301 | |||
302 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
303 | sizeof(struct gfs2_dinode); | ||
304 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
305 | for (x = 2;; x++) { | ||
306 | u64 space, d; | ||
307 | u32 m; | ||
308 | |||
309 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
310 | d = space; | ||
311 | m = do_div(d, sdp->sd_inptrs); | ||
312 | |||
313 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
314 | break; | ||
315 | sdp->sd_heightsize[x] = space; | ||
316 | } | ||
317 | sdp->sd_max_height = x; | ||
318 | sdp->sd_heightsize[x] = ~0; | ||
319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
320 | |||
321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
322 | sizeof(struct gfs2_dinode); | ||
323 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
324 | for (x = 2;; x++) { | ||
325 | u64 space, d; | ||
326 | u32 m; | ||
327 | |||
328 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
329 | d = space; | ||
330 | m = do_div(d, sdp->sd_inptrs); | ||
331 | |||
332 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
333 | break; | ||
334 | sdp->sd_jheightsize[x] = space; | ||
335 | } | ||
336 | sdp->sd_max_jheight = x; | ||
337 | sdp->sd_jheightsize[x] = ~0; | ||
338 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
339 | |||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /** | 36 | /** |
344 | * gfs2_jindex_hold - Grab a lock on the jindex | 37 | * gfs2_jindex_hold - Grab a lock on the jindex |
345 | * @sdp: The GFS2 superblock | 38 | * @sdp: The GFS2 superblock |
@@ -581,39 +274,6 @@ fail: | |||
581 | return error; | 274 | return error; |
582 | } | 275 | } |
583 | 276 | ||
584 | /** | ||
585 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
586 | * @sdp: the filesystem | ||
587 | * | ||
588 | * Returns: errno | ||
589 | */ | ||
590 | |||
591 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
592 | { | ||
593 | struct gfs2_holder t_gh; | ||
594 | int error; | ||
595 | |||
596 | gfs2_quota_sync(sdp); | ||
597 | gfs2_statfs_sync(sdp); | ||
598 | |||
599 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
600 | &t_gh); | ||
601 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
602 | return error; | ||
603 | |||
604 | gfs2_meta_syncfs(sdp); | ||
605 | gfs2_log_shutdown(sdp); | ||
606 | |||
607 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
608 | |||
609 | if (t_gh.gh_gl) | ||
610 | gfs2_glock_dq_uninit(&t_gh); | ||
611 | |||
612 | gfs2_quota_cleanup(sdp); | ||
613 | |||
614 | return error; | ||
615 | } | ||
616 | |||
617 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 277 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
618 | { | 278 | { |
619 | const struct gfs2_statfs_change *str = buf; | 279 | const struct gfs2_statfs_change *str = buf; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f7..50a4c9b1215e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -12,11 +12,6 @@ | |||
12 | 12 | ||
13 | #include "incore.h" | 13 | #include "incore.h" |
14 | 14 | ||
15 | void gfs2_tune_init(struct gfs2_tune *gt); | ||
16 | |||
17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | ||
18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | ||
19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | ||
20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | 15 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); |
21 | 16 | ||
22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 17 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
40 | struct gfs2_inode **ipp); | 35 | struct gfs2_inode **ipp); |
41 | 36 | ||
42 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 37 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
43 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp); | ||
44 | 38 | ||
45 | int gfs2_statfs_init(struct gfs2_sbd *sdp); | 39 | int gfs2_statfs_init(struct gfs2_sbd *sdp); |
46 | void gfs2_statfs_change(struct gfs2_sbd *sdp, | 40 | void gfs2_statfs_change(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 74846559fc3f..7e1879f1a02c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); | |||
269 | ARGS_ATTR(suiddir, "%d\n"); | 269 | ARGS_ATTR(suiddir, "%d\n"); |
270 | ARGS_ATTR(data, "%d\n"); | 270 | ARGS_ATTR(data, "%d\n"); |
271 | 271 | ||
272 | /* one oddball doesn't fit the macro mold */ | ||
273 | static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) | ||
274 | { | ||
275 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
276 | !!test_bit(SDF_NOATIME, &sdp->sd_flags)); | ||
277 | } | ||
278 | static struct args_attr args_attr_noatime = __ATTR_RO(noatime); | ||
279 | |||
280 | static struct attribute *args_attrs[] = { | 272 | static struct attribute *args_attrs[] = { |
281 | &args_attr_lockproto.attr, | 273 | &args_attr_lockproto.attr, |
282 | &args_attr_locktable.attr, | 274 | &args_attr_locktable.attr, |
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = { | |||
292 | &args_attr_quota.attr, | 284 | &args_attr_quota.attr, |
293 | &args_attr_suiddir.attr, | 285 | &args_attr_suiddir.attr, |
294 | &args_attr_data.attr, | 286 | &args_attr_data.attr, |
295 | &args_attr_noatime.attr, | ||
296 | NULL, | 287 | NULL, |
297 | }; | 288 | }; |
298 | 289 | ||
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0); | |||
407 | TUNE_ATTR(log_flush_secs, 0); | 398 | TUNE_ATTR(log_flush_secs, 0); |
408 | TUNE_ATTR(quota_warn_period, 0); | 399 | TUNE_ATTR(quota_warn_period, 0); |
409 | TUNE_ATTR(quota_quantum, 0); | 400 | TUNE_ATTR(quota_quantum, 0); |
410 | TUNE_ATTR(atime_quantum, 0); | ||
411 | TUNE_ATTR(max_readahead, 0); | 401 | TUNE_ATTR(max_readahead, 0); |
412 | TUNE_ATTR(complain_secs, 0); | 402 | TUNE_ATTR(complain_secs, 0); |
413 | TUNE_ATTR(statfs_slow, 0); | 403 | TUNE_ATTR(statfs_slow, 0); |
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = { | |||
427 | &tune_attr_log_flush_secs.attr, | 417 | &tune_attr_log_flush_secs.attr, |
428 | &tune_attr_quota_warn_period.attr, | 418 | &tune_attr_quota_warn_period.attr, |
429 | &tune_attr_quota_quantum.attr, | 419 | &tune_attr_quota_quantum.attr, |
430 | &tune_attr_atime_quantum.attr, | ||
431 | &tune_attr_max_readahead.attr, | 420 | &tune_attr_max_readahead.attr, |
432 | &tune_attr_complain_secs.attr, | 421 | &tune_attr_complain_secs.attr, |
433 | &tune_attr_statfs_slow.attr, | 422 | &tune_attr_statfs_slow.attr, |
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index ba851576ebb1..6d98f116ca03 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c | |||
@@ -190,6 +190,10 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, | |||
190 | 190 | ||
191 | fd->search_key->cat.ParID = rec.thread.ParID; | 191 | fd->search_key->cat.ParID = rec.thread.ParID; |
192 | len = fd->search_key->cat.CName.len = rec.thread.CName.len; | 192 | len = fd->search_key->cat.CName.len = rec.thread.CName.len; |
193 | if (len > HFS_NAMELEN) { | ||
194 | printk(KERN_ERR "hfs: bad catalog namelength\n"); | ||
195 | return -EIO; | ||
196 | } | ||
193 | memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); | 197 | memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); |
194 | return hfs_brec_find(fd); | 198 | return hfs_brec_find(fd); |
195 | } | 199 | } |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4abb1047c689..3c7c7637719c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -173,7 +173,7 @@ enum { | |||
173 | opt_err | 173 | opt_err |
174 | }; | 174 | }; |
175 | 175 | ||
176 | static match_table_t tokens = { | 176 | static const match_table_t tokens = { |
177 | { opt_uid, "uid=%u" }, | 177 | { opt_uid, "uid=%u" }, |
178 | { opt_gid, "gid=%u" }, | 178 | { opt_gid, "gid=%u" }, |
179 | { opt_umask, "umask=%o" }, | 179 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index d128a25b74d2..ea30afc2a03c 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
@@ -32,6 +32,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); |
33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; |
34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
35 | if (IS_ERR(page)) { | ||
36 | start = size; | ||
37 | goto out; | ||
38 | } | ||
35 | pptr = kmap(page); | 39 | pptr = kmap(page); |
36 | curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; | 40 | curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; |
37 | i = offset % 32; | 41 | i = offset % 32; |
@@ -73,6 +77,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
73 | break; | 77 | break; |
74 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, | 78 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, |
75 | NULL); | 79 | NULL); |
80 | if (IS_ERR(page)) { | ||
81 | start = size; | ||
82 | goto out; | ||
83 | } | ||
76 | curr = pptr = kmap(page); | 84 | curr = pptr = kmap(page); |
77 | if ((size ^ offset) / PAGE_CACHE_BITS) | 85 | if ((size ^ offset) / PAGE_CACHE_BITS) |
78 | end = pptr + PAGE_CACHE_BITS / 32; | 86 | end = pptr + PAGE_CACHE_BITS / 32; |
@@ -120,6 +128,10 @@ found: | |||
120 | offset += PAGE_CACHE_BITS; | 128 | offset += PAGE_CACHE_BITS; |
121 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, | 129 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, |
122 | NULL); | 130 | NULL); |
131 | if (IS_ERR(page)) { | ||
132 | start = size; | ||
133 | goto out; | ||
134 | } | ||
123 | pptr = kmap(page); | 135 | pptr = kmap(page); |
124 | curr = pptr; | 136 | curr = pptr; |
125 | end = pptr + PAGE_CACHE_BITS / 32; | 137 | end = pptr + PAGE_CACHE_BITS / 32; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index ba117c445e78..f6874acb2cf2 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
@@ -168,6 +168,11 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
168 | return -EIO; | 168 | return -EIO; |
169 | } | 169 | } |
170 | 170 | ||
171 | if (be16_to_cpu(tmp.thread.nodeName.length) > 255) { | ||
172 | printk(KERN_ERR "hfs: catalog name length corrupted\n"); | ||
173 | return -EIO; | ||
174 | } | ||
175 | |||
171 | hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), | 176 | hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), |
172 | &tmp.thread.nodeName); | 177 | &tmp.thread.nodeName); |
173 | return hfs_brec_find(fd); | 178 | return hfs_brec_find(fd); |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 9997cbf8beb5..9699c56d323f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -25,7 +25,7 @@ enum { | |||
25 | opt_force, opt_err | 25 | opt_force, opt_err |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static match_table_t tokens = { | 28 | static const match_table_t tokens = { |
29 | { opt_creator, "creator=%s" }, | 29 | { opt_creator, "creator=%s" }, |
30 | { opt_type, "type=%s" }, | 30 | { opt_type, "type=%s" }, |
31 | { opt_umask, "umask=%o" }, | 31 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e834e578c93f..eb74531a0a8e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -356,7 +356,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
356 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 356 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
357 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 357 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
358 | sb->s_flags |= MS_RDONLY; | 358 | sb->s_flags |= MS_RDONLY; |
359 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { | 359 | } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { |
360 | printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " | 360 | printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " |
361 | "use the force option at your own risk, mounting read-only.\n"); | 361 | "use the force option at your own risk, mounting read-only.\n"); |
362 | sb->s_flags |= MS_RDONLY; | 362 | sb->s_flags |= MS_RDONLY; |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8ae9c90ada0..29ad461d568f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -215,7 +215,7 @@ enum { | |||
215 | Opt_timeshift, Opt_err, | 215 | Opt_timeshift, Opt_err, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | static match_table_t tokens = { | 218 | static const match_table_t tokens = { |
219 | {Opt_help, "help"}, | 219 | {Opt_help, "help"}, |
220 | {Opt_uid, "uid=%u"}, | 220 | {Opt_uid, "uid=%u"}, |
221 | {Opt_gid, "gid=%u"}, | 221 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3f58923fb39b..61edc701b0e6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -57,7 +57,7 @@ enum { | |||
57 | Opt_err, | 57 | Opt_err, |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static match_table_t tokens = { | 60 | static const match_table_t tokens = { |
61 | {Opt_size, "size=%s"}, | 61 | {Opt_size, "size=%s"}, |
62 | {Opt_nr_inodes, "nr_inodes=%s"}, | 62 | {Opt_nr_inodes, "nr_inodes=%s"}, |
63 | {Opt_mode, "mode=%o"}, | 63 | {Opt_mode, "mode=%o"}, |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
@@ -323,7 +323,7 @@ out: | |||
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * remove_kevent - cleans up and ultimately frees the given kevent | 326 | * remove_kevent - cleans up the given kevent |
327 | * | 327 | * |
328 | * Caller must hold dev->ev_mutex. | 328 | * Caller must hold dev->ev_mutex. |
329 | */ | 329 | */ |
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, | |||
334 | 334 | ||
335 | dev->event_count--; | 335 | dev->event_count--; |
336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; |
337 | } | ||
337 | 338 | ||
339 | /* | ||
340 | * free_kevent - frees the given kevent. | ||
341 | */ | ||
342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
343 | { | ||
338 | kfree(kevent->name); | 344 | kfree(kevent->name); |
339 | kmem_cache_free(event_cachep, kevent); | 345 | kmem_cache_free(event_cachep, kevent); |
340 | } | 346 | } |
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) | |||
350 | struct inotify_kernel_event *kevent; | 356 | struct inotify_kernel_event *kevent; |
351 | kevent = inotify_dev_get_event(dev); | 357 | kevent = inotify_dev_get_event(dev); |
352 | remove_kevent(dev, kevent); | 358 | remove_kevent(dev, kevent); |
359 | free_kevent(kevent); | ||
353 | } | 360 | } |
354 | } | 361 | } |
355 | 362 | ||
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
433 | dev = file->private_data; | 440 | dev = file->private_data; |
434 | 441 | ||
435 | while (1) { | 442 | while (1) { |
436 | int events; | ||
437 | 443 | ||
438 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 444 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); |
439 | 445 | ||
440 | mutex_lock(&dev->ev_mutex); | 446 | mutex_lock(&dev->ev_mutex); |
441 | events = !list_empty(&dev->events); | 447 | if (!list_empty(&dev->events)) { |
442 | mutex_unlock(&dev->ev_mutex); | ||
443 | if (events) { | ||
444 | ret = 0; | 448 | ret = 0; |
445 | break; | 449 | break; |
446 | } | 450 | } |
451 | mutex_unlock(&dev->ev_mutex); | ||
447 | 452 | ||
448 | if (file->f_flags & O_NONBLOCK) { | 453 | if (file->f_flags & O_NONBLOCK) { |
449 | ret = -EAGAIN; | 454 | ret = -EAGAIN; |
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
462 | if (ret) | 467 | if (ret) |
463 | return ret; | 468 | return ret; |
464 | 469 | ||
465 | mutex_lock(&dev->ev_mutex); | ||
466 | while (1) { | 470 | while (1) { |
467 | struct inotify_kernel_event *kevent; | 471 | struct inotify_kernel_event *kevent; |
468 | 472 | ||
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
481 | } | 485 | } |
482 | break; | 486 | break; |
483 | } | 487 | } |
488 | remove_kevent(dev, kevent); | ||
489 | |||
490 | /* | ||
491 | * Must perform the copy_to_user outside the mutex in order | ||
492 | * to avoid a lock order reversal with mmap_sem. | ||
493 | */ | ||
494 | mutex_unlock(&dev->ev_mutex); | ||
484 | 495 | ||
485 | if (copy_to_user(buf, &kevent->event, event_size)) { | 496 | if (copy_to_user(buf, &kevent->event, event_size)) { |
486 | ret = -EFAULT; | 497 | ret = -EFAULT; |
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
498 | count -= kevent->event.len; | 509 | count -= kevent->event.len; |
499 | } | 510 | } |
500 | 511 | ||
501 | remove_kevent(dev, kevent); | 512 | free_kevent(kevent); |
513 | |||
514 | mutex_lock(&dev->ev_mutex); | ||
502 | } | 515 | } |
503 | mutex_unlock(&dev->ev_mutex); | 516 | mutex_unlock(&dev->ev_mutex); |
504 | 517 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d3..d152856c371b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -13,9 +13,14 @@ | |||
13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/writeback.h> | ||
17 | #include <linux/buffer_head.h> | ||
16 | 18 | ||
17 | #include <asm/ioctls.h> | 19 | #include <asm/ioctls.h> |
18 | 20 | ||
21 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
22 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
23 | |||
19 | /** | 24 | /** |
20 | * vfs_ioctl - call filesystem specific ioctl methods | 25 | * vfs_ioctl - call filesystem specific ioctl methods |
21 | * @filp: open file to invoke ioctl method on | 26 | * @filp: open file to invoke ioctl method on |
@@ -71,6 +76,276 @@ static int ioctl_fibmap(struct file *filp, int __user *p) | |||
71 | return put_user(res, p); | 76 | return put_user(res, p); |
72 | } | 77 | } |
73 | 78 | ||
79 | /** | ||
80 | * fiemap_fill_next_extent - Fiemap helper function | ||
81 | * @fieinfo: Fiemap context passed into ->fiemap | ||
82 | * @logical: Extent logical start offset, in bytes | ||
83 | * @phys: Extent physical start offset, in bytes | ||
84 | * @len: Extent length, in bytes | ||
85 | * @flags: FIEMAP_EXTENT flags that describe this extent | ||
86 | * | ||
87 | * Called from file system ->fiemap callback. Will populate extent | ||
88 | * info as passed in via arguments and copy to user memory. On | ||
89 | * success, extent count on fieinfo is incremented. | ||
90 | * | ||
91 | * Returns 0 on success, -errno on error, 1 if this was the last | ||
92 | * extent that will fit in user array. | ||
93 | */ | ||
94 | #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) | ||
95 | #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) | ||
96 | #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) | ||
97 | int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | ||
98 | u64 phys, u64 len, u32 flags) | ||
99 | { | ||
100 | struct fiemap_extent extent; | ||
101 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | ||
102 | |||
103 | /* only count the extents */ | ||
104 | if (fieinfo->fi_extents_max == 0) { | ||
105 | fieinfo->fi_extents_mapped++; | ||
106 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
107 | } | ||
108 | |||
109 | if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) | ||
110 | return 1; | ||
111 | |||
112 | if (flags & SET_UNKNOWN_FLAGS) | ||
113 | flags |= FIEMAP_EXTENT_UNKNOWN; | ||
114 | if (flags & SET_NO_UNMOUNTED_IO_FLAGS) | ||
115 | flags |= FIEMAP_EXTENT_ENCODED; | ||
116 | if (flags & SET_NOT_ALIGNED_FLAGS) | ||
117 | flags |= FIEMAP_EXTENT_NOT_ALIGNED; | ||
118 | |||
119 | memset(&extent, 0, sizeof(extent)); | ||
120 | extent.fe_logical = logical; | ||
121 | extent.fe_physical = phys; | ||
122 | extent.fe_length = len; | ||
123 | extent.fe_flags = flags; | ||
124 | |||
125 | dest += fieinfo->fi_extents_mapped; | ||
126 | if (copy_to_user(dest, &extent, sizeof(extent))) | ||
127 | return -EFAULT; | ||
128 | |||
129 | fieinfo->fi_extents_mapped++; | ||
130 | if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) | ||
131 | return 1; | ||
132 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
133 | } | ||
134 | EXPORT_SYMBOL(fiemap_fill_next_extent); | ||
135 | |||
136 | /** | ||
137 | * fiemap_check_flags - check validity of requested flags for fiemap | ||
138 | * @fieinfo: Fiemap context passed into ->fiemap | ||
139 | * @fs_flags: Set of fiemap flags that the file system understands | ||
140 | * | ||
141 | * Called from file system ->fiemap callback. This will compute the | ||
142 | * intersection of valid fiemap flags and those that the fs supports. That | ||
143 | * value is then compared against the user supplied flags. In case of bad user | ||
144 | * flags, the invalid values will be written into the fieinfo structure, and | ||
145 | * -EBADR is returned, which tells ioctl_fiemap() to return those values to | ||
146 | * userspace. For this reason, a return code of -EBADR should be preserved. | ||
147 | * | ||
148 | * Returns 0 on success, -EBADR on bad flags. | ||
149 | */ | ||
150 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) | ||
151 | { | ||
152 | u32 incompat_flags; | ||
153 | |||
154 | incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); | ||
155 | if (incompat_flags) { | ||
156 | fieinfo->fi_flags = incompat_flags; | ||
157 | return -EBADR; | ||
158 | } | ||
159 | return 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(fiemap_check_flags); | ||
162 | |||
163 | static int fiemap_check_ranges(struct super_block *sb, | ||
164 | u64 start, u64 len, u64 *new_len) | ||
165 | { | ||
166 | *new_len = len; | ||
167 | |||
168 | if (len == 0) | ||
169 | return -EINVAL; | ||
170 | |||
171 | if (start > sb->s_maxbytes) | ||
172 | return -EFBIG; | ||
173 | |||
174 | /* | ||
175 | * Shrink request scope to what the fs can actually handle. | ||
176 | */ | ||
177 | if ((len > sb->s_maxbytes) || | ||
178 | (sb->s_maxbytes - len) < start) | ||
179 | *new_len = sb->s_maxbytes - start; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | ||
185 | { | ||
186 | struct fiemap fiemap; | ||
187 | struct fiemap_extent_info fieinfo = { 0, }; | ||
188 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
189 | struct super_block *sb = inode->i_sb; | ||
190 | u64 len; | ||
191 | int error; | ||
192 | |||
193 | if (!inode->i_op->fiemap) | ||
194 | return -EOPNOTSUPP; | ||
195 | |||
196 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | ||
197 | sizeof(struct fiemap))) | ||
198 | return -EFAULT; | ||
199 | |||
200 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
201 | return -EINVAL; | ||
202 | |||
203 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
204 | &len); | ||
205 | if (error) | ||
206 | return error; | ||
207 | |||
208 | fieinfo.fi_flags = fiemap.fm_flags; | ||
209 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
210 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | ||
211 | |||
212 | if (fiemap.fm_extent_count != 0 && | ||
213 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | ||
214 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
215 | return -EFAULT; | ||
216 | |||
217 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
218 | filemap_write_and_wait(inode->i_mapping); | ||
219 | |||
220 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | ||
221 | fiemap.fm_flags = fieinfo.fi_flags; | ||
222 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
223 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | ||
224 | error = -EFAULT; | ||
225 | |||
226 | return error; | ||
227 | } | ||
228 | |||
229 | #ifdef CONFIG_BLOCK | ||
230 | |||
231 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | ||
232 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | ||
233 | |||
234 | /* | ||
235 | * @inode - the inode to map | ||
236 | * @arg - the pointer to userspace where we copy everything to | ||
237 | * @get_block - the fs's get_block function | ||
238 | * | ||
239 | * This does FIEMAP for block based inodes. Basically it will just loop | ||
240 | * through get_block until we hit the number of extents we want to map, or we | ||
241 | * go past the end of the file and hit a hole. | ||
242 | * | ||
243 | * If it is possible to have data blocks beyond a hole past @inode->i_size, then | ||
244 | * please do not use this function, it will stop at the first unmapped block | ||
245 | * beyond i_size | ||
246 | */ | ||
247 | int generic_block_fiemap(struct inode *inode, | ||
248 | struct fiemap_extent_info *fieinfo, u64 start, | ||
249 | u64 len, get_block_t *get_block) | ||
250 | { | ||
251 | struct buffer_head tmp; | ||
252 | unsigned int start_blk; | ||
253 | long long length = 0, map_len = 0; | ||
254 | u64 logical = 0, phys = 0, size = 0; | ||
255 | u32 flags = FIEMAP_EXTENT_MERGED; | ||
256 | int ret = 0; | ||
257 | |||
258 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | ||
259 | return ret; | ||
260 | |||
261 | start_blk = logical_to_blk(inode, start); | ||
262 | |||
263 | /* guard against change */ | ||
264 | mutex_lock(&inode->i_mutex); | ||
265 | |||
266 | length = (long long)min_t(u64, len, i_size_read(inode)); | ||
267 | map_len = length; | ||
268 | |||
269 | do { | ||
270 | /* | ||
271 | * we set b_size to the total size we want so it will map as | ||
272 | * many contiguous blocks as possible at once | ||
273 | */ | ||
274 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
275 | tmp.b_size = map_len; | ||
276 | |||
277 | ret = get_block(inode, start_blk, &tmp, 0); | ||
278 | if (ret) | ||
279 | break; | ||
280 | |||
281 | /* HOLE */ | ||
282 | if (!buffer_mapped(&tmp)) { | ||
283 | /* | ||
284 | * first hole after going past the EOF, this is our | ||
285 | * last extent | ||
286 | */ | ||
287 | if (length <= 0) { | ||
288 | flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; | ||
289 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
290 | phys, size, | ||
291 | flags); | ||
292 | break; | ||
293 | } | ||
294 | |||
295 | length -= blk_to_logical(inode, 1); | ||
296 | |||
297 | /* if we have holes up to/past EOF then we're done */ | ||
298 | if (length <= 0) | ||
299 | break; | ||
300 | |||
301 | start_blk++; | ||
302 | } else { | ||
303 | if (length <= 0 && size) { | ||
304 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
305 | phys, size, | ||
306 | flags); | ||
307 | if (ret) | ||
308 | break; | ||
309 | } | ||
310 | |||
311 | logical = blk_to_logical(inode, start_blk); | ||
312 | phys = blk_to_logical(inode, tmp.b_blocknr); | ||
313 | size = tmp.b_size; | ||
314 | flags = FIEMAP_EXTENT_MERGED; | ||
315 | |||
316 | length -= tmp.b_size; | ||
317 | start_blk += logical_to_blk(inode, size); | ||
318 | |||
319 | /* | ||
320 | * if we are past the EOF we need to loop again to see | ||
321 | * if there is a hole so we can mark this extent as the | ||
322 | * last one, and if not keep mapping things until we | ||
323 | * find a hole, or we run out of slots in the extent | ||
324 | * array | ||
325 | */ | ||
326 | if (length <= 0) | ||
327 | continue; | ||
328 | |||
329 | ret = fiemap_fill_next_extent(fieinfo, logical, phys, | ||
330 | size, flags); | ||
331 | if (ret) | ||
332 | break; | ||
333 | } | ||
334 | cond_resched(); | ||
335 | } while (1); | ||
336 | |||
337 | mutex_unlock(&inode->i_mutex); | ||
338 | |||
339 | /* if ret is 1 then we just hit the end of the extent array */ | ||
340 | if (ret == 1) | ||
341 | ret = 0; | ||
342 | |||
343 | return ret; | ||
344 | } | ||
345 | EXPORT_SYMBOL(generic_block_fiemap); | ||
346 | |||
347 | #endif /* CONFIG_BLOCK */ | ||
348 | |||
74 | static int file_ioctl(struct file *filp, unsigned int cmd, | 349 | static int file_ioctl(struct file *filp, unsigned int cmd, |
75 | unsigned long arg) | 350 | unsigned long arg) |
76 | { | 351 | { |
@@ -80,6 +355,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
80 | switch (cmd) { | 355 | switch (cmd) { |
81 | case FIBMAP: | 356 | case FIBMAP: |
82 | return ioctl_fibmap(filp, p); | 357 | return ioctl_fibmap(filp, p); |
358 | case FS_IOC_FIEMAP: | ||
359 | return ioctl_fiemap(filp, arg); | ||
83 | case FIGETBSZ: | 360 | case FIGETBSZ: |
84 | return put_user(inode->i_sb->s_blocksize, p); | 361 | return put_user(inode->i_sb->s_blocksize, p); |
85 | case FIONREAD: | 362 | case FIONREAD: |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26948a6033b6..3f8af0f1505b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -310,7 +310,7 @@ enum { | |||
310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, | 310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, |
311 | }; | 311 | }; |
312 | 312 | ||
313 | static match_table_t tokens = { | 313 | static const match_table_t tokens = { |
314 | {Opt_norock, "norock"}, | 314 | {Opt_norock, "norock"}, |
315 | {Opt_nojoliet, "nojoliet"}, | 315 | {Opt_nojoliet, "nojoliet"}, |
316 | {Opt_unhide, "unhide"}, | 316 | {Opt_unhide, "unhide"}, |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..9203c3332f17 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/marker.h> | ||
23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
25 | 26 | ||
@@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
93 | int ret = 0; | 94 | int ret = 0; |
94 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
95 | 96 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
98 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
99 | jbd_unlock_bh_state(bh); | 101 | jbd_unlock_bh_state(bh); |
@@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
126 | 128 | ||
127 | /* | 129 | /* |
128 | * Test again, another process may have checkpointed while we | 130 | * Test again, another process may have checkpointed while we |
129 | * were waiting for the checkpoint lock | 131 | * were waiting for the checkpoint lock. If there are no |
132 | * outstanding transactions there is nothing to checkpoint and | ||
133 | * we can't make progress. Abort the journal in this case. | ||
130 | */ | 134 | */ |
131 | spin_lock(&journal->j_state_lock); | 135 | spin_lock(&journal->j_state_lock); |
136 | spin_lock(&journal->j_list_lock); | ||
132 | nblocks = jbd_space_needed(journal); | 137 | nblocks = jbd_space_needed(journal); |
133 | if (__jbd2_log_space_left(journal) < nblocks) { | 138 | if (__jbd2_log_space_left(journal) < nblocks) { |
139 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
140 | |||
141 | spin_unlock(&journal->j_list_lock); | ||
134 | spin_unlock(&journal->j_state_lock); | 142 | spin_unlock(&journal->j_state_lock); |
135 | jbd2_log_do_checkpoint(journal); | 143 | if (chkpt) { |
144 | jbd2_log_do_checkpoint(journal); | ||
145 | } else { | ||
146 | printk(KERN_ERR "%s: no transactions\n", | ||
147 | __func__); | ||
148 | jbd2_journal_abort(journal, 0); | ||
149 | } | ||
150 | |||
136 | spin_lock(&journal->j_state_lock); | 151 | spin_lock(&journal->j_state_lock); |
152 | } else { | ||
153 | spin_unlock(&journal->j_list_lock); | ||
137 | } | 154 | } |
138 | mutex_unlock(&journal->j_checkpoint_mutex); | 155 | mutex_unlock(&journal->j_checkpoint_mutex); |
139 | } | 156 | } |
@@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
160 | * buffers. Note that we take the buffers in the opposite ordering | 177 | * buffers. Note that we take the buffers in the opposite ordering |
161 | * from the one in which they were submitted for IO. | 178 | * from the one in which they were submitted for IO. |
162 | * | 179 | * |
180 | * Return 0 on success, and return <0 if some buffers have failed | ||
181 | * to be written out. | ||
182 | * | ||
163 | * Called with j_list_lock held. | 183 | * Called with j_list_lock held. |
164 | */ | 184 | */ |
165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 185 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
166 | { | 186 | { |
167 | struct journal_head *jh; | 187 | struct journal_head *jh; |
168 | struct buffer_head *bh; | 188 | struct buffer_head *bh; |
169 | tid_t this_tid; | 189 | tid_t this_tid; |
170 | int released = 0; | 190 | int released = 0; |
191 | int ret = 0; | ||
171 | 192 | ||
172 | this_tid = transaction->t_tid; | 193 | this_tid = transaction->t_tid; |
173 | restart: | 194 | restart: |
174 | /* Did somebody clean up the transaction in the meanwhile? */ | 195 | /* Did somebody clean up the transaction in the meanwhile? */ |
175 | if (journal->j_checkpoint_transactions != transaction || | 196 | if (journal->j_checkpoint_transactions != transaction || |
176 | transaction->t_tid != this_tid) | 197 | transaction->t_tid != this_tid) |
177 | return; | 198 | return ret; |
178 | while (!released && transaction->t_checkpoint_io_list) { | 199 | while (!released && transaction->t_checkpoint_io_list) { |
179 | jh = transaction->t_checkpoint_io_list; | 200 | jh = transaction->t_checkpoint_io_list; |
180 | bh = jh2bh(jh); | 201 | bh = jh2bh(jh); |
@@ -194,6 +215,9 @@ restart: | |||
194 | spin_lock(&journal->j_list_lock); | 215 | spin_lock(&journal->j_list_lock); |
195 | goto restart; | 216 | goto restart; |
196 | } | 217 | } |
218 | if (unlikely(buffer_write_io_error(bh))) | ||
219 | ret = -EIO; | ||
220 | |||
197 | /* | 221 | /* |
198 | * Now in whatever state the buffer currently is, we know that | 222 | * Now in whatever state the buffer currently is, we know that |
199 | * it has been written out and so we can drop it from the list | 223 | * it has been written out and so we can drop it from the list |
@@ -203,6 +227,8 @@ restart: | |||
203 | jbd2_journal_remove_journal_head(bh); | 227 | jbd2_journal_remove_journal_head(bh); |
204 | __brelse(bh); | 228 | __brelse(bh); |
205 | } | 229 | } |
230 | |||
231 | return ret; | ||
206 | } | 232 | } |
207 | 233 | ||
208 | #define NR_BATCH 64 | 234 | #define NR_BATCH 64 |
@@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
226 | * Try to flush one buffer from the checkpoint list to disk. | 252 | * Try to flush one buffer from the checkpoint list to disk. |
227 | * | 253 | * |
228 | * Return 1 if something happened which requires us to abort the current | 254 | * Return 1 if something happened which requires us to abort the current |
229 | * scan of the checkpoint list. | 255 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
256 | * be written out. | ||
230 | * | 257 | * |
231 | * Called with j_list_lock held and drops it if 1 is returned | 258 | * Called with j_list_lock held and drops it if 1 is returned |
232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 259 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
@@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
258 | jbd2_log_wait_commit(journal, tid); | 285 | jbd2_log_wait_commit(journal, tid); |
259 | ret = 1; | 286 | ret = 1; |
260 | } else if (!buffer_dirty(bh)) { | 287 | } else if (!buffer_dirty(bh)) { |
288 | ret = 1; | ||
289 | if (unlikely(buffer_write_io_error(bh))) | ||
290 | ret = -EIO; | ||
261 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 291 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
262 | BUFFER_TRACE(bh, "remove from checkpoint"); | 292 | BUFFER_TRACE(bh, "remove from checkpoint"); |
263 | __jbd2_journal_remove_checkpoint(jh); | 293 | __jbd2_journal_remove_checkpoint(jh); |
@@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
265 | jbd_unlock_bh_state(bh); | 295 | jbd_unlock_bh_state(bh); |
266 | jbd2_journal_remove_journal_head(bh); | 296 | jbd2_journal_remove_journal_head(bh); |
267 | __brelse(bh); | 297 | __brelse(bh); |
268 | ret = 1; | ||
269 | } else { | 298 | } else { |
270 | /* | 299 | /* |
271 | * Important: we are about to write the buffer, and | 300 | * Important: we are about to write the buffer, and |
@@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
298 | * to disk. We submit larger chunks of data at once. | 327 | * to disk. We submit larger chunks of data at once. |
299 | * | 328 | * |
300 | * The journal should be locked before calling this function. | 329 | * The journal should be locked before calling this function. |
330 | * Called with j_checkpoint_mutex held. | ||
301 | */ | 331 | */ |
302 | int jbd2_log_do_checkpoint(journal_t *journal) | 332 | int jbd2_log_do_checkpoint(journal_t *journal) |
303 | { | 333 | { |
@@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
313 | * journal straight away. | 343 | * journal straight away. |
314 | */ | 344 | */ |
315 | result = jbd2_cleanup_journal_tail(journal); | 345 | result = jbd2_cleanup_journal_tail(journal); |
346 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
347 | journal->j_devname, result); | ||
316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 348 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
317 | if (result <= 0) | 349 | if (result <= 0) |
318 | return result; | 350 | return result; |
@@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
321 | * OK, we need to start writing disk blocks. Take one transaction | 353 | * OK, we need to start writing disk blocks. Take one transaction |
322 | * and write it. | 354 | * and write it. |
323 | */ | 355 | */ |
356 | result = 0; | ||
324 | spin_lock(&journal->j_list_lock); | 357 | spin_lock(&journal->j_list_lock); |
325 | if (!journal->j_checkpoint_transactions) | 358 | if (!journal->j_checkpoint_transactions) |
326 | goto out; | 359 | goto out; |
@@ -339,7 +372,7 @@ restart: | |||
339 | int batch_count = 0; | 372 | int batch_count = 0; |
340 | struct buffer_head *bhs[NR_BATCH]; | 373 | struct buffer_head *bhs[NR_BATCH]; |
341 | struct journal_head *jh; | 374 | struct journal_head *jh; |
342 | int retry = 0; | 375 | int retry = 0, err; |
343 | 376 | ||
344 | while (!retry && transaction->t_checkpoint_list) { | 377 | while (!retry && transaction->t_checkpoint_list) { |
345 | struct buffer_head *bh; | 378 | struct buffer_head *bh; |
@@ -353,6 +386,8 @@ restart: | |||
353 | } | 386 | } |
354 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 387 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
355 | transaction); | 388 | transaction); |
389 | if (retry < 0 && !result) | ||
390 | result = retry; | ||
356 | if (!retry && (need_resched() || | 391 | if (!retry && (need_resched() || |
357 | spin_needbreak(&journal->j_list_lock))) { | 392 | spin_needbreak(&journal->j_list_lock))) { |
358 | spin_unlock(&journal->j_list_lock); | 393 | spin_unlock(&journal->j_list_lock); |
@@ -377,14 +412,18 @@ restart: | |||
377 | * Now we have cleaned up the first transaction's checkpoint | 412 | * Now we have cleaned up the first transaction's checkpoint |
378 | * list. Let's clean up the second one | 413 | * list. Let's clean up the second one |
379 | */ | 414 | */ |
380 | __wait_cp_io(journal, transaction); | 415 | err = __wait_cp_io(journal, transaction); |
416 | if (!result) | ||
417 | result = err; | ||
381 | } | 418 | } |
382 | out: | 419 | out: |
383 | spin_unlock(&journal->j_list_lock); | 420 | spin_unlock(&journal->j_list_lock); |
384 | result = jbd2_cleanup_journal_tail(journal); | ||
385 | if (result < 0) | 421 | if (result < 0) |
386 | return result; | 422 | jbd2_journal_abort(journal, result); |
387 | return 0; | 423 | else |
424 | result = jbd2_cleanup_journal_tail(journal); | ||
425 | |||
426 | return (result < 0) ? result : 0; | ||
388 | } | 427 | } |
389 | 428 | ||
390 | /* | 429 | /* |
@@ -400,8 +439,9 @@ out: | |||
400 | * This is the only part of the journaling code which really needs to be | 439 | * This is the only part of the journaling code which really needs to be |
401 | * aware of transaction aborts. Checkpointing involves writing to the | 440 | * aware of transaction aborts. Checkpointing involves writing to the |
402 | * main filesystem area rather than to the journal, so it can proceed | 441 | * main filesystem area rather than to the journal, so it can proceed |
403 | * even in abort state, but we must not update the journal superblock if | 442 | * even in abort state, but we must not update the super block if |
404 | * we have an abort error outstanding. | 443 | * checkpointing may have failed. Otherwise, we would lose some metadata |
444 | * buffers which should be written-back to the filesystem. | ||
405 | */ | 445 | */ |
406 | 446 | ||
407 | int jbd2_cleanup_journal_tail(journal_t *journal) | 447 | int jbd2_cleanup_journal_tail(journal_t *journal) |
@@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
410 | tid_t first_tid; | 450 | tid_t first_tid; |
411 | unsigned long blocknr, freed; | 451 | unsigned long blocknr, freed; |
412 | 452 | ||
453 | if (is_journal_aborted(journal)) | ||
454 | return 1; | ||
455 | |||
413 | /* OK, work out the oldest transaction remaining in the log, and | 456 | /* OK, work out the oldest transaction remaining in the log, and |
414 | * the log block it starts at. | 457 | * the log block it starts at. |
415 | * | 458 | * |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95ec..8b119e16aa36 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
19 | #include <linux/marker.h> | ||
19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
126 | 127 | ||
127 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
128 | lock_buffer(bh); | 129 | lock_buffer(bh); |
129 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
130 | set_buffer_dirty(bh); | ||
131 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
132 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
133 | 133 | ||
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
147 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
148 | */ | 148 | */ |
149 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
153 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
154 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
157 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
158 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
160 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
161 | lock_buffer(bh); | 158 | lock_buffer(bh); |
162 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
163 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
164 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
165 | } | 162 | } |
166 | *cbh = bh; | 163 | *cbh = bh; |
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
371 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
372 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
373 | 370 | ||
371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
372 | journal->j_devname, commit_transaction->t_tid); | ||
374 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
375 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
376 | 375 | ||
@@ -505,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
505 | jh = commit_transaction->t_buffers; | 504 | jh = commit_transaction->t_buffers; |
506 | 505 | ||
507 | /* If we're in abort mode, we just un-journal the buffer and | 506 | /* If we're in abort mode, we just un-journal the buffer and |
508 | release it for background writing. */ | 507 | release it. */ |
509 | 508 | ||
510 | if (is_journal_aborted(journal)) { | 509 | if (is_journal_aborted(journal)) { |
510 | clear_buffer_jbddirty(jh2bh(jh)); | ||
511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
512 | jbd2_journal_refile_buffer(journal, jh); | 512 | jbd2_journal_refile_buffer(journal, jh); |
513 | /* If that was the last one, we need to clean up | 513 | /* If that was the last one, we need to clean up |
@@ -681,11 +681,11 @@ start_journal_io: | |||
681 | */ | 681 | */ |
682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
683 | if (err) { | 683 | if (err) { |
684 | char b[BDEVNAME_SIZE]; | ||
685 | |||
686 | printk(KERN_WARNING | 684 | printk(KERN_WARNING |
687 | "JBD2: Detected IO errors while flushing file data " | 685 | "JBD2: Detected IO errors while flushing file data " |
688 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 686 | "on %s\n", journal->j_devname); |
687 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
688 | jbd2_journal_abort(journal, err); | ||
689 | err = 0; | 689 | err = 0; |
690 | } | 690 | } |
691 | 691 | ||
@@ -786,6 +786,9 @@ wait_for_iobuf: | |||
786 | /* AKPM: bforget here */ | 786 | /* AKPM: bforget here */ |
787 | } | 787 | } |
788 | 788 | ||
789 | if (err) | ||
790 | jbd2_journal_abort(journal, err); | ||
791 | |||
789 | jbd_debug(3, "JBD: commit phase 5\n"); | 792 | jbd_debug(3, "JBD: commit phase 5\n"); |
790 | 793 | ||
791 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 794 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -884,6 +887,8 @@ restart_loop: | |||
884 | if (buffer_jbddirty(bh)) { | 887 | if (buffer_jbddirty(bh)) { |
885 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 888 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
886 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 889 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
890 | if (is_journal_aborted(journal)) | ||
891 | clear_buffer_jbddirty(bh); | ||
887 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 892 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
888 | __jbd2_journal_refile_buffer(jh); | 893 | __jbd2_journal_refile_buffer(jh); |
889 | jbd_unlock_bh_state(bh); | 894 | jbd_unlock_bh_state(bh); |
@@ -990,6 +995,12 @@ restart_loop: | |||
990 | } | 995 | } |
991 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
992 | 997 | ||
998 | if (journal->j_commit_callback) | ||
999 | journal->j_commit_callback(journal, commit_transaction); | ||
1000 | |||
1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
1002 | journal->j_devname, commit_transaction->t_tid, | ||
1003 | journal->j_tail_sequence); | ||
993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1004 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
994 | journal->j_commit_sequence, journal->j_tail_sequence); | 1005 | journal->j_commit_sequence, journal->j_tail_sequence); |
995 | 1006 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4edb..783de118de92 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
597 | if (ret) | 597 | if (ret) |
598 | *retp = ret; | 598 | *retp = ret; |
599 | else { | 599 | else { |
600 | char b[BDEVNAME_SIZE]; | ||
601 | |||
602 | printk(KERN_ALERT "%s: journal block not found " | 600 | printk(KERN_ALERT "%s: journal block not found " |
603 | "at offset %lu on %s\n", | 601 | "at offset %lu on %s\n", |
604 | __func__, | 602 | __func__, blocknr, journal->j_devname); |
605 | blocknr, | ||
606 | bdevname(journal->j_dev, b)); | ||
607 | err = -EIO; | 603 | err = -EIO; |
608 | __journal_abort_soft(journal, err); | 604 | __journal_abort_soft(journal, err); |
609 | } | 605 | } |
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; | |||
901 | 897 | ||
902 | static void jbd2_stats_proc_init(journal_t *journal) | 898 | static void jbd2_stats_proc_init(journal_t *journal) |
903 | { | 899 | { |
904 | char name[BDEVNAME_SIZE]; | 900 | journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); |
905 | |||
906 | bdevname(journal->j_dev, name); | ||
907 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
908 | if (journal->j_proc_entry) { | 901 | if (journal->j_proc_entry) { |
909 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, | 902 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, |
910 | &jbd2_seq_history_fops, journal); | 903 | &jbd2_seq_history_fops, journal); |
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) | |||
915 | 908 | ||
916 | static void jbd2_stats_proc_exit(journal_t *journal) | 909 | static void jbd2_stats_proc_exit(journal_t *journal) |
917 | { | 910 | { |
918 | char name[BDEVNAME_SIZE]; | ||
919 | |||
920 | bdevname(journal->j_dev, name); | ||
921 | remove_proc_entry("info", journal->j_proc_entry); | 911 | remove_proc_entry("info", journal->j_proc_entry); |
922 | remove_proc_entry("history", journal->j_proc_entry); | 912 | remove_proc_entry("history", journal->j_proc_entry); |
923 | remove_proc_entry(name, proc_jbd2_stats); | 913 | remove_proc_entry(journal->j_devname, proc_jbd2_stats); |
924 | } | 914 | } |
925 | 915 | ||
926 | static void journal_init_stats(journal_t *journal) | 916 | static void journal_init_stats(journal_t *journal) |
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1018 | { | 1008 | { |
1019 | journal_t *journal = journal_init_common(); | 1009 | journal_t *journal = journal_init_common(); |
1020 | struct buffer_head *bh; | 1010 | struct buffer_head *bh; |
1011 | char *p; | ||
1021 | int n; | 1012 | int n; |
1022 | 1013 | ||
1023 | if (!journal) | 1014 | if (!journal) |
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
1039 | journal->j_fs_dev = fs_dev; | 1030 | journal->j_fs_dev = fs_dev; |
1040 | journal->j_blk_offset = start; | 1031 | journal->j_blk_offset = start; |
1041 | journal->j_maxlen = len; | 1032 | journal->j_maxlen = len; |
1033 | bdevname(journal->j_dev, journal->j_devname); | ||
1034 | p = journal->j_devname; | ||
1035 | while ((p = strchr(p, '/'))) | ||
1036 | *p = '!'; | ||
1042 | jbd2_stats_proc_init(journal); | 1037 | jbd2_stats_proc_init(journal); |
1043 | 1038 | ||
1044 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1039 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1061 | { | 1056 | { |
1062 | struct buffer_head *bh; | 1057 | struct buffer_head *bh; |
1063 | journal_t *journal = journal_init_common(); | 1058 | journal_t *journal = journal_init_common(); |
1059 | char *p; | ||
1064 | int err; | 1060 | int err; |
1065 | int n; | 1061 | int n; |
1066 | unsigned long long blocknr; | 1062 | unsigned long long blocknr; |
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1070 | 1066 | ||
1071 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; | 1067 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; |
1072 | journal->j_inode = inode; | 1068 | journal->j_inode = inode; |
1069 | bdevname(journal->j_dev, journal->j_devname); | ||
1070 | p = journal->j_devname; | ||
1071 | while ((p = strchr(p, '/'))) | ||
1072 | *p = '!'; | ||
1073 | p = journal->j_devname + strlen(journal->j_devname); | ||
1074 | sprintf(p, ":%lu", journal->j_inode->i_ino); | ||
1073 | jbd_debug(1, | 1075 | jbd_debug(1, |
1074 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", | 1076 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", |
1075 | journal, inode->i_sb->s_id, inode->i_ino, | 1077 | journal, inode->i_sb->s_id, inode->i_ino, |
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1253 | goto out; | 1255 | goto out; |
1254 | } | 1256 | } |
1255 | 1257 | ||
1258 | if (buffer_write_io_error(bh)) { | ||
1259 | /* | ||
1260 | * Oh, dear. A previous attempt to write the journal | ||
1261 | * superblock failed. This could happen because the | ||
1262 | * USB device was yanked out. Or it could happen to | ||
1263 | * be a transient write error and maybe the block will | ||
1264 | * be remapped. Nothing we can do but to retry the | ||
1265 | * write and hope for the best. | ||
1266 | */ | ||
1267 | printk(KERN_ERR "JBD2: previous I/O error detected " | ||
1268 | "for journal superblock update for %s.\n", | ||
1269 | journal->j_devname); | ||
1270 | clear_buffer_write_io_error(bh); | ||
1271 | set_buffer_uptodate(bh); | ||
1272 | } | ||
1273 | |||
1256 | spin_lock(&journal->j_state_lock); | 1274 | spin_lock(&journal->j_state_lock); |
1257 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1275 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
1258 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1276 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1264 | 1282 | ||
1265 | BUFFER_TRACE(bh, "marking dirty"); | 1283 | BUFFER_TRACE(bh, "marking dirty"); |
1266 | mark_buffer_dirty(bh); | 1284 | mark_buffer_dirty(bh); |
1267 | if (wait) | 1285 | if (wait) { |
1268 | sync_dirty_buffer(bh); | 1286 | sync_dirty_buffer(bh); |
1269 | else | 1287 | if (buffer_write_io_error(bh)) { |
1288 | printk(KERN_ERR "JBD2: I/O error detected " | ||
1289 | "when updating journal superblock for %s.\n", | ||
1290 | journal->j_devname); | ||
1291 | clear_buffer_write_io_error(bh); | ||
1292 | set_buffer_uptodate(bh); | ||
1293 | } | ||
1294 | } else | ||
1270 | ll_rw_block(SWRITE, 1, &bh); | 1295 | ll_rw_block(SWRITE, 1, &bh); |
1271 | 1296 | ||
1272 | out: | 1297 | out: |
@@ -1426,9 +1451,12 @@ recovery_error: | |||
1426 | * | 1451 | * |
1427 | * Release a journal_t structure once it is no longer in use by the | 1452 | * Release a journal_t structure once it is no longer in use by the |
1428 | * journaled object. | 1453 | * journaled object. |
1454 | * Return <0 if we couldn't clean up the journal. | ||
1429 | */ | 1455 | */ |
1430 | void jbd2_journal_destroy(journal_t *journal) | 1456 | int jbd2_journal_destroy(journal_t *journal) |
1431 | { | 1457 | { |
1458 | int err = 0; | ||
1459 | |||
1432 | /* Wait for the commit thread to wake up and die. */ | 1460 | /* Wait for the commit thread to wake up and die. */ |
1433 | journal_kill_thread(journal); | 1461 | journal_kill_thread(journal); |
1434 | 1462 | ||
@@ -1451,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1451 | J_ASSERT(journal->j_checkpoint_transactions == NULL); | 1479 | J_ASSERT(journal->j_checkpoint_transactions == NULL); |
1452 | spin_unlock(&journal->j_list_lock); | 1480 | spin_unlock(&journal->j_list_lock); |
1453 | 1481 | ||
1454 | /* We can now mark the journal as empty. */ | ||
1455 | journal->j_tail = 0; | ||
1456 | journal->j_tail_sequence = ++journal->j_transaction_sequence; | ||
1457 | if (journal->j_sb_buffer) { | 1482 | if (journal->j_sb_buffer) { |
1458 | jbd2_journal_update_superblock(journal, 1); | 1483 | if (!is_journal_aborted(journal)) { |
1484 | /* We can now mark the journal as empty. */ | ||
1485 | journal->j_tail = 0; | ||
1486 | journal->j_tail_sequence = | ||
1487 | ++journal->j_transaction_sequence; | ||
1488 | jbd2_journal_update_superblock(journal, 1); | ||
1489 | } else { | ||
1490 | err = -EIO; | ||
1491 | } | ||
1459 | brelse(journal->j_sb_buffer); | 1492 | brelse(journal->j_sb_buffer); |
1460 | } | 1493 | } |
1461 | 1494 | ||
@@ -1467,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1467 | jbd2_journal_destroy_revoke(journal); | 1500 | jbd2_journal_destroy_revoke(journal); |
1468 | kfree(journal->j_wbuf); | 1501 | kfree(journal->j_wbuf); |
1469 | kfree(journal); | 1502 | kfree(journal); |
1503 | |||
1504 | return err; | ||
1470 | } | 1505 | } |
1471 | 1506 | ||
1472 | 1507 | ||
@@ -1692,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) | |||
1692 | spin_lock(&journal->j_list_lock); | 1727 | spin_lock(&journal->j_list_lock); |
1693 | while (!err && journal->j_checkpoint_transactions != NULL) { | 1728 | while (!err && journal->j_checkpoint_transactions != NULL) { |
1694 | spin_unlock(&journal->j_list_lock); | 1729 | spin_unlock(&journal->j_list_lock); |
1730 | mutex_lock(&journal->j_checkpoint_mutex); | ||
1695 | err = jbd2_log_do_checkpoint(journal); | 1731 | err = jbd2_log_do_checkpoint(journal); |
1732 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
1696 | spin_lock(&journal->j_list_lock); | 1733 | spin_lock(&journal->j_list_lock); |
1697 | } | 1734 | } |
1698 | spin_unlock(&journal->j_list_lock); | 1735 | spin_unlock(&journal->j_list_lock); |
1736 | |||
1737 | if (is_journal_aborted(journal)) | ||
1738 | return -EIO; | ||
1739 | |||
1699 | jbd2_cleanup_journal_tail(journal); | 1740 | jbd2_cleanup_journal_tail(journal); |
1700 | 1741 | ||
1701 | /* Finally, mark the journal as really needing no recovery. | 1742 | /* Finally, mark the journal as really needing no recovery. |
@@ -1717,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1717 | J_ASSERT(journal->j_head == journal->j_tail); | 1758 | J_ASSERT(journal->j_head == journal->j_tail); |
1718 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1759 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
1719 | spin_unlock(&journal->j_state_lock); | 1760 | spin_unlock(&journal->j_state_lock); |
1720 | return err; | 1761 | return 0; |
1721 | } | 1762 | } |
1722 | 1763 | ||
1723 | /** | 1764 | /** |
@@ -1761,23 +1802,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1761 | } | 1802 | } |
1762 | 1803 | ||
1763 | /* | 1804 | /* |
1764 | * journal_dev_name: format a character string to describe on what | ||
1765 | * device this journal is present. | ||
1766 | */ | ||
1767 | |||
1768 | static const char *journal_dev_name(journal_t *journal, char *buffer) | ||
1769 | { | ||
1770 | struct block_device *bdev; | ||
1771 | |||
1772 | if (journal->j_inode) | ||
1773 | bdev = journal->j_inode->i_sb->s_bdev; | ||
1774 | else | ||
1775 | bdev = journal->j_dev; | ||
1776 | |||
1777 | return bdevname(bdev, buffer); | ||
1778 | } | ||
1779 | |||
1780 | /* | ||
1781 | * Journal abort has very specific semantics, which we describe | 1805 | * Journal abort has very specific semantics, which we describe |
1782 | * for journal abort. | 1806 | * for journal abort. |
1783 | * | 1807 | * |
@@ -1793,13 +1817,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) | |||
1793 | void __jbd2_journal_abort_hard(journal_t *journal) | 1817 | void __jbd2_journal_abort_hard(journal_t *journal) |
1794 | { | 1818 | { |
1795 | transaction_t *transaction; | 1819 | transaction_t *transaction; |
1796 | char b[BDEVNAME_SIZE]; | ||
1797 | 1820 | ||
1798 | if (journal->j_flags & JBD2_ABORT) | 1821 | if (journal->j_flags & JBD2_ABORT) |
1799 | return; | 1822 | return; |
1800 | 1823 | ||
1801 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1824 | printk(KERN_ERR "Aborting journal on device %s.\n", |
1802 | journal_dev_name(journal, b)); | 1825 | journal->j_devname); |
1803 | 1826 | ||
1804 | spin_lock(&journal->j_state_lock); | 1827 | spin_lock(&journal->j_state_lock); |
1805 | journal->j_flags |= JBD2_ABORT; | 1828 | journal->j_flags |= JBD2_ABORT; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b76..73063285b13f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -225,7 +225,7 @@ do { \ | |||
225 | */ | 225 | */ |
226 | int jbd2_journal_recover(journal_t *journal) | 226 | int jbd2_journal_recover(journal_t *journal) |
227 | { | 227 | { |
228 | int err; | 228 | int err, err2; |
229 | journal_superblock_t * sb; | 229 | journal_superblock_t * sb; |
230 | 230 | ||
231 | struct recovery_info info; | 231 | struct recovery_info info; |
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
263 | journal->j_transaction_sequence = ++info.end_transaction; | 263 | journal->j_transaction_sequence = ++info.end_transaction; |
264 | 264 | ||
265 | jbd2_journal_clear_revoke(journal); | 265 | jbd2_journal_clear_revoke(journal); |
266 | sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
267 | if (!err) | ||
268 | err = err2; | ||
269 | |||
267 | return err; | 270 | return err; |
268 | } | 271 | } |
269 | 272 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa9..39b7805a599a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | INIT_LIST_HEAD(&transaction->t_inode_list); | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
55 | INIT_LIST_HEAD(&transaction->t_private_list); | ||
55 | 56 | ||
56 | /* Set up the commit timer for the new transaction. */ | 57 | /* Set up the commit timer for the new transaction. */ |
57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #ifndef _JFFS2_FS_I | 12 | #ifndef _JFFS2_FS_I |
13 | #define _JFFS2_FS_I | 13 | #define _JFFS2_FS_I |
14 | 14 | ||
15 | #include <linux/version.h> | ||
16 | #include <linux/rbtree.h> | 15 | #include <linux/rbtree.h> |
17 | #include <linux/posix_acl.h> | 16 | #include <linux/posix_acl.h> |
18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 3630718be395..0dae345e481b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -199,7 +199,7 @@ enum { | |||
199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask | 199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask |
200 | }; | 200 | }; |
201 | 201 | ||
202 | static match_table_t tokens = { | 202 | static const match_table_t tokens = { |
203 | {Opt_integrity, "integrity"}, | 203 | {Opt_integrity, "integrity"}, |
204 | {Opt_nointegrity, "nointegrity"}, | 204 | {Opt_nointegrity, "nointegrity"}, |
205 | {Opt_iocharset, "iocharset=%s"}, | 205 | {Opt_iocharset, "iocharset=%s"}, |
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 7725a0a9a555..97f6073ab339 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile | |||
@@ -5,6 +5,6 @@ | |||
5 | obj-$(CONFIG_LOCKD) += lockd.o | 5 | obj-$(CONFIG_LOCKD) += lockd.o |
6 | 6 | ||
7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ | 7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ |
8 | svcproc.o svcsubs.o mon.o xdr.o | 8 | svcproc.o svcsubs.o mon.o xdr.o grace.o |
9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o | 9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o |
10 | lockd-objs := $(lockd-objs-y) | 10 | lockd-objs := $(lockd-objs-y) |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0b45fd3a4bfd..8307dd64bf46 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -54,14 +54,13 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | |||
54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; | 54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; |
55 | int status; | 55 | int status; |
56 | 56 | ||
57 | status = lockd_up(nlm_init->protocol); | 57 | status = lockd_up(); |
58 | if (status < 0) | 58 | if (status < 0) |
59 | return ERR_PTR(status); | 59 | return ERR_PTR(status); |
60 | 60 | ||
61 | host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, | 61 | host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, |
62 | nlm_init->protocol, nlm_version, | 62 | nlm_init->protocol, nlm_version, |
63 | nlm_init->hostname, | 63 | nlm_init->hostname); |
64 | strlen(nlm_init->hostname)); | ||
65 | if (host == NULL) { | 64 | if (host == NULL) { |
66 | lockd_down(); | 65 | lockd_down(); |
67 | return ERR_PTR(-ENOLCK); | 66 | return ERR_PTR(-ENOLCK); |
@@ -142,7 +141,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) | |||
142 | /* | 141 | /* |
143 | * The server lockd has called us back to tell us the lock was granted | 142 | * The server lockd has called us back to tell us the lock was granted |
144 | */ | 143 | */ |
145 | __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) | 144 | __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) |
146 | { | 145 | { |
147 | const struct file_lock *fl = &lock->fl; | 146 | const struct file_lock *fl = &lock->fl; |
148 | const struct nfs_fh *fh = &lock->fh; | 147 | const struct nfs_fh *fh = &lock->fh; |
@@ -166,7 +165,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock | |||
166 | */ | 165 | */ |
167 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) | 166 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) |
168 | continue; | 167 | continue; |
169 | if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) | 168 | if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) |
170 | continue; | 169 | continue; |
171 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) | 170 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) |
172 | continue; | 171 | continue; |
@@ -216,7 +215,7 @@ reclaimer(void *ptr) | |||
216 | /* This one ensures that our parent doesn't terminate while the | 215 | /* This one ensures that our parent doesn't terminate while the |
217 | * reclaim is in progress */ | 216 | * reclaim is in progress */ |
218 | lock_kernel(); | 217 | lock_kernel(); |
219 | lockd_up(0); /* note: this cannot fail as lockd is already running */ | 218 | lockd_up(); /* note: this cannot fail as lockd is already running */ |
220 | 219 | ||
221 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); | 220 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); |
222 | 221 | ||
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c new file mode 100644 index 000000000000..183cc1f0af1c --- /dev/null +++ b/fs/lockd/grace.c | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * Common code for control of lockd and nfsv4 grace periods. | ||
3 | */ | ||
4 | |||
5 | #include <linux/module.h> | ||
6 | #include <linux/lockd/bind.h> | ||
7 | |||
8 | static LIST_HEAD(grace_list); | ||
9 | static DEFINE_SPINLOCK(grace_lock); | ||
10 | |||
11 | /** | ||
12 | * locks_start_grace | ||
13 | * @lm: who this grace period is for | ||
14 | * | ||
15 | * A grace period is a period during which locks should not be given | ||
16 | * out. Currently grace periods are only enforced by the two lock | ||
17 | * managers (lockd and nfsd), using the locks_in_grace() function to | ||
18 | * check when they are in a grace period. | ||
19 | * | ||
20 | * This function is called to start a grace period. | ||
21 | */ | ||
22 | void locks_start_grace(struct lock_manager *lm) | ||
23 | { | ||
24 | spin_lock(&grace_lock); | ||
25 | list_add(&lm->list, &grace_list); | ||
26 | spin_unlock(&grace_lock); | ||
27 | } | ||
28 | EXPORT_SYMBOL_GPL(locks_start_grace); | ||
29 | |||
30 | /** | ||
31 | * locks_end_grace | ||
32 | * @lm: who this grace period is for | ||
33 | * | ||
34 | * Call this function to state that the given lock manager is ready to | ||
35 | * resume regular locking. The grace period will not end until all lock | ||
36 | * managers that called locks_start_grace() also call locks_end_grace(). | ||
37 | * Note that callers count on it being safe to call this more than once, | ||
38 | * and the second call should be a no-op. | ||
39 | */ | ||
40 | void locks_end_grace(struct lock_manager *lm) | ||
41 | { | ||
42 | spin_lock(&grace_lock); | ||
43 | list_del_init(&lm->list); | ||
44 | spin_unlock(&grace_lock); | ||
45 | } | ||
46 | EXPORT_SYMBOL_GPL(locks_end_grace); | ||
47 | |||
48 | /** | ||
49 | * locks_in_grace | ||
50 | * | ||
51 | * Lock managers call this function to determine when it is OK for them | ||
52 | * to answer ordinary lock requests, and when they should accept only | ||
53 | * lock reclaims. | ||
54 | */ | ||
55 | int locks_in_grace(void) | ||
56 | { | ||
57 | return !list_empty(&grace_list); | ||
58 | } | ||
59 | EXPORT_SYMBOL_GPL(locks_in_grace); | ||
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a17664c7eacc..9fd8889097b7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -11,16 +11,17 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/in.h> | 13 | #include <linux/in.h> |
14 | #include <linux/in6.h> | ||
14 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/svc.h> | 16 | #include <linux/sunrpc/svc.h> |
16 | #include <linux/lockd/lockd.h> | 17 | #include <linux/lockd/lockd.h> |
17 | #include <linux/lockd/sm_inter.h> | 18 | #include <linux/lockd/sm_inter.h> |
18 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
19 | 20 | ||
21 | #include <net/ipv6.h> | ||
20 | 22 | ||
21 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE | 23 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE |
22 | #define NLM_HOST_NRHASH 32 | 24 | #define NLM_HOST_NRHASH 32 |
23 | #define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) | ||
24 | #define NLM_HOST_REBIND (60 * HZ) | 25 | #define NLM_HOST_REBIND (60 * HZ) |
25 | #define NLM_HOST_EXPIRE (300 * HZ) | 26 | #define NLM_HOST_EXPIRE (300 * HZ) |
26 | #define NLM_HOST_COLLECT (120 * HZ) | 27 | #define NLM_HOST_COLLECT (120 * HZ) |
@@ -30,42 +31,115 @@ static unsigned long next_gc; | |||
30 | static int nrhosts; | 31 | static int nrhosts; |
31 | static DEFINE_MUTEX(nlm_host_mutex); | 32 | static DEFINE_MUTEX(nlm_host_mutex); |
32 | 33 | ||
33 | |||
34 | static void nlm_gc_hosts(void); | 34 | static void nlm_gc_hosts(void); |
35 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | 35 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
36 | const char *, unsigned int, int); | 36 | const size_t salen, |
37 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | 37 | const char *hostname, |
38 | const char *hostname, | 38 | const size_t hostname_len, |
39 | unsigned int hostname_len); | 39 | const int create); |
40 | |||
41 | struct nlm_lookup_host_info { | ||
42 | const int server; /* search for server|client */ | ||
43 | const struct sockaddr *sap; /* address to search for */ | ||
44 | const size_t salen; /* it's length */ | ||
45 | const unsigned short protocol; /* transport to search for*/ | ||
46 | const u32 version; /* NLM version to search for */ | ||
47 | const char *hostname; /* remote's hostname */ | ||
48 | const size_t hostname_len; /* it's length */ | ||
49 | const struct sockaddr *src_sap; /* our address (optional) */ | ||
50 | const size_t src_len; /* it's length */ | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * Hash function must work well on big- and little-endian platforms | ||
55 | */ | ||
56 | static unsigned int __nlm_hash32(const __be32 n) | ||
57 | { | ||
58 | unsigned int hash = (__force u32)n ^ ((__force u32)n >> 16); | ||
59 | return hash ^ (hash >> 8); | ||
60 | } | ||
61 | |||
62 | static unsigned int __nlm_hash_addr4(const struct sockaddr *sap) | ||
63 | { | ||
64 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
65 | return __nlm_hash32(sin->sin_addr.s_addr); | ||
66 | } | ||
67 | |||
68 | static unsigned int __nlm_hash_addr6(const struct sockaddr *sap) | ||
69 | { | ||
70 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
71 | const struct in6_addr addr = sin6->sin6_addr; | ||
72 | return __nlm_hash32(addr.s6_addr32[0]) ^ | ||
73 | __nlm_hash32(addr.s6_addr32[1]) ^ | ||
74 | __nlm_hash32(addr.s6_addr32[2]) ^ | ||
75 | __nlm_hash32(addr.s6_addr32[3]); | ||
76 | } | ||
77 | |||
78 | static unsigned int nlm_hash_address(const struct sockaddr *sap) | ||
79 | { | ||
80 | unsigned int hash; | ||
81 | |||
82 | switch (sap->sa_family) { | ||
83 | case AF_INET: | ||
84 | hash = __nlm_hash_addr4(sap); | ||
85 | break; | ||
86 | case AF_INET6: | ||
87 | hash = __nlm_hash_addr6(sap); | ||
88 | break; | ||
89 | default: | ||
90 | hash = 0; | ||
91 | } | ||
92 | return hash & (NLM_HOST_NRHASH - 1); | ||
93 | } | ||
94 | |||
95 | static void nlm_clear_port(struct sockaddr *sap) | ||
96 | { | ||
97 | switch (sap->sa_family) { | ||
98 | case AF_INET: | ||
99 | ((struct sockaddr_in *)sap)->sin_port = 0; | ||
100 | break; | ||
101 | case AF_INET6: | ||
102 | ((struct sockaddr_in6 *)sap)->sin6_port = 0; | ||
103 | break; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static void nlm_display_address(const struct sockaddr *sap, | ||
108 | char *buf, const size_t len) | ||
109 | { | ||
110 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
111 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
112 | |||
113 | switch (sap->sa_family) { | ||
114 | case AF_UNSPEC: | ||
115 | snprintf(buf, len, "unspecified"); | ||
116 | break; | ||
117 | case AF_INET: | ||
118 | snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); | ||
119 | break; | ||
120 | case AF_INET6: | ||
121 | if (ipv6_addr_v4mapped(&sin6->sin6_addr)) | ||
122 | snprintf(buf, len, NIPQUAD_FMT, | ||
123 | NIPQUAD(sin6->sin6_addr.s6_addr32[3])); | ||
124 | else | ||
125 | snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr)); | ||
126 | break; | ||
127 | default: | ||
128 | snprintf(buf, len, "unsupported address family"); | ||
129 | break; | ||
130 | } | ||
131 | } | ||
40 | 132 | ||
41 | /* | 133 | /* |
42 | * Common host lookup routine for server & client | 134 | * Common host lookup routine for server & client |
43 | */ | 135 | */ |
44 | static struct nlm_host *nlm_lookup_host(int server, | 136 | static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) |
45 | const struct sockaddr_in *sin, | ||
46 | int proto, u32 version, | ||
47 | const char *hostname, | ||
48 | unsigned int hostname_len, | ||
49 | const struct sockaddr_in *ssin) | ||
50 | { | 137 | { |
51 | struct hlist_head *chain; | 138 | struct hlist_head *chain; |
52 | struct hlist_node *pos; | 139 | struct hlist_node *pos; |
53 | struct nlm_host *host; | 140 | struct nlm_host *host; |
54 | struct nsm_handle *nsm = NULL; | 141 | struct nsm_handle *nsm = NULL; |
55 | int hash; | ||
56 | |||
57 | dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT | ||
58 | ", p=%d, v=%u, my role=%s, name=%.*s)\n", | ||
59 | NIPQUAD(ssin->sin_addr.s_addr), | ||
60 | NIPQUAD(sin->sin_addr.s_addr), proto, version, | ||
61 | server? "server" : "client", | ||
62 | hostname_len, | ||
63 | hostname? hostname : "<none>"); | ||
64 | 142 | ||
65 | |||
66 | hash = NLM_ADDRHASH(sin->sin_addr.s_addr); | ||
67 | |||
68 | /* Lock hash table */ | ||
69 | mutex_lock(&nlm_host_mutex); | 143 | mutex_lock(&nlm_host_mutex); |
70 | 144 | ||
71 | if (time_after_eq(jiffies, next_gc)) | 145 | if (time_after_eq(jiffies, next_gc)) |
@@ -78,22 +152,22 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
78 | * different NLM rpc_clients into one single nlm_host object. | 152 | * different NLM rpc_clients into one single nlm_host object. |
79 | * This would allow us to have one nlm_host per address. | 153 | * This would allow us to have one nlm_host per address. |
80 | */ | 154 | */ |
81 | chain = &nlm_hosts[hash]; | 155 | chain = &nlm_hosts[nlm_hash_address(ni->sap)]; |
82 | hlist_for_each_entry(host, pos, chain, h_hash) { | 156 | hlist_for_each_entry(host, pos, chain, h_hash) { |
83 | if (!nlm_cmp_addr(&host->h_addr, sin)) | 157 | if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) |
84 | continue; | 158 | continue; |
85 | 159 | ||
86 | /* See if we have an NSM handle for this client */ | 160 | /* See if we have an NSM handle for this client */ |
87 | if (!nsm) | 161 | if (!nsm) |
88 | nsm = host->h_nsmhandle; | 162 | nsm = host->h_nsmhandle; |
89 | 163 | ||
90 | if (host->h_proto != proto) | 164 | if (host->h_proto != ni->protocol) |
91 | continue; | 165 | continue; |
92 | if (host->h_version != version) | 166 | if (host->h_version != ni->version) |
93 | continue; | 167 | continue; |
94 | if (host->h_server != server) | 168 | if (host->h_server != ni->server) |
95 | continue; | 169 | continue; |
96 | if (!nlm_cmp_addr(&host->h_saddr, ssin)) | 170 | if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) |
97 | continue; | 171 | continue; |
98 | 172 | ||
99 | /* Move to head of hash chain. */ | 173 | /* Move to head of hash chain. */ |
@@ -101,30 +175,41 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
101 | hlist_add_head(&host->h_hash, chain); | 175 | hlist_add_head(&host->h_hash, chain); |
102 | 176 | ||
103 | nlm_get_host(host); | 177 | nlm_get_host(host); |
178 | dprintk("lockd: nlm_lookup_host found host %s (%s)\n", | ||
179 | host->h_name, host->h_addrbuf); | ||
104 | goto out; | 180 | goto out; |
105 | } | 181 | } |
106 | if (nsm) | ||
107 | atomic_inc(&nsm->sm_count); | ||
108 | |||
109 | host = NULL; | ||
110 | 182 | ||
111 | /* Sadly, the host isn't in our hash table yet. See if | 183 | /* |
112 | * we have an NSM handle for it. If not, create one. | 184 | * The host wasn't in our hash table. If we don't |
185 | * have an NSM handle for it yet, create one. | ||
113 | */ | 186 | */ |
114 | if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) | 187 | if (nsm) |
115 | goto out; | 188 | atomic_inc(&nsm->sm_count); |
189 | else { | ||
190 | host = NULL; | ||
191 | nsm = nsm_find(ni->sap, ni->salen, | ||
192 | ni->hostname, ni->hostname_len, 1); | ||
193 | if (!nsm) { | ||
194 | dprintk("lockd: nlm_lookup_host failed; " | ||
195 | "no nsm handle\n"); | ||
196 | goto out; | ||
197 | } | ||
198 | } | ||
116 | 199 | ||
117 | host = kzalloc(sizeof(*host), GFP_KERNEL); | 200 | host = kzalloc(sizeof(*host), GFP_KERNEL); |
118 | if (!host) { | 201 | if (!host) { |
119 | nsm_release(nsm); | 202 | nsm_release(nsm); |
203 | dprintk("lockd: nlm_lookup_host failed; no memory\n"); | ||
120 | goto out; | 204 | goto out; |
121 | } | 205 | } |
122 | host->h_name = nsm->sm_name; | 206 | host->h_name = nsm->sm_name; |
123 | host->h_addr = *sin; | 207 | memcpy(nlm_addr(host), ni->sap, ni->salen); |
124 | host->h_addr.sin_port = 0; /* ouch! */ | 208 | host->h_addrlen = ni->salen; |
125 | host->h_saddr = *ssin; | 209 | nlm_clear_port(nlm_addr(host)); |
126 | host->h_version = version; | 210 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); |
127 | host->h_proto = proto; | 211 | host->h_version = ni->version; |
212 | host->h_proto = ni->protocol; | ||
128 | host->h_rpcclnt = NULL; | 213 | host->h_rpcclnt = NULL; |
129 | mutex_init(&host->h_mutex); | 214 | mutex_init(&host->h_mutex); |
130 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 215 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
@@ -135,7 +220,7 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
135 | host->h_state = 0; /* pseudo NSM state */ | 220 | host->h_state = 0; /* pseudo NSM state */ |
136 | host->h_nsmstate = 0; /* real NSM state */ | 221 | host->h_nsmstate = 0; /* real NSM state */ |
137 | host->h_nsmhandle = nsm; | 222 | host->h_nsmhandle = nsm; |
138 | host->h_server = server; | 223 | host->h_server = ni->server; |
139 | hlist_add_head(&host->h_hash, chain); | 224 | hlist_add_head(&host->h_hash, chain); |
140 | INIT_LIST_HEAD(&host->h_lockowners); | 225 | INIT_LIST_HEAD(&host->h_lockowners); |
141 | spin_lock_init(&host->h_lock); | 226 | spin_lock_init(&host->h_lock); |
@@ -143,6 +228,15 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
143 | INIT_LIST_HEAD(&host->h_reclaim); | 228 | INIT_LIST_HEAD(&host->h_reclaim); |
144 | 229 | ||
145 | nrhosts++; | 230 | nrhosts++; |
231 | |||
232 | nlm_display_address((struct sockaddr *)&host->h_addr, | ||
233 | host->h_addrbuf, sizeof(host->h_addrbuf)); | ||
234 | nlm_display_address((struct sockaddr *)&host->h_srcaddr, | ||
235 | host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf)); | ||
236 | |||
237 | dprintk("lockd: nlm_lookup_host created host %s\n", | ||
238 | host->h_name); | ||
239 | |||
146 | out: | 240 | out: |
147 | mutex_unlock(&nlm_host_mutex); | 241 | mutex_unlock(&nlm_host_mutex); |
148 | return host; | 242 | return host; |
@@ -170,33 +264,103 @@ nlm_destroy_host(struct nlm_host *host) | |||
170 | kfree(host); | 264 | kfree(host); |
171 | } | 265 | } |
172 | 266 | ||
173 | /* | 267 | /** |
174 | * Find an NLM server handle in the cache. If there is none, create it. | 268 | * nlmclnt_lookup_host - Find an NLM host handle matching a remote server |
269 | * @sap: network address of server | ||
270 | * @salen: length of server address | ||
271 | * @protocol: transport protocol to use | ||
272 | * @version: NLM protocol version | ||
273 | * @hostname: '\0'-terminated hostname of server | ||
274 | * | ||
275 | * Returns an nlm_host structure that matches the passed-in | ||
276 | * [server address, transport protocol, NLM version, server hostname]. | ||
277 | * If one doesn't already exist in the host cache, a new handle is | ||
278 | * created and returned. | ||
175 | */ | 279 | */ |
176 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, | 280 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, |
177 | int proto, u32 version, | 281 | const size_t salen, |
178 | const char *hostname, | 282 | const unsigned short protocol, |
179 | unsigned int hostname_len) | 283 | const u32 version, const char *hostname) |
180 | { | 284 | { |
181 | struct sockaddr_in ssin = {0}; | 285 | const struct sockaddr source = { |
182 | 286 | .sa_family = AF_UNSPEC, | |
183 | return nlm_lookup_host(0, sin, proto, version, | 287 | }; |
184 | hostname, hostname_len, &ssin); | 288 | struct nlm_lookup_host_info ni = { |
289 | .server = 0, | ||
290 | .sap = sap, | ||
291 | .salen = salen, | ||
292 | .protocol = protocol, | ||
293 | .version = version, | ||
294 | .hostname = hostname, | ||
295 | .hostname_len = strlen(hostname), | ||
296 | .src_sap = &source, | ||
297 | .src_len = sizeof(source), | ||
298 | }; | ||
299 | |||
300 | dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, | ||
301 | (hostname ? hostname : "<none>"), version, | ||
302 | (protocol == IPPROTO_UDP ? "udp" : "tcp")); | ||
303 | |||
304 | return nlm_lookup_host(&ni); | ||
185 | } | 305 | } |
186 | 306 | ||
187 | /* | 307 | /** |
188 | * Find an NLM client handle in the cache. If there is none, create it. | 308 | * nlmsvc_lookup_host - Find an NLM host handle matching a remote client |
309 | * @rqstp: incoming NLM request | ||
310 | * @hostname: name of client host | ||
311 | * @hostname_len: length of client hostname | ||
312 | * | ||
313 | * Returns an nlm_host structure that matches the [client address, | ||
314 | * transport protocol, NLM version, client hostname] of the passed-in | ||
315 | * NLM request. If one doesn't already exist in the host cache, a | ||
316 | * new handle is created and returned. | ||
317 | * | ||
318 | * Before possibly creating a new nlm_host, construct a sockaddr | ||
319 | * for a specific source address in case the local system has | ||
320 | * multiple network addresses. The family of the address in | ||
321 | * rq_daddr is guaranteed to be the same as the family of the | ||
322 | * address in rq_addr, so it's safe to use the same family for | ||
323 | * the source address. | ||
189 | */ | 324 | */ |
190 | struct nlm_host * | 325 | struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, |
191 | nlmsvc_lookup_host(struct svc_rqst *rqstp, | 326 | const char *hostname, |
192 | const char *hostname, unsigned int hostname_len) | 327 | const size_t hostname_len) |
193 | { | 328 | { |
194 | struct sockaddr_in ssin = {0}; | 329 | struct sockaddr_in sin = { |
330 | .sin_family = AF_INET, | ||
331 | }; | ||
332 | struct sockaddr_in6 sin6 = { | ||
333 | .sin6_family = AF_INET6, | ||
334 | }; | ||
335 | struct nlm_lookup_host_info ni = { | ||
336 | .server = 1, | ||
337 | .sap = svc_addr(rqstp), | ||
338 | .salen = rqstp->rq_addrlen, | ||
339 | .protocol = rqstp->rq_prot, | ||
340 | .version = rqstp->rq_vers, | ||
341 | .hostname = hostname, | ||
342 | .hostname_len = hostname_len, | ||
343 | .src_len = rqstp->rq_addrlen, | ||
344 | }; | ||
345 | |||
346 | dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, | ||
347 | (int)hostname_len, hostname, rqstp->rq_vers, | ||
348 | (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); | ||
349 | |||
350 | switch (ni.sap->sa_family) { | ||
351 | case AF_INET: | ||
352 | sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; | ||
353 | ni.src_sap = (struct sockaddr *)&sin; | ||
354 | break; | ||
355 | case AF_INET6: | ||
356 | ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); | ||
357 | ni.src_sap = (struct sockaddr *)&sin6; | ||
358 | break; | ||
359 | default: | ||
360 | return NULL; | ||
361 | } | ||
195 | 362 | ||
196 | ssin.sin_addr = rqstp->rq_daddr.addr; | 363 | return nlm_lookup_host(&ni); |
197 | return nlm_lookup_host(1, svc_addr_in(rqstp), | ||
198 | rqstp->rq_prot, rqstp->rq_vers, | ||
199 | hostname, hostname_len, &ssin); | ||
200 | } | 364 | } |
201 | 365 | ||
202 | /* | 366 | /* |
@@ -207,9 +371,8 @@ nlm_bind_host(struct nlm_host *host) | |||
207 | { | 371 | { |
208 | struct rpc_clnt *clnt; | 372 | struct rpc_clnt *clnt; |
209 | 373 | ||
210 | dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", | 374 | dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n", |
211 | NIPQUAD(host->h_saddr.sin_addr), | 375 | host->h_name, host->h_addrbuf, host->h_srcaddrbuf); |
212 | NIPQUAD(host->h_addr.sin_addr)); | ||
213 | 376 | ||
214 | /* Lock host handle */ | 377 | /* Lock host handle */ |
215 | mutex_lock(&host->h_mutex); | 378 | mutex_lock(&host->h_mutex); |
@@ -221,7 +384,7 @@ nlm_bind_host(struct nlm_host *host) | |||
221 | if (time_after_eq(jiffies, host->h_nextrebind)) { | 384 | if (time_after_eq(jiffies, host->h_nextrebind)) { |
222 | rpc_force_rebind(clnt); | 385 | rpc_force_rebind(clnt); |
223 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 386 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
224 | dprintk("lockd: next rebind in %ld jiffies\n", | 387 | dprintk("lockd: next rebind in %lu jiffies\n", |
225 | host->h_nextrebind - jiffies); | 388 | host->h_nextrebind - jiffies); |
226 | } | 389 | } |
227 | } else { | 390 | } else { |
@@ -234,9 +397,9 @@ nlm_bind_host(struct nlm_host *host) | |||
234 | }; | 397 | }; |
235 | struct rpc_create_args args = { | 398 | struct rpc_create_args args = { |
236 | .protocol = host->h_proto, | 399 | .protocol = host->h_proto, |
237 | .address = (struct sockaddr *)&host->h_addr, | 400 | .address = nlm_addr(host), |
238 | .addrsize = sizeof(host->h_addr), | 401 | .addrsize = host->h_addrlen, |
239 | .saddress = (struct sockaddr *)&host->h_saddr, | 402 | .saddress = nlm_srcaddr(host), |
240 | .timeout = &timeparms, | 403 | .timeout = &timeparms, |
241 | .servername = host->h_name, | 404 | .servername = host->h_name, |
242 | .program = &nlm_program, | 405 | .program = &nlm_program, |
@@ -324,12 +487,16 @@ void nlm_host_rebooted(const struct sockaddr_in *sin, | |||
324 | struct nsm_handle *nsm; | 487 | struct nsm_handle *nsm; |
325 | struct nlm_host *host; | 488 | struct nlm_host *host; |
326 | 489 | ||
327 | dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", | 490 | nsm = nsm_find((struct sockaddr *)sin, sizeof(*sin), |
328 | hostname, NIPQUAD(sin->sin_addr)); | 491 | hostname, hostname_len, 0); |
329 | 492 | if (nsm == NULL) { | |
330 | /* Find the NSM handle for this peer */ | 493 | dprintk("lockd: never saw rebooted peer '%.*s' before\n", |
331 | if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) | 494 | hostname_len, hostname); |
332 | return; | 495 | return; |
496 | } | ||
497 | |||
498 | dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n", | ||
499 | hostname_len, hostname, nsm->sm_addrbuf); | ||
333 | 500 | ||
334 | /* When reclaiming locks on this peer, make sure that | 501 | /* When reclaiming locks on this peer, make sure that |
335 | * we set up a new notification */ | 502 | * we set up a new notification */ |
@@ -461,22 +628,23 @@ nlm_gc_hosts(void) | |||
461 | static LIST_HEAD(nsm_handles); | 628 | static LIST_HEAD(nsm_handles); |
462 | static DEFINE_SPINLOCK(nsm_lock); | 629 | static DEFINE_SPINLOCK(nsm_lock); |
463 | 630 | ||
464 | static struct nsm_handle * | 631 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
465 | __nsm_find(const struct sockaddr_in *sin, | 632 | const size_t salen, |
466 | const char *hostname, unsigned int hostname_len, | 633 | const char *hostname, |
467 | int create) | 634 | const size_t hostname_len, |
635 | const int create) | ||
468 | { | 636 | { |
469 | struct nsm_handle *nsm = NULL; | 637 | struct nsm_handle *nsm = NULL; |
470 | struct nsm_handle *pos; | 638 | struct nsm_handle *pos; |
471 | 639 | ||
472 | if (!sin) | 640 | if (!sap) |
473 | return NULL; | 641 | return NULL; |
474 | 642 | ||
475 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { | 643 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { |
476 | if (printk_ratelimit()) { | 644 | if (printk_ratelimit()) { |
477 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " | 645 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " |
478 | "in NFS lock request\n", | 646 | "in NFS lock request\n", |
479 | hostname_len, hostname); | 647 | (int)hostname_len, hostname); |
480 | } | 648 | } |
481 | return NULL; | 649 | return NULL; |
482 | } | 650 | } |
@@ -489,7 +657,7 @@ retry: | |||
489 | if (strlen(pos->sm_name) != hostname_len | 657 | if (strlen(pos->sm_name) != hostname_len |
490 | || memcmp(pos->sm_name, hostname, hostname_len)) | 658 | || memcmp(pos->sm_name, hostname, hostname_len)) |
491 | continue; | 659 | continue; |
492 | } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) | 660 | } else if (!nlm_cmp_addr(nsm_addr(pos), sap)) |
493 | continue; | 661 | continue; |
494 | atomic_inc(&pos->sm_count); | 662 | atomic_inc(&pos->sm_count); |
495 | kfree(nsm); | 663 | kfree(nsm); |
@@ -509,10 +677,13 @@ retry: | |||
509 | if (nsm == NULL) | 677 | if (nsm == NULL) |
510 | return NULL; | 678 | return NULL; |
511 | 679 | ||
512 | nsm->sm_addr = *sin; | 680 | memcpy(nsm_addr(nsm), sap, salen); |
681 | nsm->sm_addrlen = salen; | ||
513 | nsm->sm_name = (char *) (nsm + 1); | 682 | nsm->sm_name = (char *) (nsm + 1); |
514 | memcpy(nsm->sm_name, hostname, hostname_len); | 683 | memcpy(nsm->sm_name, hostname, hostname_len); |
515 | nsm->sm_name[hostname_len] = '\0'; | 684 | nsm->sm_name[hostname_len] = '\0'; |
685 | nlm_display_address((struct sockaddr *)&nsm->sm_addr, | ||
686 | nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf)); | ||
516 | atomic_set(&nsm->sm_count, 1); | 687 | atomic_set(&nsm->sm_count, 1); |
517 | goto retry; | 688 | goto retry; |
518 | 689 | ||
@@ -521,13 +692,6 @@ found: | |||
521 | return nsm; | 692 | return nsm; |
522 | } | 693 | } |
523 | 694 | ||
524 | static struct nsm_handle * | ||
525 | nsm_find(const struct sockaddr_in *sin, const char *hostname, | ||
526 | unsigned int hostname_len) | ||
527 | { | ||
528 | return __nsm_find(sin, hostname, hostname_len, 1); | ||
529 | } | ||
530 | |||
531 | /* | 695 | /* |
532 | * Release an NSM handle | 696 | * Release an NSM handle |
533 | */ | 697 | */ |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e4d563543b11..4e7e958e8f67 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -51,7 +51,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) | |||
51 | 51 | ||
52 | memset(&args, 0, sizeof(args)); | 52 | memset(&args, 0, sizeof(args)); |
53 | args.mon_name = nsm->sm_name; | 53 | args.mon_name = nsm->sm_name; |
54 | args.addr = nsm->sm_addr.sin_addr.s_addr; | 54 | args.addr = nsm_addr_in(nsm)->sin_addr.s_addr; |
55 | args.prog = NLM_PROGRAM; | 55 | args.prog = NLM_PROGRAM; |
56 | args.vers = 3; | 56 | args.vers = 3; |
57 | args.proc = NLMPROC_NSM_NOTIFY; | 57 | args.proc = NLMPROC_NSM_NOTIFY; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5bd9bf0fa9df..c631a83931ce 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex); | |||
51 | static unsigned int nlmsvc_users; | 51 | static unsigned int nlmsvc_users; |
52 | static struct task_struct *nlmsvc_task; | 52 | static struct task_struct *nlmsvc_task; |
53 | static struct svc_rqst *nlmsvc_rqst; | 53 | static struct svc_rqst *nlmsvc_rqst; |
54 | int nlmsvc_grace_period; | ||
55 | unsigned long nlmsvc_timeout; | 54 | unsigned long nlmsvc_timeout; |
56 | 55 | ||
57 | /* | 56 | /* |
@@ -85,27 +84,23 @@ static unsigned long get_lockd_grace_period(void) | |||
85 | return nlm_timeout * 5 * HZ; | 84 | return nlm_timeout * 5 * HZ; |
86 | } | 85 | } |
87 | 86 | ||
88 | unsigned long get_nfs_grace_period(void) | 87 | static struct lock_manager lockd_manager = { |
89 | { | 88 | }; |
90 | unsigned long lockdgrace = get_lockd_grace_period(); | ||
91 | unsigned long nfsdgrace = 0; | ||
92 | |||
93 | if (nlmsvc_ops) | ||
94 | nfsdgrace = nlmsvc_ops->get_grace_period(); | ||
95 | |||
96 | return max(lockdgrace, nfsdgrace); | ||
97 | } | ||
98 | EXPORT_SYMBOL(get_nfs_grace_period); | ||
99 | 89 | ||
100 | static unsigned long set_grace_period(void) | 90 | static void grace_ender(struct work_struct *not_used) |
101 | { | 91 | { |
102 | nlmsvc_grace_period = 1; | 92 | locks_end_grace(&lockd_manager); |
103 | return get_nfs_grace_period() + jiffies; | ||
104 | } | 93 | } |
105 | 94 | ||
106 | static inline void clear_grace_period(void) | 95 | static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); |
96 | |||
97 | static void set_grace_period(void) | ||
107 | { | 98 | { |
108 | nlmsvc_grace_period = 0; | 99 | unsigned long grace_period = get_lockd_grace_period(); |
100 | |||
101 | locks_start_grace(&lockd_manager); | ||
102 | cancel_delayed_work_sync(&grace_period_end); | ||
103 | schedule_delayed_work(&grace_period_end, grace_period); | ||
109 | } | 104 | } |
110 | 105 | ||
111 | /* | 106 | /* |
@@ -116,7 +111,6 @@ lockd(void *vrqstp) | |||
116 | { | 111 | { |
117 | int err = 0, preverr = 0; | 112 | int err = 0, preverr = 0; |
118 | struct svc_rqst *rqstp = vrqstp; | 113 | struct svc_rqst *rqstp = vrqstp; |
119 | unsigned long grace_period_expire; | ||
120 | 114 | ||
121 | /* try_to_freeze() is called from svc_recv() */ | 115 | /* try_to_freeze() is called from svc_recv() */ |
122 | set_freezable(); | 116 | set_freezable(); |
@@ -139,7 +133,7 @@ lockd(void *vrqstp) | |||
139 | nlm_timeout = LOCKD_DFLT_TIMEO; | 133 | nlm_timeout = LOCKD_DFLT_TIMEO; |
140 | nlmsvc_timeout = nlm_timeout * HZ; | 134 | nlmsvc_timeout = nlm_timeout * HZ; |
141 | 135 | ||
142 | grace_period_expire = set_grace_period(); | 136 | set_grace_period(); |
143 | 137 | ||
144 | /* | 138 | /* |
145 | * The main request loop. We don't terminate until the last | 139 | * The main request loop. We don't terminate until the last |
@@ -153,21 +147,12 @@ lockd(void *vrqstp) | |||
153 | flush_signals(current); | 147 | flush_signals(current); |
154 | if (nlmsvc_ops) { | 148 | if (nlmsvc_ops) { |
155 | nlmsvc_invalidate_all(); | 149 | nlmsvc_invalidate_all(); |
156 | grace_period_expire = set_grace_period(); | 150 | set_grace_period(); |
157 | } | 151 | } |
158 | continue; | 152 | continue; |
159 | } | 153 | } |
160 | 154 | ||
161 | /* | 155 | timeout = nlmsvc_retry_blocked(); |
162 | * Retry any blocked locks that have been notified by | ||
163 | * the VFS. Don't do this during grace period. | ||
164 | * (Theoretically, there shouldn't even be blocked locks | ||
165 | * during grace period). | ||
166 | */ | ||
167 | if (!nlmsvc_grace_period) { | ||
168 | timeout = nlmsvc_retry_blocked(); | ||
169 | } else if (time_before(grace_period_expire, jiffies)) | ||
170 | clear_grace_period(); | ||
171 | 156 | ||
172 | /* | 157 | /* |
173 | * Find a socket with data available and call its | 158 | * Find a socket with data available and call its |
@@ -195,6 +180,7 @@ lockd(void *vrqstp) | |||
195 | svc_process(rqstp); | 180 | svc_process(rqstp); |
196 | } | 181 | } |
197 | flush_signals(current); | 182 | flush_signals(current); |
183 | cancel_delayed_work_sync(&grace_period_end); | ||
198 | if (nlmsvc_ops) | 184 | if (nlmsvc_ops) |
199 | nlmsvc_invalidate_all(); | 185 | nlmsvc_invalidate_all(); |
200 | nlm_shutdown_hosts(); | 186 | nlm_shutdown_hosts(); |
@@ -203,25 +189,28 @@ lockd(void *vrqstp) | |||
203 | } | 189 | } |
204 | 190 | ||
205 | /* | 191 | /* |
206 | * Make any sockets that are needed but not present. | 192 | * Ensure there are active UDP and TCP listeners for lockd. |
207 | * If nlm_udpport or nlm_tcpport were set as module | 193 | * |
208 | * options, make those sockets unconditionally | 194 | * Even if we have only TCP NFS mounts and/or TCP NFSDs, some |
195 | * local services (such as rpc.statd) still require UDP, and | ||
196 | * some NFS servers do not yet support NLM over TCP. | ||
197 | * | ||
198 | * Returns zero if all listeners are available; otherwise a | ||
199 | * negative errno value is returned. | ||
209 | */ | 200 | */ |
210 | static int make_socks(struct svc_serv *serv, int proto) | 201 | static int make_socks(struct svc_serv *serv) |
211 | { | 202 | { |
212 | static int warned; | 203 | static int warned; |
213 | struct svc_xprt *xprt; | 204 | struct svc_xprt *xprt; |
214 | int err = 0; | 205 | int err = 0; |
215 | 206 | ||
216 | if (proto == IPPROTO_UDP || nlm_udpport) { | 207 | xprt = svc_find_xprt(serv, "udp", 0, 0); |
217 | xprt = svc_find_xprt(serv, "udp", 0, 0); | 208 | if (!xprt) |
218 | if (!xprt) | 209 | err = svc_create_xprt(serv, "udp", nlm_udpport, |
219 | err = svc_create_xprt(serv, "udp", nlm_udpport, | 210 | SVC_SOCK_DEFAULTS); |
220 | SVC_SOCK_DEFAULTS); | 211 | else |
221 | else | 212 | svc_xprt_put(xprt); |
222 | svc_xprt_put(xprt); | 213 | if (err >= 0) { |
223 | } | ||
224 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) { | ||
225 | xprt = svc_find_xprt(serv, "tcp", 0, 0); | 214 | xprt = svc_find_xprt(serv, "tcp", 0, 0); |
226 | if (!xprt) | 215 | if (!xprt) |
227 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, | 216 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, |
@@ -241,8 +230,7 @@ static int make_socks(struct svc_serv *serv, int proto) | |||
241 | /* | 230 | /* |
242 | * Bring up the lockd process if it's not already up. | 231 | * Bring up the lockd process if it's not already up. |
243 | */ | 232 | */ |
244 | int | 233 | int lockd_up(void) |
245 | lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | ||
246 | { | 234 | { |
247 | struct svc_serv *serv; | 235 | struct svc_serv *serv; |
248 | int error = 0; | 236 | int error = 0; |
@@ -251,11 +239,8 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
251 | /* | 239 | /* |
252 | * Check whether we're already up and running. | 240 | * Check whether we're already up and running. |
253 | */ | 241 | */ |
254 | if (nlmsvc_rqst) { | 242 | if (nlmsvc_rqst) |
255 | if (proto) | ||
256 | error = make_socks(nlmsvc_rqst->rq_server, proto); | ||
257 | goto out; | 243 | goto out; |
258 | } | ||
259 | 244 | ||
260 | /* | 245 | /* |
261 | * Sanity check: if there's no pid, | 246 | * Sanity check: if there's no pid, |
@@ -266,13 +251,14 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
266 | "lockd_up: no pid, %d users??\n", nlmsvc_users); | 251 | "lockd_up: no pid, %d users??\n", nlmsvc_users); |
267 | 252 | ||
268 | error = -ENOMEM; | 253 | error = -ENOMEM; |
269 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); | 254 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, AF_INET, NULL); |
270 | if (!serv) { | 255 | if (!serv) { |
271 | printk(KERN_WARNING "lockd_up: create service failed\n"); | 256 | printk(KERN_WARNING "lockd_up: create service failed\n"); |
272 | goto out; | 257 | goto out; |
273 | } | 258 | } |
274 | 259 | ||
275 | if ((error = make_socks(serv, proto)) < 0) | 260 | error = make_socks(serv); |
261 | if (error < 0) | ||
276 | goto destroy_and_out; | 262 | goto destroy_and_out; |
277 | 263 | ||
278 | /* | 264 | /* |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a714f64515b..014f6ce48172 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -88,12 +88,6 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
88 | dprintk("lockd: TEST4 called\n"); | 88 | dprintk("lockd: TEST4 called\n"); |
89 | resp->cookie = argp->cookie; | 89 | resp->cookie = argp->cookie; |
90 | 90 | ||
91 | /* Don't accept test requests during grace period */ | ||
92 | if (nlmsvc_grace_period) { | ||
93 | resp->status = nlm_lck_denied_grace_period; | ||
94 | return rc; | ||
95 | } | ||
96 | |||
97 | /* Obtain client and file */ | 91 | /* Obtain client and file */ |
98 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 92 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
99 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 93 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -122,12 +116,6 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
122 | 116 | ||
123 | resp->cookie = argp->cookie; | 117 | resp->cookie = argp->cookie; |
124 | 118 | ||
125 | /* Don't accept new lock requests during grace period */ | ||
126 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
127 | resp->status = nlm_lck_denied_grace_period; | ||
128 | return rc; | ||
129 | } | ||
130 | |||
131 | /* Obtain client and file */ | 119 | /* Obtain client and file */ |
132 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 120 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
133 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 121 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -146,7 +134,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
146 | 134 | ||
147 | /* Now try to lock the file */ | 135 | /* Now try to lock the file */ |
148 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, | 136 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, |
149 | argp->block, &argp->cookie); | 137 | argp->block, &argp->cookie, |
138 | argp->reclaim); | ||
150 | if (resp->status == nlm_drop_reply) | 139 | if (resp->status == nlm_drop_reply) |
151 | rc = rpc_drop_reply; | 140 | rc = rpc_drop_reply; |
152 | else | 141 | else |
@@ -169,7 +158,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
169 | resp->cookie = argp->cookie; | 158 | resp->cookie = argp->cookie; |
170 | 159 | ||
171 | /* Don't accept requests during grace period */ | 160 | /* Don't accept requests during grace period */ |
172 | if (nlmsvc_grace_period) { | 161 | if (locks_in_grace()) { |
173 | resp->status = nlm_lck_denied_grace_period; | 162 | resp->status = nlm_lck_denied_grace_period; |
174 | return rpc_success; | 163 | return rpc_success; |
175 | } | 164 | } |
@@ -202,7 +191,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
202 | resp->cookie = argp->cookie; | 191 | resp->cookie = argp->cookie; |
203 | 192 | ||
204 | /* Don't accept new lock requests during grace period */ | 193 | /* Don't accept new lock requests during grace period */ |
205 | if (nlmsvc_grace_period) { | 194 | if (locks_in_grace()) { |
206 | resp->status = nlm_lck_denied_grace_period; | 195 | resp->status = nlm_lck_denied_grace_period; |
207 | return rpc_success; | 196 | return rpc_success; |
208 | } | 197 | } |
@@ -231,7 +220,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
231 | resp->cookie = argp->cookie; | 220 | resp->cookie = argp->cookie; |
232 | 221 | ||
233 | dprintk("lockd: GRANTED called\n"); | 222 | dprintk("lockd: GRANTED called\n"); |
234 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 223 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
235 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 224 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
236 | return rpc_success; | 225 | return rpc_success; |
237 | } | 226 | } |
@@ -341,7 +330,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
341 | resp->cookie = argp->cookie; | 330 | resp->cookie = argp->cookie; |
342 | 331 | ||
343 | /* Don't accept new lock requests during grace period */ | 332 | /* Don't accept new lock requests during grace period */ |
344 | if (nlmsvc_grace_period && !argp->reclaim) { | 333 | if (locks_in_grace() && !argp->reclaim) { |
345 | resp->status = nlm_lck_denied_grace_period; | 334 | resp->status = nlm_lck_denied_grace_period; |
346 | return rpc_success; | 335 | return rpc_success; |
347 | } | 336 | } |
@@ -374,7 +363,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
374 | resp->cookie = argp->cookie; | 363 | resp->cookie = argp->cookie; |
375 | 364 | ||
376 | /* Don't accept requests during grace period */ | 365 | /* Don't accept requests during grace period */ |
377 | if (nlmsvc_grace_period) { | 366 | if (locks_in_grace()) { |
378 | resp->status = nlm_lck_denied_grace_period; | 367 | resp->status = nlm_lck_denied_grace_period; |
379 | return rpc_success; | 368 | return rpc_success; |
380 | } | 369 | } |
@@ -432,11 +421,9 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
432 | { | 421 | { |
433 | struct sockaddr_in saddr; | 422 | struct sockaddr_in saddr; |
434 | 423 | ||
435 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
436 | |||
437 | dprintk("lockd: SM_NOTIFY called\n"); | 424 | dprintk("lockd: SM_NOTIFY called\n"); |
438 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 425 | |
439 | || ntohs(saddr.sin_port) >= 1024) { | 426 | if (!nlm_privileged_requester(rqstp)) { |
440 | char buf[RPC_MAX_ADDRBUFLEN]; | 427 | char buf[RPC_MAX_ADDRBUFLEN]; |
441 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 428 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
442 | svc_print_addr(rqstp, buf, sizeof(buf))); | 429 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cf0d5c2c318d..6063a8e4b9f3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -360,7 +360,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) | |||
360 | __be32 | 360 | __be32 |
361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | 361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, |
362 | struct nlm_host *host, struct nlm_lock *lock, int wait, | 362 | struct nlm_host *host, struct nlm_lock *lock, int wait, |
363 | struct nlm_cookie *cookie) | 363 | struct nlm_cookie *cookie, int reclaim) |
364 | { | 364 | { |
365 | struct nlm_block *block = NULL; | 365 | struct nlm_block *block = NULL; |
366 | int error; | 366 | int error; |
@@ -406,6 +406,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
406 | goto out; | 406 | goto out; |
407 | } | 407 | } |
408 | 408 | ||
409 | if (locks_in_grace() && !reclaim) { | ||
410 | ret = nlm_lck_denied_grace_period; | ||
411 | goto out; | ||
412 | } | ||
413 | if (reclaim && !locks_in_grace()) { | ||
414 | ret = nlm_lck_denied_grace_period; | ||
415 | goto out; | ||
416 | } | ||
417 | |||
409 | if (!wait) | 418 | if (!wait) |
410 | lock->fl.fl_flags &= ~FL_SLEEP; | 419 | lock->fl.fl_flags &= ~FL_SLEEP; |
411 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); | 420 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); |
@@ -502,6 +511,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
502 | goto out; | 511 | goto out; |
503 | } | 512 | } |
504 | 513 | ||
514 | if (locks_in_grace()) { | ||
515 | ret = nlm_lck_denied_grace_period; | ||
516 | goto out; | ||
517 | } | ||
505 | error = vfs_test_lock(file->f_file, &lock->fl); | 518 | error = vfs_test_lock(file->f_file, &lock->fl); |
506 | if (error == FILE_LOCK_DEFERRED) { | 519 | if (error == FILE_LOCK_DEFERRED) { |
507 | ret = nlmsvc_defer_lock_rqst(rqstp, block); | 520 | ret = nlmsvc_defer_lock_rqst(rqstp, block); |
@@ -582,6 +595,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) | |||
582 | (long long)lock->fl.fl_start, | 595 | (long long)lock->fl.fl_start, |
583 | (long long)lock->fl.fl_end); | 596 | (long long)lock->fl.fl_end); |
584 | 597 | ||
598 | if (locks_in_grace()) | ||
599 | return nlm_lck_denied_grace_period; | ||
600 | |||
585 | mutex_lock(&file->f_mutex); | 601 | mutex_lock(&file->f_mutex); |
586 | block = nlmsvc_lookup_block(file, lock); | 602 | block = nlmsvc_lookup_block(file, lock); |
587 | mutex_unlock(&file->f_mutex); | 603 | mutex_unlock(&file->f_mutex); |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76262c1986f2..548b0bb2b84d 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -117,12 +117,6 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
117 | dprintk("lockd: TEST called\n"); | 117 | dprintk("lockd: TEST called\n"); |
118 | resp->cookie = argp->cookie; | 118 | resp->cookie = argp->cookie; |
119 | 119 | ||
120 | /* Don't accept test requests during grace period */ | ||
121 | if (nlmsvc_grace_period) { | ||
122 | resp->status = nlm_lck_denied_grace_period; | ||
123 | return rc; | ||
124 | } | ||
125 | |||
126 | /* Obtain client and file */ | 120 | /* Obtain client and file */ |
127 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 121 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
128 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 122 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -152,12 +146,6 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
152 | 146 | ||
153 | resp->cookie = argp->cookie; | 147 | resp->cookie = argp->cookie; |
154 | 148 | ||
155 | /* Don't accept new lock requests during grace period */ | ||
156 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
157 | resp->status = nlm_lck_denied_grace_period; | ||
158 | return rc; | ||
159 | } | ||
160 | |||
161 | /* Obtain client and file */ | 149 | /* Obtain client and file */ |
162 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 150 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
163 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 151 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
@@ -176,7 +164,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
176 | 164 | ||
177 | /* Now try to lock the file */ | 165 | /* Now try to lock the file */ |
178 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, | 166 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, |
179 | argp->block, &argp->cookie)); | 167 | argp->block, &argp->cookie, |
168 | argp->reclaim)); | ||
180 | if (resp->status == nlm_drop_reply) | 169 | if (resp->status == nlm_drop_reply) |
181 | rc = rpc_drop_reply; | 170 | rc = rpc_drop_reply; |
182 | else | 171 | else |
@@ -199,7 +188,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
199 | resp->cookie = argp->cookie; | 188 | resp->cookie = argp->cookie; |
200 | 189 | ||
201 | /* Don't accept requests during grace period */ | 190 | /* Don't accept requests during grace period */ |
202 | if (nlmsvc_grace_period) { | 191 | if (locks_in_grace()) { |
203 | resp->status = nlm_lck_denied_grace_period; | 192 | resp->status = nlm_lck_denied_grace_period; |
204 | return rpc_success; | 193 | return rpc_success; |
205 | } | 194 | } |
@@ -232,7 +221,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
232 | resp->cookie = argp->cookie; | 221 | resp->cookie = argp->cookie; |
233 | 222 | ||
234 | /* Don't accept new lock requests during grace period */ | 223 | /* Don't accept new lock requests during grace period */ |
235 | if (nlmsvc_grace_period) { | 224 | if (locks_in_grace()) { |
236 | resp->status = nlm_lck_denied_grace_period; | 225 | resp->status = nlm_lck_denied_grace_period; |
237 | return rpc_success; | 226 | return rpc_success; |
238 | } | 227 | } |
@@ -261,7 +250,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
261 | resp->cookie = argp->cookie; | 250 | resp->cookie = argp->cookie; |
262 | 251 | ||
263 | dprintk("lockd: GRANTED called\n"); | 252 | dprintk("lockd: GRANTED called\n"); |
264 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 253 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
265 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 254 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
266 | return rpc_success; | 255 | return rpc_success; |
267 | } | 256 | } |
@@ -373,7 +362,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
373 | resp->cookie = argp->cookie; | 362 | resp->cookie = argp->cookie; |
374 | 363 | ||
375 | /* Don't accept new lock requests during grace period */ | 364 | /* Don't accept new lock requests during grace period */ |
376 | if (nlmsvc_grace_period && !argp->reclaim) { | 365 | if (locks_in_grace() && !argp->reclaim) { |
377 | resp->status = nlm_lck_denied_grace_period; | 366 | resp->status = nlm_lck_denied_grace_period; |
378 | return rpc_success; | 367 | return rpc_success; |
379 | } | 368 | } |
@@ -406,7 +395,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
406 | resp->cookie = argp->cookie; | 395 | resp->cookie = argp->cookie; |
407 | 396 | ||
408 | /* Don't accept requests during grace period */ | 397 | /* Don't accept requests during grace period */ |
409 | if (nlmsvc_grace_period) { | 398 | if (locks_in_grace()) { |
410 | resp->status = nlm_lck_denied_grace_period; | 399 | resp->status = nlm_lck_denied_grace_period; |
411 | return rpc_success; | 400 | return rpc_success; |
412 | } | 401 | } |
@@ -464,11 +453,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
464 | { | 453 | { |
465 | struct sockaddr_in saddr; | 454 | struct sockaddr_in saddr; |
466 | 455 | ||
467 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
468 | |||
469 | dprintk("lockd: SM_NOTIFY called\n"); | 456 | dprintk("lockd: SM_NOTIFY called\n"); |
470 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 457 | |
471 | || ntohs(saddr.sin_port) >= 1024) { | 458 | if (!nlm_privileged_requester(rqstp)) { |
472 | char buf[RPC_MAX_ADDRBUFLEN]; | 459 | char buf[RPC_MAX_ADDRBUFLEN]; |
473 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 460 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
474 | svc_print_addr(rqstp, buf, sizeof(buf))); | 461 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 198b4e55b373..34c2766e27c7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); | |||
418 | static int | 418 | static int |
419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) | 419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) |
420 | { | 420 | { |
421 | return nlm_cmp_addr(&host->h_saddr, datap); | 421 | return nlm_cmp_addr(nlm_srcaddr(host), datap); |
422 | } | 422 | } |
423 | 423 | ||
424 | /** | 424 | /** |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 3e459e18cc31..1f226290c67c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -351,8 +351,6 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) | |||
351 | argp->state = ntohl(*p++); | 351 | argp->state = ntohl(*p++); |
352 | /* Preserve the address in network byte order */ | 352 | /* Preserve the address in network byte order */ |
353 | argp->addr = *p++; | 353 | argp->addr = *p++; |
354 | argp->vers = *p++; | ||
355 | argp->proto = *p++; | ||
356 | return xdr_argsize_check(rqstp, p); | 354 | return xdr_argsize_check(rqstp, p); |
357 | } | 355 | } |
358 | 356 | ||
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 43ff9397e6c6..50c493a8ad8e 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c | |||
@@ -358,8 +358,6 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp | |||
358 | argp->state = ntohl(*p++); | 358 | argp->state = ntohl(*p++); |
359 | /* Preserve the address in network byte order */ | 359 | /* Preserve the address in network byte order */ |
360 | argp->addr = *p++; | 360 | argp->addr = *p++; |
361 | argp->vers = *p++; | ||
362 | argp->proto = *p++; | ||
363 | return xdr_argsize_check(rqstp, p); | 361 | return xdr_argsize_check(rqstp, p); |
364 | } | 362 | } |
365 | 363 | ||
diff --git a/fs/mpage.c b/fs/mpage.c index dbcc7af76a15..552b80b3facc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * Contains functions related to preparing and submitting BIOs which contain | 6 | * Contains functions related to preparing and submitting BIOs which contain |
7 | * multiple pagecache pages. | 7 | * multiple pagecache pages. |
8 | * | 8 | * |
9 | * 15May2002 akpm@zip.com.au | 9 | * 15May2002 Andrew Morton |
10 | * Initial version | 10 | * Initial version |
11 | * 27Jun2002 axboe@suse.de | 11 | * 27Jun2002 axboe@suse.de |
12 | * use bio_add_page() to build bio's just the right size | 12 | * use bio_add_page() to build bio's just the right size |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index f447f4b4476c..6a09760c5960 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -105,7 +105,8 @@ int nfs_callback_up(void) | |||
105 | mutex_lock(&nfs_callback_mutex); | 105 | mutex_lock(&nfs_callback_mutex); |
106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) | 106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) |
107 | goto out; | 107 | goto out; |
108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); | 108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, |
109 | AF_INET, NULL); | ||
109 | ret = -ENOMEM; | 110 | ret = -ENOMEM; |
110 | if (!serv) | 111 | if (!serv) |
111 | goto out_err; | 112 | goto out_err; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5ee23e7058b3..7547600b6174 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
675 | server->nfs_client = clp; | 675 | server->nfs_client = clp; |
676 | 676 | ||
677 | /* Initialise the client representation from the mount data */ | 677 | /* Initialise the client representation from the mount data */ |
678 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 678 | server->flags = data->flags; |
679 | 679 | ||
680 | if (data->rsize) | 680 | if (data->rsize) |
681 | server->rsize = nfs_block_size(data->rsize, NULL); | 681 | server->rsize = nfs_block_size(data->rsize, NULL); |
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void) | |||
850 | INIT_LIST_HEAD(&server->client_link); | 850 | INIT_LIST_HEAD(&server->client_link); |
851 | INIT_LIST_HEAD(&server->master_link); | 851 | INIT_LIST_HEAD(&server->master_link); |
852 | 852 | ||
853 | init_waitqueue_head(&server->active_wq); | ||
854 | atomic_set(&server->active, 0); | 853 | atomic_set(&server->active, 0); |
855 | 854 | ||
856 | server->io_stats = nfs_alloc_iostats(); | 855 | server->io_stats = nfs_alloc_iostats(); |
@@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1073 | goto error; | 1072 | goto error; |
1074 | 1073 | ||
1075 | /* Initialise the client representation from the mount data */ | 1074 | /* Initialise the client representation from the mount data */ |
1076 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1075 | server->flags = data->flags; |
1077 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1076 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
1078 | 1077 | ||
1079 | if (data->rsize) | 1078 | if (data->rsize) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 74f92b717f78..2ab70d46ecbc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -156,6 +156,7 @@ typedef struct { | |||
156 | decode_dirent_t decode; | 156 | decode_dirent_t decode; |
157 | int plus; | 157 | int plus; |
158 | unsigned long timestamp; | 158 | unsigned long timestamp; |
159 | unsigned long gencount; | ||
159 | int timestamp_valid; | 160 | int timestamp_valid; |
160 | } nfs_readdir_descriptor_t; | 161 | } nfs_readdir_descriptor_t; |
161 | 162 | ||
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
177 | struct file *file = desc->file; | 178 | struct file *file = desc->file; |
178 | struct inode *inode = file->f_path.dentry->d_inode; | 179 | struct inode *inode = file->f_path.dentry->d_inode; |
179 | struct rpc_cred *cred = nfs_file_cred(file); | 180 | struct rpc_cred *cred = nfs_file_cred(file); |
180 | unsigned long timestamp; | 181 | unsigned long timestamp, gencount; |
181 | int error; | 182 | int error; |
182 | 183 | ||
183 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", | 184 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", |
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
186 | 187 | ||
187 | again: | 188 | again: |
188 | timestamp = jiffies; | 189 | timestamp = jiffies; |
190 | gencount = nfs_inc_attr_generation_counter(); | ||
189 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, | 191 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, |
190 | NFS_SERVER(inode)->dtsize, desc->plus); | 192 | NFS_SERVER(inode)->dtsize, desc->plus); |
191 | if (error < 0) { | 193 | if (error < 0) { |
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
199 | goto error; | 201 | goto error; |
200 | } | 202 | } |
201 | desc->timestamp = timestamp; | 203 | desc->timestamp = timestamp; |
204 | desc->gencount = gencount; | ||
202 | desc->timestamp_valid = 1; | 205 | desc->timestamp_valid = 1; |
203 | SetPageUptodate(page); | 206 | SetPageUptodate(page); |
204 | /* Ensure consistent page alignment of the data. | 207 | /* Ensure consistent page alignment of the data. |
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) | |||
224 | if (IS_ERR(p)) | 227 | if (IS_ERR(p)) |
225 | return PTR_ERR(p); | 228 | return PTR_ERR(p); |
226 | desc->ptr = p; | 229 | desc->ptr = p; |
227 | if (desc->timestamp_valid) | 230 | if (desc->timestamp_valid) { |
228 | desc->entry->fattr->time_start = desc->timestamp; | 231 | desc->entry->fattr->time_start = desc->timestamp; |
229 | else | 232 | desc->entry->fattr->gencount = desc->gencount; |
233 | } else | ||
230 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; | 234 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; |
231 | return 0; | 235 | return 0; |
232 | } | 236 | } |
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
471 | struct rpc_cred *cred = nfs_file_cred(file); | 475 | struct rpc_cred *cred = nfs_file_cred(file); |
472 | struct page *page = NULL; | 476 | struct page *page = NULL; |
473 | int status; | 477 | int status; |
474 | unsigned long timestamp; | 478 | unsigned long timestamp, gencount; |
475 | 479 | ||
476 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 480 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
477 | (unsigned long long)*desc->dir_cookie); | 481 | (unsigned long long)*desc->dir_cookie); |
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
482 | goto out; | 486 | goto out; |
483 | } | 487 | } |
484 | timestamp = jiffies; | 488 | timestamp = jiffies; |
489 | gencount = nfs_inc_attr_generation_counter(); | ||
485 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, | 490 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, |
486 | *desc->dir_cookie, page, | 491 | *desc->dir_cookie, page, |
487 | NFS_SERVER(inode)->dtsize, | 492 | NFS_SERVER(inode)->dtsize, |
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
490 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 495 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
491 | if (status >= 0) { | 496 | if (status >= 0) { |
492 | desc->timestamp = timestamp; | 497 | desc->timestamp = timestamp; |
498 | desc->gencount = gencount; | ||
493 | desc->timestamp_valid = 1; | 499 | desc->timestamp_valid = 1; |
494 | if ((status = dir_decode(desc)) == 0) | 500 | if ((status = dir_decode(desc)) == 0) |
495 | desc->entry->prev_cookie = *desc->dir_cookie; | 501 | desc->entry->prev_cookie = *desc->dir_cookie; |
@@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
655 | */ | 661 | */ |
656 | void nfs_force_lookup_revalidate(struct inode *dir) | 662 | void nfs_force_lookup_revalidate(struct inode *dir) |
657 | { | 663 | { |
658 | NFS_I(dir)->cache_change_attribute = jiffies; | 664 | NFS_I(dir)->cache_change_attribute++; |
659 | } | 665 | } |
660 | 666 | ||
661 | /* | 667 | /* |
@@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | |||
667 | { | 673 | { |
668 | if (IS_ROOT(dentry)) | 674 | if (IS_ROOT(dentry)) |
669 | return 1; | 675 | return 1; |
676 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | ||
677 | return 0; | ||
670 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) | 678 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) |
671 | return 0; | 679 | return 0; |
672 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ | 680 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ |
@@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | |||
750 | /* Don't revalidate a negative dentry if we're creating a new file */ | 758 | /* Don't revalidate a negative dentry if we're creating a new file */ |
751 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) | 759 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) |
752 | return 0; | 760 | return 0; |
761 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) | ||
762 | return 1; | ||
753 | return !nfs_check_verifier(dir, dentry); | 763 | return !nfs_check_verifier(dir, dentry); |
754 | } | 764 | } |
755 | 765 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78460657f5cb..d319b49f8f06 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
188 | /* origin == SEEK_END => we must revalidate the cached file length */ | 188 | /* origin == SEEK_END => we must revalidate the cached file length */ |
189 | if (origin == SEEK_END) { | 189 | if (origin == SEEK_END) { |
190 | struct inode *inode = filp->f_mapping->host; | 190 | struct inode *inode = filp->f_mapping->host; |
191 | |||
191 | int retval = nfs_revalidate_file_size(inode, filp); | 192 | int retval = nfs_revalidate_file_size(inode, filp); |
192 | if (retval < 0) | 193 | if (retval < 0) |
193 | return (loff_t)retval; | 194 | return (loff_t)retval; |
194 | } | 195 | |
195 | lock_kernel(); /* BKL needed? */ | 196 | spin_lock(&inode->i_lock); |
196 | loff = generic_file_llseek_unlocked(filp, offset, origin); | 197 | loff = generic_file_llseek_unlocked(filp, offset, origin); |
197 | unlock_kernel(); | 198 | spin_unlock(&inode->i_lock); |
199 | } else | ||
200 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
198 | return loff; | 201 | return loff; |
199 | } | 202 | } |
200 | 203 | ||
@@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
699 | filp->f_path.dentry->d_name.name, | 702 | filp->f_path.dentry->d_name.name, |
700 | fl->fl_type, fl->fl_flags); | 703 | fl->fl_type, fl->fl_flags); |
701 | 704 | ||
702 | /* | ||
703 | * No BSD flocks over NFS allowed. | ||
704 | * Note: we could try to fake a POSIX lock request here by | ||
705 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
706 | * Not sure whether that would be unique, though, or whether | ||
707 | * that would break in other places. | ||
708 | */ | ||
709 | if (!(fl->fl_flags & FL_FLOCK)) | 705 | if (!(fl->fl_flags & FL_FLOCK)) |
710 | return -ENOLCK; | 706 | return -ENOLCK; |
711 | 707 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52daefa2f521..b9195c02a863 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
305 | init_special_inode(inode, inode->i_mode, fattr->rdev); | 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); |
306 | 306 | ||
307 | nfsi->read_cache_jiffies = fattr->time_start; | 307 | nfsi->read_cache_jiffies = fattr->time_start; |
308 | nfsi->last_updated = now; | 308 | nfsi->attr_gencount = fattr->gencount; |
309 | nfsi->cache_change_attribute = now; | ||
310 | inode->i_atime = fattr->atime; | 309 | inode->i_atime = fattr->atime; |
311 | inode->i_mtime = fattr->mtime; | 310 | inode->i_mtime = fattr->mtime; |
312 | inode->i_ctime = fattr->ctime; | 311 | inode->i_ctime = fattr->ctime; |
@@ -453,6 +452,7 @@ out_big: | |||
453 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | 452 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) |
454 | { | 453 | { |
455 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { | 454 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { |
455 | spin_lock(&inode->i_lock); | ||
456 | if ((attr->ia_valid & ATTR_MODE) != 0) { | 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { |
457 | int mode = attr->ia_mode & S_IALLUGO; | 457 | int mode = attr->ia_mode & S_IALLUGO; |
458 | mode |= inode->i_mode & ~S_IALLUGO; | 458 | mode |= inode->i_mode & ~S_IALLUGO; |
@@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
462 | inode->i_uid = attr->ia_uid; | 462 | inode->i_uid = attr->ia_uid; |
463 | if ((attr->ia_valid & ATTR_GID) != 0) | 463 | if ((attr->ia_valid & ATTR_GID) != 0) |
464 | inode->i_gid = attr->ia_gid; | 464 | inode->i_gid = attr->ia_gid; |
465 | spin_lock(&inode->i_lock); | ||
466 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 465 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
467 | spin_unlock(&inode->i_lock); | 466 | spin_unlock(&inode->i_lock); |
468 | } | 467 | } |
@@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
472 | } | 471 | } |
473 | } | 472 | } |
474 | 473 | ||
475 | static int nfs_wait_schedule(void *word) | ||
476 | { | ||
477 | if (signal_pending(current)) | ||
478 | return -ERESTARTSYS; | ||
479 | schedule(); | ||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * Wait for the inode to get unlocked. | ||
485 | */ | ||
486 | static int nfs_wait_on_inode(struct inode *inode) | ||
487 | { | ||
488 | struct nfs_inode *nfsi = NFS_I(inode); | ||
489 | int error; | ||
490 | |||
491 | error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, | ||
492 | nfs_wait_schedule, TASK_KILLABLE); | ||
493 | |||
494 | return error; | ||
495 | } | ||
496 | |||
497 | static void nfs_wake_up_inode(struct inode *inode) | ||
498 | { | ||
499 | struct nfs_inode *nfsi = NFS_I(inode); | ||
500 | |||
501 | clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); | ||
502 | smp_mb__after_clear_bit(); | ||
503 | wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); | ||
504 | } | ||
505 | |||
506 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 474 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
507 | { | 475 | { |
508 | struct inode *inode = dentry->d_inode; | 476 | struct inode *inode = dentry->d_inode; |
@@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
697 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", | 665 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", |
698 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 666 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
699 | 667 | ||
700 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
701 | if (is_bad_inode(inode)) | 668 | if (is_bad_inode(inode)) |
702 | goto out_nowait; | 669 | goto out; |
703 | if (NFS_STALE(inode)) | 670 | if (NFS_STALE(inode)) |
704 | goto out_nowait; | ||
705 | |||
706 | status = nfs_wait_on_inode(inode); | ||
707 | if (status < 0) | ||
708 | goto out; | 671 | goto out; |
709 | 672 | ||
710 | status = -ESTALE; | ||
711 | if (NFS_STALE(inode)) | 673 | if (NFS_STALE(inode)) |
712 | goto out; | 674 | goto out; |
713 | 675 | ||
676 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
714 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); | 677 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); |
715 | if (status != 0) { | 678 | if (status != 0) { |
716 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 679 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
@@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
724 | goto out; | 687 | goto out; |
725 | } | 688 | } |
726 | 689 | ||
727 | spin_lock(&inode->i_lock); | 690 | status = nfs_refresh_inode(inode, &fattr); |
728 | status = nfs_update_inode(inode, &fattr); | ||
729 | if (status) { | 691 | if (status) { |
730 | spin_unlock(&inode->i_lock); | ||
731 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 692 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
732 | inode->i_sb->s_id, | 693 | inode->i_sb->s_id, |
733 | (long long)NFS_FILEID(inode), status); | 694 | (long long)NFS_FILEID(inode), status); |
734 | goto out; | 695 | goto out; |
735 | } | 696 | } |
736 | spin_unlock(&inode->i_lock); | ||
737 | 697 | ||
738 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 698 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
739 | nfs_zap_acl_cache(inode); | 699 | nfs_zap_acl_cache(inode); |
@@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
743 | (long long)NFS_FILEID(inode)); | 703 | (long long)NFS_FILEID(inode)); |
744 | 704 | ||
745 | out: | 705 | out: |
746 | nfs_wake_up_inode(inode); | ||
747 | |||
748 | out_nowait: | ||
749 | return status; | 706 | return status; |
750 | } | 707 | } |
751 | 708 | ||
@@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
908 | return -EIO; | 865 | return -EIO; |
909 | } | 866 | } |
910 | 867 | ||
911 | /* Do atomic weak cache consistency updates */ | ||
912 | nfs_wcc_update_inode(inode, fattr); | ||
913 | |||
914 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 868 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
915 | nfsi->change_attr != fattr->change_attr) | 869 | nfsi->change_attr != fattr->change_attr) |
916 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 870 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
@@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
939 | 893 | ||
940 | if (invalid != 0) | 894 | if (invalid != 0) |
941 | nfsi->cache_validity |= invalid; | 895 | nfsi->cache_validity |= invalid; |
942 | else | ||
943 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | ||
944 | | NFS_INO_INVALID_ATIME | ||
945 | | NFS_INO_REVAL_PAGECACHE); | ||
946 | 896 | ||
947 | nfsi->read_cache_jiffies = fattr->time_start; | 897 | nfsi->read_cache_jiffies = fattr->time_start; |
948 | return 0; | 898 | return 0; |
949 | } | 899 | } |
950 | 900 | ||
901 | static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
902 | { | ||
903 | return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; | ||
904 | } | ||
905 | |||
906 | static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
907 | { | ||
908 | return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); | ||
909 | } | ||
910 | |||
911 | static unsigned long nfs_attr_generation_counter; | ||
912 | |||
913 | static unsigned long nfs_read_attr_generation_counter(void) | ||
914 | { | ||
915 | smp_rmb(); | ||
916 | return nfs_attr_generation_counter; | ||
917 | } | ||
918 | |||
919 | unsigned long nfs_inc_attr_generation_counter(void) | ||
920 | { | ||
921 | unsigned long ret; | ||
922 | smp_rmb(); | ||
923 | ret = ++nfs_attr_generation_counter; | ||
924 | smp_wmb(); | ||
925 | return ret; | ||
926 | } | ||
927 | |||
928 | void nfs_fattr_init(struct nfs_fattr *fattr) | ||
929 | { | ||
930 | fattr->valid = 0; | ||
931 | fattr->time_start = jiffies; | ||
932 | fattr->gencount = nfs_inc_attr_generation_counter(); | ||
933 | } | ||
934 | |||
935 | /** | ||
936 | * nfs_inode_attrs_need_update - check if the inode attributes need updating | ||
937 | * @inode - pointer to inode | ||
938 | * @fattr - attributes | ||
939 | * | ||
940 | * Attempt to divine whether or not an RPC call reply carrying stale | ||
941 | * attributes got scheduled after another call carrying updated ones. | ||
942 | * | ||
943 | * To do so, the function first assumes that a more recent ctime means | ||
944 | * that the attributes in fattr are newer, however it also attempt to | ||
945 | * catch the case where ctime either didn't change, or went backwards | ||
946 | * (if someone reset the clock on the server) by looking at whether | ||
947 | * or not this RPC call was started after the inode was last updated. | ||
948 | * Note also the check for wraparound of 'attr_gencount' | ||
949 | * | ||
950 | * The function returns 'true' if it thinks the attributes in 'fattr' are | ||
951 | * more recent than the ones cached in the inode. | ||
952 | * | ||
953 | */ | ||
954 | static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
955 | { | ||
956 | const struct nfs_inode *nfsi = NFS_I(inode); | ||
957 | |||
958 | return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || | ||
959 | nfs_ctime_need_update(inode, fattr) || | ||
960 | nfs_size_need_update(inode, fattr) || | ||
961 | ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); | ||
962 | } | ||
963 | |||
964 | static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
965 | { | ||
966 | if (nfs_inode_attrs_need_update(inode, fattr)) | ||
967 | return nfs_update_inode(inode, fattr); | ||
968 | return nfs_check_inode_attributes(inode, fattr); | ||
969 | } | ||
970 | |||
951 | /** | 971 | /** |
952 | * nfs_refresh_inode - try to update the inode attribute cache | 972 | * nfs_refresh_inode - try to update the inode attribute cache |
953 | * @inode - pointer to inode | 973 | * @inode - pointer to inode |
@@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
960 | */ | 980 | */ |
961 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | 981 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) |
962 | { | 982 | { |
963 | struct nfs_inode *nfsi = NFS_I(inode); | ||
964 | int status; | 983 | int status; |
965 | 984 | ||
966 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 985 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) |
967 | return 0; | 986 | return 0; |
968 | spin_lock(&inode->i_lock); | 987 | spin_lock(&inode->i_lock); |
969 | if (time_after(fattr->time_start, nfsi->last_updated)) | 988 | status = nfs_refresh_inode_locked(inode, fattr); |
970 | status = nfs_update_inode(inode, fattr); | ||
971 | else | ||
972 | status = nfs_check_inode_attributes(inode, fattr); | ||
973 | |||
974 | spin_unlock(&inode->i_lock); | 989 | spin_unlock(&inode->i_lock); |
975 | return status; | 990 | return status; |
976 | } | 991 | } |
977 | 992 | ||
993 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
994 | { | ||
995 | struct nfs_inode *nfsi = NFS_I(inode); | ||
996 | |||
997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | ||
998 | if (S_ISDIR(inode->i_mode)) | ||
999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
1000 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | ||
1001 | return 0; | ||
1002 | return nfs_refresh_inode_locked(inode, fattr); | ||
1003 | } | ||
1004 | |||
978 | /** | 1005 | /** |
979 | * nfs_post_op_update_inode - try to update the inode attribute cache | 1006 | * nfs_post_op_update_inode - try to update the inode attribute cache |
980 | * @inode - pointer to inode | 1007 | * @inode - pointer to inode |
@@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
991 | */ | 1018 | */ |
992 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 1019 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
993 | { | 1020 | { |
994 | struct nfs_inode *nfsi = NFS_I(inode); | 1021 | int status; |
995 | 1022 | ||
996 | spin_lock(&inode->i_lock); | 1023 | spin_lock(&inode->i_lock); |
997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1024 | status = nfs_post_op_update_inode_locked(inode, fattr); |
998 | if (S_ISDIR(inode->i_mode)) | ||
999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
1000 | spin_unlock(&inode->i_lock); | 1025 | spin_unlock(&inode->i_lock); |
1001 | return nfs_refresh_inode(inode, fattr); | 1026 | return status; |
1002 | } | 1027 | } |
1003 | 1028 | ||
1004 | /** | 1029 | /** |
@@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1014 | */ | 1039 | */ |
1015 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) | 1040 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) |
1016 | { | 1041 | { |
1042 | int status; | ||
1043 | |||
1044 | spin_lock(&inode->i_lock); | ||
1045 | /* Don't do a WCC update if these attributes are already stale */ | ||
1046 | if ((fattr->valid & NFS_ATTR_FATTR) == 0 || | ||
1047 | !nfs_inode_attrs_need_update(inode, fattr)) { | ||
1048 | fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); | ||
1049 | goto out_noforce; | ||
1050 | } | ||
1017 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 1051 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
1018 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { | 1052 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { |
1019 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | 1053 | fattr->pre_change_attr = NFS_I(inode)->change_attr; |
@@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
1026 | fattr->pre_size = i_size_read(inode); | 1060 | fattr->pre_size = i_size_read(inode); |
1027 | fattr->valid |= NFS_ATTR_WCC; | 1061 | fattr->valid |= NFS_ATTR_WCC; |
1028 | } | 1062 | } |
1029 | return nfs_post_op_update_inode(inode, fattr); | 1063 | out_noforce: |
1064 | status = nfs_post_op_update_inode_locked(inode, fattr); | ||
1065 | spin_unlock(&inode->i_lock); | ||
1066 | return status; | ||
1030 | } | 1067 | } |
1031 | 1068 | ||
1032 | /* | 1069 | /* |
@@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1092 | } | 1129 | } |
1093 | /* If ctime has changed we should definitely clear access+acl caches */ | 1130 | /* If ctime has changed we should definitely clear access+acl caches */ |
1094 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | 1131 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) |
1095 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1132 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1096 | } else if (nfsi->change_attr != fattr->change_attr) { | 1133 | } else if (nfsi->change_attr != fattr->change_attr) { |
1097 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1134 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
1098 | inode->i_sb->s_id, inode->i_ino); | 1135 | inode->i_sb->s_id, inode->i_ino); |
@@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1126 | inode->i_gid != fattr->gid) | 1163 | inode->i_gid != fattr->gid) |
1127 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1164 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1128 | 1165 | ||
1166 | if (inode->i_nlink != fattr->nlink) | ||
1167 | invalid |= NFS_INO_INVALID_ATTR; | ||
1168 | |||
1129 | inode->i_mode = fattr->mode; | 1169 | inode->i_mode = fattr->mode; |
1130 | inode->i_nlink = fattr->nlink; | 1170 | inode->i_nlink = fattr->nlink; |
1131 | inode->i_uid = fattr->uid; | 1171 | inode->i_uid = fattr->uid; |
@@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1145 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1185 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
1146 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1186 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
1147 | nfsi->attrtimeo_timestamp = now; | 1187 | nfsi->attrtimeo_timestamp = now; |
1148 | nfsi->last_updated = now; | 1188 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); |
1149 | } else { | 1189 | } else { |
1150 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { | 1190 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
1151 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1191 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) |
1152 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1192 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); |
1153 | nfsi->attrtimeo_timestamp = now; | 1193 | nfsi->attrtimeo_timestamp = now; |
1154 | } | 1194 | } |
1155 | /* | ||
1156 | * Avoid jiffy wraparound issues with nfsi->last_updated | ||
1157 | */ | ||
1158 | if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) | ||
1159 | nfsi->last_updated = nfsi->read_cache_jiffies; | ||
1160 | } | 1195 | } |
1161 | invalid &= ~NFS_INO_INVALID_ATTR; | 1196 | invalid &= ~NFS_INO_INVALID_ATTR; |
1162 | /* Don't invalidate the data if we were to blame */ | 1197 | /* Don't invalidate the data if we were to blame */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24241fcbb98d..d212ee41caf2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *); | |||
153 | void nfs_zap_acl_cache(struct inode *inode); | 153 | void nfs_zap_acl_cache(struct inode *inode); |
154 | 154 | ||
155 | /* super.c */ | 155 | /* super.c */ |
156 | void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); | ||
156 | extern struct file_system_type nfs_xdev_fs_type; | 157 | extern struct file_system_type nfs_xdev_fs_type; |
157 | #ifdef CONFIG_NFS_V4 | 158 | #ifdef CONFIG_NFS_V4 |
158 | extern struct file_system_type nfs4_xdev_fs_type; | 159 | extern struct file_system_type nfs4_xdev_fs_type; |
@@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat; | |||
163 | 164 | ||
164 | extern int __init register_nfs_fs(void); | 165 | extern int __init register_nfs_fs(void); |
165 | extern void __exit unregister_nfs_fs(void); | 166 | extern void __exit unregister_nfs_fs(void); |
166 | extern void nfs_sb_active(struct nfs_server *server); | 167 | extern void nfs_sb_active(struct super_block *sb); |
167 | extern void nfs_sb_deactive(struct nfs_server *server); | 168 | extern void nfs_sb_deactive(struct super_block *sb); |
168 | 169 | ||
169 | /* namespace.c */ | 170 | /* namespace.c */ |
170 | extern char *nfs_path(const char *base, | 171 | extern char *nfs_path(const char *base, |
@@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
276 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 277 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
277 | } | 278 | } |
278 | 279 | ||
280 | #define IPV6_SCOPE_DELIMITER '%' | ||
281 | |||
282 | /* | ||
283 | * Set the port number in an address. Be agnostic about the address | ||
284 | * family. | ||
285 | */ | ||
286 | static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
287 | { | ||
288 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
289 | struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; | ||
290 | |||
291 | switch (sap->sa_family) { | ||
292 | case AF_INET: | ||
293 | ap->sin_port = htons(port); | ||
294 | break; | ||
295 | case AF_INET6: | ||
296 | ap6->sin6_port = htons(port); | ||
297 | break; | ||
298 | } | ||
299 | } | ||
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 779d2eb649c5..086a6830d785 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/sched.h> | 15 | #include <linux/sunrpc/sched.h> |
16 | #include <linux/nfs_fs.h> | 16 | #include <linux/nfs_fs.h> |
17 | #include "internal.h" | ||
17 | 18 | ||
18 | #ifdef RPC_DEBUG | 19 | #ifdef RPC_DEBUG |
19 | # define NFSDBG_FACILITY NFSDBG_MOUNT | 20 | # define NFSDBG_FACILITY NFSDBG_MOUNT |
@@ -98,7 +99,7 @@ out_call_err: | |||
98 | 99 | ||
99 | out_mnt_err: | 100 | out_mnt_err: |
100 | dprintk("NFS: MNT server returned result %d\n", result.status); | 101 | dprintk("NFS: MNT server returned result %d\n", result.status); |
101 | status = -EACCES; | 102 | status = nfs_stat_to_errno(result.status); |
102 | goto out; | 103 | goto out; |
103 | } | 104 | } |
104 | 105 | ||
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 66df08dd1caf..64a288ee046d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
105 | 105 | ||
106 | dprintk("--> nfs_follow_mountpoint()\n"); | 106 | dprintk("--> nfs_follow_mountpoint()\n"); |
107 | 107 | ||
108 | BUG_ON(IS_ROOT(dentry)); | 108 | err = -ESTALE; |
109 | if (IS_ROOT(dentry)) | ||
110 | goto out_err; | ||
111 | |||
109 | dprintk("%s: enter\n", __func__); | 112 | dprintk("%s: enter\n", __func__); |
110 | dput(nd->path.dentry); | 113 | dput(nd->path.dentry); |
111 | nd->path.dentry = dget(dentry); | 114 | nd->path.dentry = dget(dentry); |
@@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
189 | struct nfs_clone_mount *mountdata) | 192 | struct nfs_clone_mount *mountdata) |
190 | { | 193 | { |
191 | #ifdef CONFIG_NFS_V4 | 194 | #ifdef CONFIG_NFS_V4 |
192 | struct vfsmount *mnt = NULL; | 195 | struct vfsmount *mnt = ERR_PTR(-EINVAL); |
193 | switch (server->nfs_client->rpc_ops->version) { | 196 | switch (server->nfs_client->rpc_ops->version) { |
194 | case 2: | 197 | case 2: |
195 | case 3: | 198 | case 3: |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 423842f51ac9..cef62557c87d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
229 | 229 | ||
230 | dprintk("NFS call getacl\n"); | 230 | dprintk("NFS call getacl\n"); |
231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; | 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; |
232 | nfs_fattr_init(&fattr); | ||
232 | status = rpc_call_sync(server->client_acl, &msg, 0); | 233 | status = rpc_call_sync(server->client_acl, &msg, 0); |
233 | dprintk("NFS reply getacl: %d\n", status); | 234 | dprintk("NFS reply getacl: %d\n", status); |
234 | 235 | ||
@@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
322 | 323 | ||
323 | dprintk("NFS call setacl\n"); | 324 | dprintk("NFS call setacl\n"); |
324 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 325 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
326 | nfs_fattr_init(&fattr); | ||
325 | status = rpc_call_sync(server->client_acl, &msg, 0); | 327 | status = rpc_call_sync(server->client_acl, &msg, 0); |
326 | nfs_access_zap_cache(inode); | 328 | nfs_access_zap_cache(inode); |
327 | nfs_zap_acl_cache(inode); | 329 | nfs_zap_acl_cache(inode); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1e750e4574a9..c55be7a7679e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
699 | } | 699 | } |
700 | 700 | ||
701 | static int | 701 | static int |
702 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | 702 | do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, |
703 | struct nfs_fsinfo *info) | 703 | struct nfs_fsinfo *info) |
704 | { | 704 | { |
705 | struct rpc_message msg = { | 705 | struct rpc_message msg = { |
@@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
711 | 711 | ||
712 | dprintk("NFS call fsinfo\n"); | 712 | dprintk("NFS call fsinfo\n"); |
713 | nfs_fattr_init(info->fattr); | 713 | nfs_fattr_init(info->fattr); |
714 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 714 | status = rpc_call_sync(client, &msg, 0); |
715 | dprintk("NFS reply fsinfo: %d\n", status); | 715 | dprintk("NFS reply fsinfo: %d\n", status); |
716 | return status; | 716 | return status; |
717 | } | 717 | } |
718 | 718 | ||
719 | /* | ||
720 | * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via | ||
721 | * nfs_create_server | ||
722 | */ | ||
723 | static int | ||
724 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | ||
725 | struct nfs_fsinfo *info) | ||
726 | { | ||
727 | int status; | ||
728 | |||
729 | status = do_proc_fsinfo(server->client, fhandle, info); | ||
730 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
731 | status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); | ||
732 | return status; | ||
733 | } | ||
734 | |||
719 | static int | 735 | static int |
720 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 736 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
721 | struct nfs_pathconf *info) | 737 | struct nfs_pathconf *info) |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index b112857301f7..30befc39b3c6 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
@@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, | |||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
97 | * Check if the string represents a "valid" IPv4 address | 97 | char *page, char *page2, |
98 | */ | 98 | const struct nfs4_fs_location *location) |
99 | static inline int valid_ipaddr4(const char *buf) | ||
100 | { | 99 | { |
101 | int rc, count, in[4]; | 100 | struct vfsmount *mnt = ERR_PTR(-ENOENT); |
102 | 101 | char *mnt_path; | |
103 | rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); | 102 | int page2len; |
104 | if (rc != 4) | 103 | unsigned int s; |
105 | return -EINVAL; | 104 | |
106 | for (count = 0; count < 4; count++) { | 105 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); |
107 | if (in[count] > 255) | 106 | if (IS_ERR(mnt_path)) |
108 | return -EINVAL; | 107 | return mnt; |
108 | mountdata->mnt_path = mnt_path; | ||
109 | page2 += strlen(mnt_path) + 1; | ||
110 | page2len = PAGE_SIZE - strlen(mnt_path) - 1; | ||
111 | |||
112 | for (s = 0; s < location->nservers; s++) { | ||
113 | const struct nfs4_string *buf = &location->servers[s]; | ||
114 | struct sockaddr_storage addr; | ||
115 | |||
116 | if (buf->len <= 0 || buf->len >= PAGE_SIZE) | ||
117 | continue; | ||
118 | |||
119 | mountdata->addr = (struct sockaddr *)&addr; | ||
120 | |||
121 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) | ||
122 | continue; | ||
123 | nfs_parse_ip_address(buf->data, buf->len, | ||
124 | mountdata->addr, &mountdata->addrlen); | ||
125 | if (mountdata->addr->sa_family == AF_UNSPEC) | ||
126 | continue; | ||
127 | nfs_set_port(mountdata->addr, NFS_PORT); | ||
128 | |||
129 | strncpy(page2, buf->data, page2len); | ||
130 | page2[page2len] = '\0'; | ||
131 | mountdata->hostname = page2; | ||
132 | |||
133 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
134 | mountdata->hostname, | ||
135 | mountdata->mnt_path); | ||
136 | |||
137 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); | ||
138 | if (!IS_ERR(mnt)) | ||
139 | break; | ||
109 | } | 140 | } |
110 | return 0; | 141 | return mnt; |
111 | } | 142 | } |
112 | 143 | ||
113 | /** | 144 | /** |
@@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
128 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, | 159 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, |
129 | }; | 160 | }; |
130 | char *page = NULL, *page2 = NULL; | 161 | char *page = NULL, *page2 = NULL; |
131 | unsigned int s; | ||
132 | int loc, error; | 162 | int loc, error; |
133 | 163 | ||
134 | if (locations == NULL || locations->nlocations <= 0) | 164 | if (locations == NULL || locations->nlocations <= 0) |
@@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
152 | goto out; | 182 | goto out; |
153 | } | 183 | } |
154 | 184 | ||
155 | loc = 0; | 185 | for (loc = 0; loc < locations->nlocations; loc++) { |
156 | while (loc < locations->nlocations && IS_ERR(mnt)) { | ||
157 | const struct nfs4_fs_location *location = &locations->locations[loc]; | 186 | const struct nfs4_fs_location *location = &locations->locations[loc]; |
158 | char *mnt_path; | ||
159 | 187 | ||
160 | if (location == NULL || location->nservers <= 0 || | 188 | if (location == NULL || location->nservers <= 0 || |
161 | location->rootpath.ncomponents == 0) { | 189 | location->rootpath.ncomponents == 0) |
162 | loc++; | ||
163 | continue; | 190 | continue; |
164 | } | ||
165 | 191 | ||
166 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); | 192 | mnt = try_location(&mountdata, page, page2, location); |
167 | if (IS_ERR(mnt_path)) { | 193 | if (!IS_ERR(mnt)) |
168 | loc++; | 194 | break; |
169 | continue; | ||
170 | } | ||
171 | mountdata.mnt_path = mnt_path; | ||
172 | |||
173 | s = 0; | ||
174 | while (s < location->nservers) { | ||
175 | struct sockaddr_in addr = { | ||
176 | .sin_family = AF_INET, | ||
177 | .sin_port = htons(NFS_PORT), | ||
178 | }; | ||
179 | |||
180 | if (location->servers[s].len <= 0 || | ||
181 | valid_ipaddr4(location->servers[s].data) < 0) { | ||
182 | s++; | ||
183 | continue; | ||
184 | } | ||
185 | |||
186 | mountdata.hostname = location->servers[s].data; | ||
187 | addr.sin_addr.s_addr = in_aton(mountdata.hostname), | ||
188 | mountdata.addr = (struct sockaddr *)&addr; | ||
189 | mountdata.addrlen = sizeof(addr); | ||
190 | |||
191 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
192 | mountdata.hostname, | ||
193 | mountdata.mnt_path); | ||
194 | |||
195 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); | ||
196 | if (!IS_ERR(mnt)) { | ||
197 | break; | ||
198 | } | ||
199 | s++; | ||
200 | } | ||
201 | loc++; | ||
202 | } | 195 | } |
203 | 196 | ||
204 | out: | 197 | out: |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -127,7 +127,7 @@ enum { | |||
127 | Opt_err | 127 | Opt_err |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static match_table_t __initdata tokens = { | 130 | static match_table_t __initconst tokens = { |
131 | {Opt_port, "port=%u"}, | 131 | {Opt_port, "port=%u"}, |
132 | {Opt_rsize, "rsize=%u"}, | 132 | {Opt_rsize, "rsize=%u"}, |
133 | {Opt_wsize, "wsize=%u"}, | 133 | {Opt_wsize, "wsize=%u"}, |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4dbb84df1b68..193465210d7c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
65 | 65 | ||
66 | dprintk("%s: call getattr\n", __func__); | 66 | dprintk("%s: call getattr\n", __func__); |
67 | nfs_fattr_init(fattr); | 67 | nfs_fattr_init(fattr); |
68 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 68 | status = rpc_call_sync(server->client, &msg, 0); |
69 | /* Retry with default authentication if different */ | ||
70 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
71 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
69 | dprintk("%s: reply getattr: %d\n", __func__, status); | 72 | dprintk("%s: reply getattr: %d\n", __func__, status); |
70 | if (status) | 73 | if (status) |
71 | return status; | 74 | return status; |
72 | dprintk("%s: call statfs\n", __func__); | 75 | dprintk("%s: call statfs\n", __func__); |
73 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; | 76 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; |
74 | msg.rpc_resp = &fsinfo; | 77 | msg.rpc_resp = &fsinfo; |
75 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 78 | status = rpc_call_sync(server->client, &msg, 0); |
79 | /* Retry with default authentication if different */ | ||
80 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
81 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
76 | dprintk("%s: reply statfs: %d\n", __func__, status); | 82 | dprintk("%s: reply statfs: %d\n", __func__, status); |
77 | if (status) | 83 | if (status) |
78 | return status; | 84 | return status; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..8b28b95c9e44 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -91,6 +91,7 @@ enum { | |||
91 | /* Mount options that take string arguments */ | 91 | /* Mount options that take string arguments */ |
92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
94 | Opt_lookupcache, | ||
94 | 95 | ||
95 | /* Special mount options */ | 96 | /* Special mount options */ |
96 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 97 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
@@ -98,7 +99,7 @@ enum { | |||
98 | Opt_err | 99 | Opt_err |
99 | }; | 100 | }; |
100 | 101 | ||
101 | static match_table_t nfs_mount_option_tokens = { | 102 | static const match_table_t nfs_mount_option_tokens = { |
102 | { Opt_userspace, "bg" }, | 103 | { Opt_userspace, "bg" }, |
103 | { Opt_userspace, "fg" }, | 104 | { Opt_userspace, "fg" }, |
104 | { Opt_userspace, "retry=%s" }, | 105 | { Opt_userspace, "retry=%s" }, |
@@ -154,6 +155,8 @@ static match_table_t nfs_mount_option_tokens = { | |||
154 | { Opt_mounthost, "mounthost=%s" }, | 155 | { Opt_mounthost, "mounthost=%s" }, |
155 | { Opt_mountaddr, "mountaddr=%s" }, | 156 | { Opt_mountaddr, "mountaddr=%s" }, |
156 | 157 | ||
158 | { Opt_lookupcache, "lookupcache=%s" }, | ||
159 | |||
157 | { Opt_err, NULL } | 160 | { Opt_err, NULL } |
158 | }; | 161 | }; |
159 | 162 | ||
@@ -163,7 +166,7 @@ enum { | |||
163 | Opt_xprt_err | 166 | Opt_xprt_err |
164 | }; | 167 | }; |
165 | 168 | ||
166 | static match_table_t nfs_xprt_protocol_tokens = { | 169 | static const match_table_t nfs_xprt_protocol_tokens = { |
167 | { Opt_xprt_udp, "udp" }, | 170 | { Opt_xprt_udp, "udp" }, |
168 | { Opt_xprt_tcp, "tcp" }, | 171 | { Opt_xprt_tcp, "tcp" }, |
169 | { Opt_xprt_rdma, "rdma" }, | 172 | { Opt_xprt_rdma, "rdma" }, |
@@ -180,7 +183,7 @@ enum { | |||
180 | Opt_sec_err | 183 | Opt_sec_err |
181 | }; | 184 | }; |
182 | 185 | ||
183 | static match_table_t nfs_secflavor_tokens = { | 186 | static const match_table_t nfs_secflavor_tokens = { |
184 | { Opt_sec_none, "none" }, | 187 | { Opt_sec_none, "none" }, |
185 | { Opt_sec_none, "null" }, | 188 | { Opt_sec_none, "null" }, |
186 | { Opt_sec_sys, "sys" }, | 189 | { Opt_sec_sys, "sys" }, |
@@ -200,6 +203,22 @@ static match_table_t nfs_secflavor_tokens = { | |||
200 | { Opt_sec_err, NULL } | 203 | { Opt_sec_err, NULL } |
201 | }; | 204 | }; |
202 | 205 | ||
206 | enum { | ||
207 | Opt_lookupcache_all, Opt_lookupcache_positive, | ||
208 | Opt_lookupcache_none, | ||
209 | |||
210 | Opt_lookupcache_err | ||
211 | }; | ||
212 | |||
213 | static match_table_t nfs_lookupcache_tokens = { | ||
214 | { Opt_lookupcache_all, "all" }, | ||
215 | { Opt_lookupcache_positive, "pos" }, | ||
216 | { Opt_lookupcache_positive, "positive" }, | ||
217 | { Opt_lookupcache_none, "none" }, | ||
218 | |||
219 | { Opt_lookupcache_err, NULL } | ||
220 | }; | ||
221 | |||
203 | 222 | ||
204 | static void nfs_umount_begin(struct super_block *); | 223 | static void nfs_umount_begin(struct super_block *); |
205 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 224 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
@@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru | |||
209 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, | 228 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, |
210 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 229 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
211 | static void nfs_kill_super(struct super_block *); | 230 | static void nfs_kill_super(struct super_block *); |
212 | static void nfs_put_super(struct super_block *); | ||
213 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | 231 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); |
214 | 232 | ||
215 | static struct file_system_type nfs_fs_type = { | 233 | static struct file_system_type nfs_fs_type = { |
@@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = { | |||
232 | .alloc_inode = nfs_alloc_inode, | 250 | .alloc_inode = nfs_alloc_inode, |
233 | .destroy_inode = nfs_destroy_inode, | 251 | .destroy_inode = nfs_destroy_inode, |
234 | .write_inode = nfs_write_inode, | 252 | .write_inode = nfs_write_inode, |
235 | .put_super = nfs_put_super, | ||
236 | .statfs = nfs_statfs, | 253 | .statfs = nfs_statfs, |
237 | .clear_inode = nfs_clear_inode, | 254 | .clear_inode = nfs_clear_inode, |
238 | .umount_begin = nfs_umount_begin, | 255 | .umount_begin = nfs_umount_begin, |
@@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void) | |||
337 | unregister_filesystem(&nfs_fs_type); | 354 | unregister_filesystem(&nfs_fs_type); |
338 | } | 355 | } |
339 | 356 | ||
340 | void nfs_sb_active(struct nfs_server *server) | 357 | void nfs_sb_active(struct super_block *sb) |
341 | { | 358 | { |
342 | atomic_inc(&server->active); | 359 | struct nfs_server *server = NFS_SB(sb); |
343 | } | ||
344 | 360 | ||
345 | void nfs_sb_deactive(struct nfs_server *server) | 361 | if (atomic_inc_return(&server->active) == 1) |
346 | { | 362 | atomic_inc(&sb->s_active); |
347 | if (atomic_dec_and_test(&server->active)) | ||
348 | wake_up(&server->active_wq); | ||
349 | } | 363 | } |
350 | 364 | ||
351 | static void nfs_put_super(struct super_block *sb) | 365 | void nfs_sb_deactive(struct super_block *sb) |
352 | { | 366 | { |
353 | struct nfs_server *server = NFS_SB(sb); | 367 | struct nfs_server *server = NFS_SB(sb); |
354 | /* | 368 | |
355 | * Make sure there are no outstanding ops to this server. | 369 | if (atomic_dec_and_test(&server->active)) |
356 | * If so, wait for them to finish before allowing the | 370 | deactivate_super(sb); |
357 | * unmount to continue. | ||
358 | */ | ||
359 | wait_event(server->active_wq, atomic_read(&server->active) == 0); | ||
360 | } | 371 | } |
361 | 372 | ||
362 | /* | 373 | /* |
@@ -664,25 +675,6 @@ static void nfs_umount_begin(struct super_block *sb) | |||
664 | } | 675 | } |
665 | 676 | ||
666 | /* | 677 | /* |
667 | * Set the port number in an address. Be agnostic about the address family. | ||
668 | */ | ||
669 | static void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
670 | { | ||
671 | switch (sap->sa_family) { | ||
672 | case AF_INET: { | ||
673 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
674 | ap->sin_port = htons(port); | ||
675 | break; | ||
676 | } | ||
677 | case AF_INET6: { | ||
678 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
679 | ap->sin6_port = htons(port); | ||
680 | break; | ||
681 | } | ||
682 | } | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Sanity-check a server address provided by the mount command. | 678 | * Sanity-check a server address provided by the mount command. |
687 | * | 679 | * |
688 | * Address family must be initialized, and address must not be | 680 | * Address family must be initialized, and address must not be |
@@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, | |||
724 | *addr_len = 0; | 716 | *addr_len = 0; |
725 | } | 717 | } |
726 | 718 | ||
727 | #define IPV6_SCOPE_DELIMITER '%' | ||
728 | |||
729 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 719 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
730 | static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | 720 | static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, |
731 | const char *delim, | 721 | const char *delim, |
732 | struct sockaddr_in6 *sin6) | 722 | struct sockaddr_in6 *sin6) |
733 | { | 723 | { |
734 | char *p; | 724 | char *p; |
735 | size_t len; | 725 | size_t len; |
736 | 726 | ||
737 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | 727 | if ((string + str_len) == delim) |
738 | return ; | 728 | return 1; |
729 | |||
739 | if (*delim != IPV6_SCOPE_DELIMITER) | 730 | if (*delim != IPV6_SCOPE_DELIMITER) |
740 | return; | 731 | return 0; |
732 | |||
733 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
734 | return 0; | ||
741 | 735 | ||
742 | len = (string + str_len) - delim - 1; | 736 | len = (string + str_len) - delim - 1; |
743 | p = kstrndup(delim + 1, len, GFP_KERNEL); | 737 | p = kstrndup(delim + 1, len, GFP_KERNEL); |
@@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | |||
750 | scope_id = dev->ifindex; | 744 | scope_id = dev->ifindex; |
751 | dev_put(dev); | 745 | dev_put(dev); |
752 | } else { | 746 | } else { |
753 | /* scope_id is set to zero on error */ | 747 | if (strict_strtoul(p, 10, &scope_id) == 0) { |
754 | strict_strtoul(p, 10, &scope_id); | 748 | kfree(p); |
749 | return 0; | ||
750 | } | ||
755 | } | 751 | } |
756 | 752 | ||
757 | kfree(p); | 753 | kfree(p); |
754 | |||
758 | sin6->sin6_scope_id = scope_id; | 755 | sin6->sin6_scope_id = scope_id; |
759 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | 756 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); |
757 | return 1; | ||
760 | } | 758 | } |
759 | |||
760 | return 0; | ||
761 | } | 761 | } |
762 | 762 | ||
763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, |
@@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
773 | 773 | ||
774 | sin6->sin6_family = AF_INET6; | 774 | sin6->sin6_family = AF_INET6; |
775 | *addr_len = sizeof(*sin6); | 775 | *addr_len = sizeof(*sin6); |
776 | if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { | 776 | if (in6_pton(string, str_len, addr, |
777 | nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); | 777 | IPV6_SCOPE_DELIMITER, &delim) != 0) { |
778 | return; | 778 | if (nfs_parse_ipv6_scope_id(string, str_len, |
779 | delim, sin6) != 0) | ||
780 | return; | ||
779 | } | 781 | } |
780 | } | 782 | } |
781 | 783 | ||
@@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
798 | * If there is a problem constructing the new sockaddr, set the address | 800 | * If there is a problem constructing the new sockaddr, set the address |
799 | * family to AF_UNSPEC. | 801 | * family to AF_UNSPEC. |
800 | */ | 802 | */ |
801 | static void nfs_parse_ip_address(char *string, size_t str_len, | 803 | void nfs_parse_ip_address(char *string, size_t str_len, |
802 | struct sockaddr *sap, size_t *addr_len) | 804 | struct sockaddr *sap, size_t *addr_len) |
803 | { | 805 | { |
804 | unsigned int i, colons; | 806 | unsigned int i, colons; |
@@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw, | |||
1258 | &mnt->mount_server.addrlen); | 1260 | &mnt->mount_server.addrlen); |
1259 | kfree(string); | 1261 | kfree(string); |
1260 | break; | 1262 | break; |
1263 | case Opt_lookupcache: | ||
1264 | string = match_strdup(args); | ||
1265 | if (string == NULL) | ||
1266 | goto out_nomem; | ||
1267 | token = match_token(string, | ||
1268 | nfs_lookupcache_tokens, args); | ||
1269 | kfree(string); | ||
1270 | switch (token) { | ||
1271 | case Opt_lookupcache_all: | ||
1272 | mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); | ||
1273 | break; | ||
1274 | case Opt_lookupcache_positive: | ||
1275 | mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
1276 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; | ||
1277 | break; | ||
1278 | case Opt_lookupcache_none: | ||
1279 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
1280 | break; | ||
1281 | default: | ||
1282 | errors++; | ||
1283 | dfprintk(MOUNT, "NFS: invalid " | ||
1284 | "lookupcache argument\n"); | ||
1285 | }; | ||
1286 | break; | ||
1261 | 1287 | ||
1262 | /* | 1288 | /* |
1263 | * Special options | 1289 | * Special options |
@@ -1279,6 +1305,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1279 | } | 1305 | } |
1280 | } | 1306 | } |
1281 | 1307 | ||
1308 | if (errors > 0) { | ||
1309 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
1310 | errors, (errors == 1 ? "" : "s")); | ||
1311 | if (!sloppy) | ||
1312 | return 0; | ||
1313 | } | ||
1282 | return 1; | 1314 | return 1; |
1283 | 1315 | ||
1284 | out_nomem: | 1316 | out_nomem: |
@@ -1552,7 +1584,7 @@ static int nfs_validate_mount_data(void *options, | |||
1552 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | 1584 | * Translate to nfs_parsed_mount_data, which nfs_fill_super |
1553 | * can deal with. | 1585 | * can deal with. |
1554 | */ | 1586 | */ |
1555 | args->flags = data->flags; | 1587 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; |
1556 | args->rsize = data->rsize; | 1588 | args->rsize = data->rsize; |
1557 | args->wsize = data->wsize; | 1589 | args->wsize = data->wsize; |
1558 | args->timeo = data->timeo; | 1590 | args->timeo = data->timeo; |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index f089e5839d7d..ecc295347775 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) | |||
99 | 99 | ||
100 | nfs_dec_sillycount(data->dir); | 100 | nfs_dec_sillycount(data->dir); |
101 | nfs_free_unlinkdata(data); | 101 | nfs_free_unlinkdata(data); |
102 | nfs_sb_deactive(NFS_SB(sb)); | 102 | nfs_sb_deactive(sb); |
103 | } | 103 | } |
104 | 104 | ||
105 | static const struct rpc_call_ops nfs_unlink_ops = { | 105 | static const struct rpc_call_ops nfs_unlink_ops = { |
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
118 | .rpc_message = &msg, | 118 | .rpc_message = &msg, |
119 | .callback_ops = &nfs_unlink_ops, | 119 | .callback_ops = &nfs_unlink_ops, |
120 | .callback_data = data, | 120 | .callback_data = data, |
121 | .workqueue = nfsiod_workqueue, | ||
121 | .flags = RPC_TASK_ASYNC, | 122 | .flags = RPC_TASK_ASYNC, |
122 | }; | 123 | }; |
123 | struct rpc_task *task; | 124 | struct rpc_task *task; |
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
149 | nfs_dec_sillycount(dir); | 150 | nfs_dec_sillycount(dir); |
150 | return 0; | 151 | return 0; |
151 | } | 152 | } |
152 | nfs_sb_active(NFS_SERVER(dir)); | 153 | nfs_sb_active(dir->i_sb); |
153 | data->args.fh = NFS_FH(dir); | 154 | data->args.fh = NFS_FH(dir); |
154 | nfs_fattr_init(&data->res.dir_attr); | 155 | nfs_fattr_init(&data->res.dir_attr); |
155 | 156 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3229e217c773..9f9845859fc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
1427 | .bdi = mapping->backing_dev_info, | 1427 | .bdi = mapping->backing_dev_info, |
1428 | .sync_mode = WB_SYNC_NONE, | 1428 | .sync_mode = WB_SYNC_NONE, |
1429 | .nr_to_write = LONG_MAX, | 1429 | .nr_to_write = LONG_MAX, |
1430 | .range_start = 0, | ||
1431 | .range_end = LLONG_MAX, | ||
1430 | .for_writepages = 1, | 1432 | .for_writepages = 1, |
1431 | .range_cyclic = 1, | ||
1432 | }; | 1433 | }; |
1433 | int ret; | 1434 | int ret; |
1434 | 1435 | ||
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6faeec77c..b2786a5f9afe 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c | |||
@@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) | |||
70 | static struct nlmsvc_binding nfsd_nlm_ops = { | 70 | static struct nlmsvc_binding nfsd_nlm_ops = { |
71 | .fopen = nlm_fopen, /* open file for locking */ | 71 | .fopen = nlm_fopen, /* open file for locking */ |
72 | .fclose = nlm_fclose, /* close file */ | 72 | .fclose = nlm_fclose, /* close file */ |
73 | .get_grace_period = get_nfs4_grace_period, | ||
74 | }; | 73 | }; |
75 | 74 | ||
76 | void | 75 | void |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4d617ea28cfc..9dbd2eb91281 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
63 | SVCFH_fmt(&argp->fh)); | 63 | SVCFH_fmt(&argp->fh)); |
64 | 64 | ||
65 | fh_copy(&resp->fh, &argp->fh); | 65 | fh_copy(&resp->fh, &argp->fh); |
66 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 66 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
67 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
67 | if (nfserr) | 68 | if (nfserr) |
68 | RETURN_STATUS(nfserr); | 69 | RETURN_STATUS(nfserr); |
69 | 70 | ||
@@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
530 | dprintk("nfsd: FSSTAT(3) %s\n", | 531 | dprintk("nfsd: FSSTAT(3) %s\n", |
531 | SVCFH_fmt(&argp->fh)); | 532 | SVCFH_fmt(&argp->fh)); |
532 | 533 | ||
533 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 534 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); |
534 | fh_put(&argp->fh); | 535 | fh_put(&argp->fh); |
535 | RETURN_STATUS(nfserr); | 536 | RETURN_STATUS(nfserr); |
536 | } | 537 | } |
@@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
558 | resp->f_maxfilesize = ~(u32) 0; | 559 | resp->f_maxfilesize = ~(u32) 0; |
559 | resp->f_properties = NFS3_FSF_DEFAULT; | 560 | resp->f_properties = NFS3_FSF_DEFAULT; |
560 | 561 | ||
561 | nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); | 562 | nfserr = fh_verify(rqstp, &argp->fh, 0, |
563 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
562 | 564 | ||
563 | /* Check special features of the file system. May request | 565 | /* Check special features of the file system. May request |
564 | * different read/write sizes for file systems known to have | 566 | * different read/write sizes for file systems known to have |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
443 | * enough space for either: | 443 | * enough space for either: |
444 | */ | 444 | */ |
445 | alloc = sizeof(struct posix_ace_state_array) | 445 | alloc = sizeof(struct posix_ace_state_array) |
446 | + cnt*sizeof(struct posix_ace_state); | 446 | + cnt*sizeof(struct posix_user_ace_state); |
447 | state->users = kzalloc(alloc, GFP_KERNEL); | 447 | state->users = kzalloc(alloc, GFP_KERNEL); |
448 | if (!state->users) | 448 | if (!state->users) |
449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 702fa577aa6e..094747a1227c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -225,7 +225,8 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | |||
225 | 225 | ||
226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); | 226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); |
227 | WRITE32(OP_CB_RECALL); | 227 | WRITE32(OP_CB_RECALL); |
228 | WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); | 228 | WRITE32(cb_rec->cbr_stateid.si_generation); |
229 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
229 | WRITE32(cb_rec->cbr_trunc); | 230 | WRITE32(cb_rec->cbr_trunc); |
230 | WRITE32(len); | 231 | WRITE32(len); |
231 | WRITEMEM(cb_rec->cbr_fhval, len); | 232 | WRITEMEM(cb_rec->cbr_fhval, len); |
@@ -379,6 +380,7 @@ static int do_probe_callback(void *data) | |||
379 | .addrsize = sizeof(addr), | 380 | .addrsize = sizeof(addr), |
380 | .timeout = &timeparms, | 381 | .timeout = &timeparms, |
381 | .program = &cb_program, | 382 | .program = &cb_program, |
383 | .prognumber = cb->cb_prog, | ||
382 | .version = nfs_cb_version[1]->number, | 384 | .version = nfs_cb_version[1]->number, |
383 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 385 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 386 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
@@ -396,9 +398,6 @@ static int do_probe_callback(void *data) | |||
396 | addr.sin_port = htons(cb->cb_port); | 398 | addr.sin_port = htons(cb->cb_port); |
397 | addr.sin_addr.s_addr = htonl(cb->cb_addr); | 399 | addr.sin_addr.s_addr = htonl(cb->cb_addr); |
398 | 400 | ||
399 | /* Initialize rpc_stat */ | ||
400 | memset(args.program->stats, 0, sizeof(struct rpc_stat)); | ||
401 | |||
402 | /* Create RPC client */ | 401 | /* Create RPC client */ |
403 | client = rpc_create(&args); | 402 | client = rpc_create(&args); |
404 | if (IS_ERR(client)) { | 403 | if (IS_ERR(client)) { |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..669461e291ae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
201 | /* Openowner is now set, so sequence id will get bumped. Now we need | 201 | /* Openowner is now set, so sequence id will get bumped. Now we need |
202 | * these checks before we do any creates: */ | 202 | * these checks before we do any creates: */ |
203 | status = nfserr_grace; | 203 | status = nfserr_grace; |
204 | if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) | 204 | if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) |
205 | goto out; | 205 | goto out; |
206 | status = nfserr_no_grace; | 206 | status = nfserr_no_grace; |
207 | if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) | 207 | if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) |
208 | goto out; | 208 | goto out; |
209 | 209 | ||
210 | switch (open->op_claim_type) { | 210 | switch (open->op_claim_type) { |
@@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
575 | { | 575 | { |
576 | __be32 status; | 576 | __be32 status; |
577 | 577 | ||
578 | if (nfs4_in_grace()) | 578 | if (locks_in_grace()) |
579 | return nfserr_grace; | 579 | return nfserr_grace; |
580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, | 580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, |
581 | remove->rm_name, remove->rm_namelen); | 581 | remove->rm_name, remove->rm_namelen); |
@@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
596 | 596 | ||
597 | if (!cstate->save_fh.fh_dentry) | 597 | if (!cstate->save_fh.fh_dentry) |
598 | return status; | 598 | return status; |
599 | if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags | 599 | if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags |
600 | & NFSEXP_NOSUBTREECHECK)) | 600 | & NFSEXP_NOSUBTREECHECK)) |
601 | return nfserr_grace; | 601 | return nfserr_grace; |
602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, | 602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, |
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
867 | int slack_bytes; | 867 | int slack_bytes; |
868 | __be32 status; | 868 | __be32 status; |
869 | 869 | ||
870 | status = nfserr_resource; | ||
871 | cstate = cstate_alloc(); | ||
872 | if (cstate == NULL) | ||
873 | goto out; | ||
874 | |||
875 | resp->xbuf = &rqstp->rq_res; | 870 | resp->xbuf = &rqstp->rq_res; |
876 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; |
877 | resp->tagp = resp->p; | 872 | resp->tagp = resp->p; |
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
890 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) |
891 | goto out; | 886 | goto out; |
892 | 887 | ||
888 | status = nfserr_resource; | ||
889 | cstate = cstate_alloc(); | ||
890 | if (cstate == NULL) | ||
891 | goto out; | ||
892 | |||
893 | status = nfs_ok; | 893 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 894 | while (!status && resp->opcnt < args->opcnt) { |
895 | op = &args->ops[resp->opcnt++]; | 895 | op = &args->ops[resp->opcnt++]; |
@@ -957,9 +957,9 @@ encode_op: | |||
957 | nfsd4_increment_op_stats(op->opnum); | 957 | nfsd4_increment_op_stats(op->opnum); |
958 | } | 958 | } |
959 | 959 | ||
960 | cstate_free(cstate); | ||
960 | out: | 961 | out: |
961 | nfsd4_release_compoundargs(args); | 962 | nfsd4_release_compoundargs(args); |
962 | cstate_free(cstate); | ||
963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
964 | return status; | 964 | return status; |
965 | } | 965 | } |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a2667e..0cc7ff5d5ab5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -61,7 +61,6 @@ | |||
61 | static time_t lease_time = 90; /* default lease time */ | 61 | static time_t lease_time = 90; /* default lease time */ |
62 | static time_t user_lease_time = 90; | 62 | static time_t user_lease_time = 90; |
63 | static time_t boot_time; | 63 | static time_t boot_time; |
64 | static int in_grace = 1; | ||
65 | static u32 current_ownerid = 1; | 64 | static u32 current_ownerid = 1; |
66 | static u32 current_fileid = 1; | 65 | static u32 current_fileid = 1; |
67 | static u32 current_delegid = 1; | 66 | static u32 current_delegid = 1; |
@@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
1640 | case NFS4_OPEN_CLAIM_NULL: | 1639 | case NFS4_OPEN_CLAIM_NULL: |
1641 | /* Let's not give out any delegations till everyone's | 1640 | /* Let's not give out any delegations till everyone's |
1642 | * had the chance to reclaim theirs.... */ | 1641 | * had the chance to reclaim theirs.... */ |
1643 | if (nfs4_in_grace()) | 1642 | if (locks_in_grace()) |
1644 | goto out; | 1643 | goto out; |
1645 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) | 1644 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) |
1646 | goto out; | 1645 | goto out; |
@@ -1816,12 +1815,15 @@ out: | |||
1816 | return status; | 1815 | return status; |
1817 | } | 1816 | } |
1818 | 1817 | ||
1818 | struct lock_manager nfsd4_manager = { | ||
1819 | }; | ||
1820 | |||
1819 | static void | 1821 | static void |
1820 | end_grace(void) | 1822 | nfsd4_end_grace(void) |
1821 | { | 1823 | { |
1822 | dprintk("NFSD: end of grace period\n"); | 1824 | dprintk("NFSD: end of grace period\n"); |
1823 | nfsd4_recdir_purge_old(); | 1825 | nfsd4_recdir_purge_old(); |
1824 | in_grace = 0; | 1826 | locks_end_grace(&nfsd4_manager); |
1825 | } | 1827 | } |
1826 | 1828 | ||
1827 | static time_t | 1829 | static time_t |
@@ -1838,8 +1840,8 @@ nfs4_laundromat(void) | |||
1838 | nfs4_lock_state(); | 1840 | nfs4_lock_state(); |
1839 | 1841 | ||
1840 | dprintk("NFSD: laundromat service - starting\n"); | 1842 | dprintk("NFSD: laundromat service - starting\n"); |
1841 | if (in_grace) | 1843 | if (locks_in_grace()) |
1842 | end_grace(); | 1844 | nfsd4_end_grace(); |
1843 | list_for_each_safe(pos, next, &client_lru) { | 1845 | list_for_each_safe(pos, next, &client_lru) { |
1844 | clp = list_entry(pos, struct nfs4_client, cl_lru); | 1846 | clp = list_entry(pos, struct nfs4_client, cl_lru); |
1845 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { | 1847 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { |
@@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
1974 | return nfserr_bad_stateid; | 1976 | return nfserr_bad_stateid; |
1975 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) | 1977 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) |
1976 | return nfs_ok; | 1978 | return nfs_ok; |
1977 | else if (nfs4_in_grace()) { | 1979 | else if (locks_in_grace()) { |
1978 | /* Answer in remaining cases depends on existance of | 1980 | /* Answer in remaining cases depends on existance of |
1979 | * conflicting state; so we must wait out the grace period. */ | 1981 | * conflicting state; so we must wait out the grace period. */ |
1980 | return nfserr_grace; | 1982 | return nfserr_grace; |
@@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
1993 | static inline int | 1995 | static inline int |
1994 | io_during_grace_disallowed(struct inode *inode, int flags) | 1996 | io_during_grace_disallowed(struct inode *inode, int flags) |
1995 | { | 1997 | { |
1996 | return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) | 1998 | return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) |
1997 | && mandatory_lock(inode); | 1999 | && mandatory_lock(inode); |
1998 | } | 2000 | } |
1999 | 2001 | ||
@@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2693 | filp = lock_stp->st_vfs_file; | 2695 | filp = lock_stp->st_vfs_file; |
2694 | 2696 | ||
2695 | status = nfserr_grace; | 2697 | status = nfserr_grace; |
2696 | if (nfs4_in_grace() && !lock->lk_reclaim) | 2698 | if (locks_in_grace() && !lock->lk_reclaim) |
2697 | goto out; | 2699 | goto out; |
2698 | status = nfserr_no_grace; | 2700 | status = nfserr_no_grace; |
2699 | if (!nfs4_in_grace() && lock->lk_reclaim) | 2701 | if (!locks_in_grace() && lock->lk_reclaim) |
2700 | goto out; | 2702 | goto out; |
2701 | 2703 | ||
2702 | locks_init_lock(&file_lock); | 2704 | locks_init_lock(&file_lock); |
@@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2779 | int error; | 2781 | int error; |
2780 | __be32 status; | 2782 | __be32 status; |
2781 | 2783 | ||
2782 | if (nfs4_in_grace()) | 2784 | if (locks_in_grace()) |
2783 | return nfserr_grace; | 2785 | return nfserr_grace; |
2784 | 2786 | ||
2785 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) | 2787 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) |
@@ -3192,9 +3194,9 @@ __nfs4_state_start(void) | |||
3192 | unsigned long grace_time; | 3194 | unsigned long grace_time; |
3193 | 3195 | ||
3194 | boot_time = get_seconds(); | 3196 | boot_time = get_seconds(); |
3195 | grace_time = get_nfs_grace_period(); | 3197 | grace_time = get_nfs4_grace_period(); |
3196 | lease_time = user_lease_time; | 3198 | lease_time = user_lease_time; |
3197 | in_grace = 1; | 3199 | locks_start_grace(&nfsd4_manager); |
3198 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", | 3200 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
3199 | grace_time/HZ); | 3201 | grace_time/HZ); |
3200 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 3202 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
@@ -3213,12 +3215,6 @@ nfs4_state_start(void) | |||
3213 | return; | 3215 | return; |
3214 | } | 3216 | } |
3215 | 3217 | ||
3216 | int | ||
3217 | nfs4_in_grace(void) | ||
3218 | { | ||
3219 | return in_grace; | ||
3220 | } | ||
3221 | |||
3222 | time_t | 3218 | time_t |
3223 | nfs4_lease_time(void) | 3219 | nfs4_lease_time(void) |
3224 | { | 3220 | { |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 14ba4d9b2859..afcdf4b76843 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -413,6 +413,18 @@ out_nfserr: | |||
413 | } | 413 | } |
414 | 414 | ||
415 | static __be32 | 415 | static __be32 |
416 | nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) | ||
417 | { | ||
418 | DECODE_HEAD; | ||
419 | |||
420 | READ_BUF(sizeof(stateid_t)); | ||
421 | READ32(sid->si_generation); | ||
422 | COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
423 | |||
424 | DECODE_TAIL; | ||
425 | } | ||
426 | |||
427 | static __be32 | ||
416 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) | 428 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) |
417 | { | 429 | { |
418 | DECODE_HEAD; | 430 | DECODE_HEAD; |
@@ -429,10 +441,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) | |||
429 | DECODE_HEAD; | 441 | DECODE_HEAD; |
430 | 442 | ||
431 | close->cl_stateowner = NULL; | 443 | close->cl_stateowner = NULL; |
432 | READ_BUF(4 + sizeof(stateid_t)); | 444 | READ_BUF(4); |
433 | READ32(close->cl_seqid); | 445 | READ32(close->cl_seqid); |
434 | READ32(close->cl_stateid.si_generation); | 446 | return nfsd4_decode_stateid(argp, &close->cl_stateid); |
435 | COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
436 | 447 | ||
437 | DECODE_TAIL; | 448 | DECODE_TAIL; |
438 | } | 449 | } |
@@ -493,13 +504,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
493 | static inline __be32 | 504 | static inline __be32 |
494 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) | 505 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) |
495 | { | 506 | { |
496 | DECODE_HEAD; | 507 | return nfsd4_decode_stateid(argp, &dr->dr_stateid); |
497 | |||
498 | READ_BUF(sizeof(stateid_t)); | ||
499 | READ32(dr->dr_stateid.si_generation); | ||
500 | COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
501 | |||
502 | DECODE_TAIL; | ||
503 | } | 508 | } |
504 | 509 | ||
505 | static inline __be32 | 510 | static inline __be32 |
@@ -542,20 +547,22 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) | |||
542 | READ32(lock->lk_is_new); | 547 | READ32(lock->lk_is_new); |
543 | 548 | ||
544 | if (lock->lk_is_new) { | 549 | if (lock->lk_is_new) { |
545 | READ_BUF(36); | 550 | READ_BUF(4); |
546 | READ32(lock->lk_new_open_seqid); | 551 | READ32(lock->lk_new_open_seqid); |
547 | READ32(lock->lk_new_open_stateid.si_generation); | 552 | status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); |
548 | 553 | if (status) | |
549 | COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); | 554 | return status; |
555 | READ_BUF(8 + sizeof(clientid_t)); | ||
550 | READ32(lock->lk_new_lock_seqid); | 556 | READ32(lock->lk_new_lock_seqid); |
551 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); | 557 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); |
552 | READ32(lock->lk_new_owner.len); | 558 | READ32(lock->lk_new_owner.len); |
553 | READ_BUF(lock->lk_new_owner.len); | 559 | READ_BUF(lock->lk_new_owner.len); |
554 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); | 560 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); |
555 | } else { | 561 | } else { |
556 | READ_BUF(20); | 562 | status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); |
557 | READ32(lock->lk_old_lock_stateid.si_generation); | 563 | if (status) |
558 | COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); | 564 | return status; |
565 | READ_BUF(4); | ||
559 | READ32(lock->lk_old_lock_seqid); | 566 | READ32(lock->lk_old_lock_seqid); |
560 | } | 567 | } |
561 | 568 | ||
@@ -587,13 +594,15 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) | |||
587 | DECODE_HEAD; | 594 | DECODE_HEAD; |
588 | 595 | ||
589 | locku->lu_stateowner = NULL; | 596 | locku->lu_stateowner = NULL; |
590 | READ_BUF(24 + sizeof(stateid_t)); | 597 | READ_BUF(8); |
591 | READ32(locku->lu_type); | 598 | READ32(locku->lu_type); |
592 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) | 599 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) |
593 | goto xdr_error; | 600 | goto xdr_error; |
594 | READ32(locku->lu_seqid); | 601 | READ32(locku->lu_seqid); |
595 | READ32(locku->lu_stateid.si_generation); | 602 | status = nfsd4_decode_stateid(argp, &locku->lu_stateid); |
596 | COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | 603 | if (status) |
604 | return status; | ||
605 | READ_BUF(16); | ||
597 | READ64(locku->lu_offset); | 606 | READ64(locku->lu_offset); |
598 | READ64(locku->lu_length); | 607 | READ64(locku->lu_length); |
599 | 608 | ||
@@ -678,8 +687,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
678 | READ32(open->op_delegate_type); | 687 | READ32(open->op_delegate_type); |
679 | break; | 688 | break; |
680 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 689 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
681 | READ_BUF(sizeof(stateid_t) + 4); | 690 | status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); |
682 | COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 691 | if (status) |
692 | return status; | ||
693 | READ_BUF(4); | ||
683 | READ32(open->op_fname.len); | 694 | READ32(open->op_fname.len); |
684 | READ_BUF(open->op_fname.len); | 695 | READ_BUF(open->op_fname.len); |
685 | SAVEMEM(open->op_fname.data, open->op_fname.len); | 696 | SAVEMEM(open->op_fname.data, open->op_fname.len); |
@@ -699,9 +710,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con | |||
699 | DECODE_HEAD; | 710 | DECODE_HEAD; |
700 | 711 | ||
701 | open_conf->oc_stateowner = NULL; | 712 | open_conf->oc_stateowner = NULL; |
702 | READ_BUF(4 + sizeof(stateid_t)); | 713 | status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); |
703 | READ32(open_conf->oc_req_stateid.si_generation); | 714 | if (status) |
704 | COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); | 715 | return status; |
716 | READ_BUF(4); | ||
705 | READ32(open_conf->oc_seqid); | 717 | READ32(open_conf->oc_seqid); |
706 | 718 | ||
707 | DECODE_TAIL; | 719 | DECODE_TAIL; |
@@ -713,9 +725,10 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d | |||
713 | DECODE_HEAD; | 725 | DECODE_HEAD; |
714 | 726 | ||
715 | open_down->od_stateowner = NULL; | 727 | open_down->od_stateowner = NULL; |
716 | READ_BUF(12 + sizeof(stateid_t)); | 728 | status = nfsd4_decode_stateid(argp, &open_down->od_stateid); |
717 | READ32(open_down->od_stateid.si_generation); | 729 | if (status) |
718 | COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | 730 | return status; |
731 | READ_BUF(12); | ||
719 | READ32(open_down->od_seqid); | 732 | READ32(open_down->od_seqid); |
720 | READ32(open_down->od_share_access); | 733 | READ32(open_down->od_share_access); |
721 | READ32(open_down->od_share_deny); | 734 | READ32(open_down->od_share_deny); |
@@ -743,9 +756,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) | |||
743 | { | 756 | { |
744 | DECODE_HEAD; | 757 | DECODE_HEAD; |
745 | 758 | ||
746 | READ_BUF(sizeof(stateid_t) + 12); | 759 | status = nfsd4_decode_stateid(argp, &read->rd_stateid); |
747 | READ32(read->rd_stateid.si_generation); | 760 | if (status) |
748 | COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); | 761 | return status; |
762 | READ_BUF(12); | ||
749 | READ64(read->rd_offset); | 763 | READ64(read->rd_offset); |
750 | READ32(read->rd_length); | 764 | READ32(read->rd_length); |
751 | 765 | ||
@@ -834,15 +848,13 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | |||
834 | static __be32 | 848 | static __be32 |
835 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 849 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
836 | { | 850 | { |
837 | DECODE_HEAD; | 851 | __be32 status; |
838 | |||
839 | READ_BUF(sizeof(stateid_t)); | ||
840 | READ32(setattr->sa_stateid.si_generation); | ||
841 | COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
842 | if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) | ||
843 | goto out; | ||
844 | 852 | ||
845 | DECODE_TAIL; | 853 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); |
854 | if (status) | ||
855 | return status; | ||
856 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, | ||
857 | &setattr->sa_iattr, &setattr->sa_acl); | ||
846 | } | 858 | } |
847 | 859 | ||
848 | static __be32 | 860 | static __be32 |
@@ -927,9 +939,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
927 | int len; | 939 | int len; |
928 | DECODE_HEAD; | 940 | DECODE_HEAD; |
929 | 941 | ||
930 | READ_BUF(sizeof(stateid_opaque_t) + 20); | 942 | status = nfsd4_decode_stateid(argp, &write->wr_stateid); |
931 | READ32(write->wr_stateid.si_generation); | 943 | if (status) |
932 | COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); | 944 | return status; |
945 | READ_BUF(16); | ||
933 | READ64(write->wr_offset); | 946 | READ64(write->wr_offset); |
934 | READ32(write->wr_stable_how); | 947 | READ32(write->wr_stable_how); |
935 | if (write->wr_stable_how > 2) | 948 | if (write->wr_stable_how > 2) |
@@ -1183,7 +1196,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1183 | * Header routine to setup seqid operation replay cache | 1196 | * Header routine to setup seqid operation replay cache |
1184 | */ | 1197 | */ |
1185 | #define ENCODE_SEQID_OP_HEAD \ | 1198 | #define ENCODE_SEQID_OP_HEAD \ |
1186 | __be32 *p; \ | ||
1187 | __be32 *save; \ | 1199 | __be32 *save; \ |
1188 | \ | 1200 | \ |
1189 | save = resp->p; | 1201 | save = resp->p; |
@@ -1950,6 +1962,17 @@ fail: | |||
1950 | return -EINVAL; | 1962 | return -EINVAL; |
1951 | } | 1963 | } |
1952 | 1964 | ||
1965 | static void | ||
1966 | nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) | ||
1967 | { | ||
1968 | ENCODE_HEAD; | ||
1969 | |||
1970 | RESERVE_SPACE(sizeof(stateid_t)); | ||
1971 | WRITE32(sid->si_generation); | ||
1972 | WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
1973 | ADJUST_ARGS(); | ||
1974 | } | ||
1975 | |||
1953 | static __be32 | 1976 | static __be32 |
1954 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) | 1977 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) |
1955 | { | 1978 | { |
@@ -1969,12 +1992,9 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c | |||
1969 | { | 1992 | { |
1970 | ENCODE_SEQID_OP_HEAD; | 1993 | ENCODE_SEQID_OP_HEAD; |
1971 | 1994 | ||
1972 | if (!nfserr) { | 1995 | if (!nfserr) |
1973 | RESERVE_SPACE(sizeof(stateid_t)); | 1996 | nfsd4_encode_stateid(resp, &close->cl_stateid); |
1974 | WRITE32(close->cl_stateid.si_generation); | 1997 | |
1975 | WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
1976 | ADJUST_ARGS(); | ||
1977 | } | ||
1978 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); | 1998 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); |
1979 | return nfserr; | 1999 | return nfserr; |
1980 | } | 2000 | } |
@@ -2074,12 +2094,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo | |||
2074 | { | 2094 | { |
2075 | ENCODE_SEQID_OP_HEAD; | 2095 | ENCODE_SEQID_OP_HEAD; |
2076 | 2096 | ||
2077 | if (!nfserr) { | 2097 | if (!nfserr) |
2078 | RESERVE_SPACE(4 + sizeof(stateid_t)); | 2098 | nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); |
2079 | WRITE32(lock->lk_resp_stateid.si_generation); | 2099 | else if (nfserr == nfserr_denied) |
2080 | WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2081 | ADJUST_ARGS(); | ||
2082 | } else if (nfserr == nfserr_denied) | ||
2083 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); | 2100 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); |
2084 | 2101 | ||
2085 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); | 2102 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); |
@@ -2099,13 +2116,9 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l | |||
2099 | { | 2116 | { |
2100 | ENCODE_SEQID_OP_HEAD; | 2117 | ENCODE_SEQID_OP_HEAD; |
2101 | 2118 | ||
2102 | if (!nfserr) { | 2119 | if (!nfserr) |
2103 | RESERVE_SPACE(sizeof(stateid_t)); | 2120 | nfsd4_encode_stateid(resp, &locku->lu_stateid); |
2104 | WRITE32(locku->lu_stateid.si_generation); | 2121 | |
2105 | WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2106 | ADJUST_ARGS(); | ||
2107 | } | ||
2108 | |||
2109 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); | 2122 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); |
2110 | return nfserr; | 2123 | return nfserr; |
2111 | } | 2124 | } |
@@ -2128,14 +2141,14 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li | |||
2128 | static __be32 | 2141 | static __be32 |
2129 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) | 2142 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) |
2130 | { | 2143 | { |
2144 | ENCODE_HEAD; | ||
2131 | ENCODE_SEQID_OP_HEAD; | 2145 | ENCODE_SEQID_OP_HEAD; |
2132 | 2146 | ||
2133 | if (nfserr) | 2147 | if (nfserr) |
2134 | goto out; | 2148 | goto out; |
2135 | 2149 | ||
2136 | RESERVE_SPACE(36 + sizeof(stateid_t)); | 2150 | nfsd4_encode_stateid(resp, &open->op_stateid); |
2137 | WRITE32(open->op_stateid.si_generation); | 2151 | RESERVE_SPACE(40); |
2138 | WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2139 | WRITECINFO(open->op_cinfo); | 2152 | WRITECINFO(open->op_cinfo); |
2140 | WRITE32(open->op_rflags); | 2153 | WRITE32(open->op_rflags); |
2141 | WRITE32(2); | 2154 | WRITE32(2); |
@@ -2148,8 +2161,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
2148 | case NFS4_OPEN_DELEGATE_NONE: | 2161 | case NFS4_OPEN_DELEGATE_NONE: |
2149 | break; | 2162 | break; |
2150 | case NFS4_OPEN_DELEGATE_READ: | 2163 | case NFS4_OPEN_DELEGATE_READ: |
2151 | RESERVE_SPACE(20 + sizeof(stateid_t)); | 2164 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
2152 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2165 | RESERVE_SPACE(20); |
2153 | WRITE32(open->op_recall); | 2166 | WRITE32(open->op_recall); |
2154 | 2167 | ||
2155 | /* | 2168 | /* |
@@ -2162,8 +2175,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
2162 | ADJUST_ARGS(); | 2175 | ADJUST_ARGS(); |
2163 | break; | 2176 | break; |
2164 | case NFS4_OPEN_DELEGATE_WRITE: | 2177 | case NFS4_OPEN_DELEGATE_WRITE: |
2165 | RESERVE_SPACE(32 + sizeof(stateid_t)); | 2178 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
2166 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2179 | RESERVE_SPACE(32); |
2167 | WRITE32(0); | 2180 | WRITE32(0); |
2168 | 2181 | ||
2169 | /* | 2182 | /* |
@@ -2195,13 +2208,9 @@ static __be32 | |||
2195 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) | 2208 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) |
2196 | { | 2209 | { |
2197 | ENCODE_SEQID_OP_HEAD; | 2210 | ENCODE_SEQID_OP_HEAD; |
2198 | 2211 | ||
2199 | if (!nfserr) { | 2212 | if (!nfserr) |
2200 | RESERVE_SPACE(sizeof(stateid_t)); | 2213 | nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); |
2201 | WRITE32(oc->oc_resp_stateid.si_generation); | ||
2202 | WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2203 | ADJUST_ARGS(); | ||
2204 | } | ||
2205 | 2214 | ||
2206 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); | 2215 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); |
2207 | return nfserr; | 2216 | return nfserr; |
@@ -2211,13 +2220,9 @@ static __be32 | |||
2211 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) | 2220 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) |
2212 | { | 2221 | { |
2213 | ENCODE_SEQID_OP_HEAD; | 2222 | ENCODE_SEQID_OP_HEAD; |
2214 | 2223 | ||
2215 | if (!nfserr) { | 2224 | if (!nfserr) |
2216 | RESERVE_SPACE(sizeof(stateid_t)); | 2225 | nfsd4_encode_stateid(resp, &od->od_stateid); |
2217 | WRITE32(od->od_stateid.si_generation); | ||
2218 | WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
2219 | ADJUST_ARGS(); | ||
2220 | } | ||
2221 | 2226 | ||
2222 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); | 2227 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); |
2223 | return nfserr; | 2228 | return nfserr; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c53e65f8f3a2..97543df58242 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
614 | return -EINVAL; | 614 | return -EINVAL; |
615 | err = nfsd_create_serv(); | 615 | err = nfsd_create_serv(); |
616 | if (!err) { | 616 | if (!err) { |
617 | int proto = 0; | 617 | err = svc_addsock(nfsd_serv, fd, buf); |
618 | err = svc_addsock(nfsd_serv, fd, buf, &proto); | ||
619 | if (err >= 0) { | 618 | if (err >= 0) { |
620 | err = lockd_up(proto); | 619 | err = lockd_up(); |
621 | if (err < 0) | 620 | if (err < 0) |
622 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); | 621 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); |
623 | } | 622 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ea37c96f0445..cd25d91895a1 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
302 | if (error) | 302 | if (error) |
303 | goto out; | 303 | goto out; |
304 | 304 | ||
305 | if (!(access & NFSD_MAY_LOCK)) { | 305 | /* |
306 | /* | 306 | * pseudoflavor restrictions are not enforced on NLM, |
307 | * pseudoflavor restrictions are not enforced on NLM, | 307 | * which clients virtually always use auth_sys for, |
308 | * which clients virtually always use auth_sys for, | 308 | * even while using RPCSEC_GSS for NFS. |
309 | * even while using RPCSEC_GSS for NFS. | 309 | */ |
310 | */ | 310 | if (access & NFSD_MAY_LOCK) |
311 | error = check_nfsd_access(exp, rqstp); | 311 | goto skip_pseudoflavor_check; |
312 | if (error) | 312 | /* |
313 | goto out; | 313 | * Clients may expect to be able to use auth_sys during mount, |
314 | } | 314 | * even if they use gss for everything else; see section 2.3.2 |
315 | * of rfc 2623. | ||
316 | */ | ||
317 | if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT | ||
318 | && exp->ex_path.dentry == dentry) | ||
319 | goto skip_pseudoflavor_check; | ||
320 | |||
321 | error = check_nfsd_access(exp, rqstp); | ||
322 | if (error) | ||
323 | goto out; | ||
315 | 324 | ||
325 | skip_pseudoflavor_check: | ||
316 | /* Finally, check access permissions. */ | 326 | /* Finally, check access permissions. */ |
317 | error = nfsd_permission(rqstp, exp, dentry, access); | 327 | error = nfsd_permission(rqstp, exp, dentry, access); |
318 | 328 | ||
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0766f95d236a..5cffeca7acef 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); | 65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); |
66 | 66 | ||
67 | fh_copy(&resp->fh, &argp->fh); | 67 | fh_copy(&resp->fh, &argp->fh); |
68 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 68 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
69 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
69 | return nfsd_return_attrs(nfserr, resp); | 70 | return nfsd_return_attrs(nfserr, resp); |
70 | } | 71 | } |
71 | 72 | ||
@@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
521 | 522 | ||
522 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); | 523 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); |
523 | 524 | ||
524 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 525 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, |
526 | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
525 | fh_put(&argp->fh); | 527 | fh_put(&argp->fh); |
526 | return nfserr; | 528 | return nfserr; |
527 | } | 529 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80292ff5e924..59eeb46f82c5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -229,6 +229,7 @@ int nfsd_create_serv(void) | |||
229 | 229 | ||
230 | atomic_set(&nfsd_busy, 0); | 230 | atomic_set(&nfsd_busy, 0); |
231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
232 | AF_INET, | ||
232 | nfsd_last_thread, nfsd, THIS_MODULE); | 233 | nfsd_last_thread, nfsd, THIS_MODULE); |
233 | if (nfsd_serv == NULL) | 234 | if (nfsd_serv == NULL) |
234 | err = -ENOMEM; | 235 | err = -ENOMEM; |
@@ -243,25 +244,20 @@ static int nfsd_init_socks(int port) | |||
243 | if (!list_empty(&nfsd_serv->sv_permsocks)) | 244 | if (!list_empty(&nfsd_serv->sv_permsocks)) |
244 | return 0; | 245 | return 0; |
245 | 246 | ||
246 | error = lockd_up(IPPROTO_UDP); | 247 | error = svc_create_xprt(nfsd_serv, "udp", port, |
247 | if (error >= 0) { | ||
248 | error = svc_create_xprt(nfsd_serv, "udp", port, | ||
249 | SVC_SOCK_DEFAULTS); | 248 | SVC_SOCK_DEFAULTS); |
250 | if (error < 0) | ||
251 | lockd_down(); | ||
252 | } | ||
253 | if (error < 0) | 249 | if (error < 0) |
254 | return error; | 250 | return error; |
255 | 251 | ||
256 | error = lockd_up(IPPROTO_TCP); | 252 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
257 | if (error >= 0) { | ||
258 | error = svc_create_xprt(nfsd_serv, "tcp", port, | ||
259 | SVC_SOCK_DEFAULTS); | 253 | SVC_SOCK_DEFAULTS); |
260 | if (error < 0) | ||
261 | lockd_down(); | ||
262 | } | ||
263 | if (error < 0) | 254 | if (error < 0) |
264 | return error; | 255 | return error; |
256 | |||
257 | error = lockd_up(); | ||
258 | if (error < 0) | ||
259 | return error; | ||
260 | |||
265 | return 0; | 261 | return 0; |
266 | } | 262 | } |
267 | 263 | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 18060bed5267..aa1d0d6489a1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -83,7 +83,6 @@ struct raparm_hbucket { | |||
83 | spinlock_t pb_lock; | 83 | spinlock_t pb_lock; |
84 | } ____cacheline_aligned_in_smp; | 84 | } ____cacheline_aligned_in_smp; |
85 | 85 | ||
86 | static struct raparms * raparml; | ||
87 | #define RAPARM_HASH_BITS 4 | 86 | #define RAPARM_HASH_BITS 4 |
88 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) | 87 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) |
89 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | 88 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) |
@@ -1866,9 +1865,9 @@ out: | |||
1866 | * N.B. After this call fhp needs an fh_put | 1865 | * N.B. After this call fhp needs an fh_put |
1867 | */ | 1866 | */ |
1868 | __be32 | 1867 | __be32 |
1869 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) | 1868 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
1870 | { | 1869 | { |
1871 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); | 1870 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); |
1872 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) | 1871 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) |
1873 | err = nfserr_io; | 1872 | err = nfserr_io; |
1874 | return err; | 1873 | return err; |
@@ -1966,11 +1965,20 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
1966 | void | 1965 | void |
1967 | nfsd_racache_shutdown(void) | 1966 | nfsd_racache_shutdown(void) |
1968 | { | 1967 | { |
1969 | if (!raparml) | 1968 | struct raparms *raparm, *last_raparm; |
1970 | return; | 1969 | unsigned int i; |
1970 | |||
1971 | dprintk("nfsd: freeing readahead buffers.\n"); | 1971 | dprintk("nfsd: freeing readahead buffers.\n"); |
1972 | kfree(raparml); | 1972 | |
1973 | raparml = NULL; | 1973 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
1974 | raparm = raparm_hash[i].pb_head; | ||
1975 | while(raparm) { | ||
1976 | last_raparm = raparm; | ||
1977 | raparm = raparm->p_next; | ||
1978 | kfree(last_raparm); | ||
1979 | } | ||
1980 | raparm_hash[i].pb_head = NULL; | ||
1981 | } | ||
1974 | } | 1982 | } |
1975 | /* | 1983 | /* |
1976 | * Initialize readahead param cache | 1984 | * Initialize readahead param cache |
@@ -1981,35 +1989,38 @@ nfsd_racache_init(int cache_size) | |||
1981 | int i; | 1989 | int i; |
1982 | int j = 0; | 1990 | int j = 0; |
1983 | int nperbucket; | 1991 | int nperbucket; |
1992 | struct raparms **raparm = NULL; | ||
1984 | 1993 | ||
1985 | 1994 | ||
1986 | if (raparml) | 1995 | if (raparm_hash[0].pb_head) |
1987 | return 0; | 1996 | return 0; |
1988 | if (cache_size < 2*RAPARM_HASH_SIZE) | 1997 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); |
1989 | cache_size = 2*RAPARM_HASH_SIZE; | 1998 | if (nperbucket < 2) |
1990 | raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); | 1999 | nperbucket = 2; |
1991 | 2000 | cache_size = nperbucket * RAPARM_HASH_SIZE; | |
1992 | if (!raparml) { | ||
1993 | printk(KERN_WARNING | ||
1994 | "nfsd: Could not allocate memory read-ahead cache.\n"); | ||
1995 | return -ENOMEM; | ||
1996 | } | ||
1997 | 2001 | ||
1998 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); | 2002 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); |
1999 | for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { | 2003 | |
2000 | raparm_hash[i].pb_head = NULL; | 2004 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
2001 | spin_lock_init(&raparm_hash[i].pb_lock); | 2005 | spin_lock_init(&raparm_hash[i].pb_lock); |
2002 | } | 2006 | |
2003 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); | 2007 | raparm = &raparm_hash[i].pb_head; |
2004 | for (i = 0; i < cache_size - 1; i++) { | 2008 | for (j = 0; j < nperbucket; j++) { |
2005 | if (i % nperbucket == 0) | 2009 | *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); |
2006 | raparm_hash[j++].pb_head = raparml + i; | 2010 | if (!*raparm) |
2007 | if (i % nperbucket < nperbucket-1) | 2011 | goto out_nomem; |
2008 | raparml[i].p_next = raparml + i + 1; | 2012 | raparm = &(*raparm)->p_next; |
2013 | } | ||
2014 | *raparm = NULL; | ||
2009 | } | 2015 | } |
2010 | 2016 | ||
2011 | nfsdstats.ra_size = cache_size; | 2017 | nfsdstats.ra_size = cache_size; |
2012 | return 0; | 2018 | return 0; |
2019 | |||
2020 | out_nomem: | ||
2021 | dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); | ||
2022 | nfsd_racache_shutdown(); | ||
2023 | return -ENOMEM; | ||
2013 | } | 2024 | } |
2014 | 2025 | ||
2015 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 2026 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 64965e1c21c4..9b0efdad8910 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -13,9 +13,7 @@ | |||
13 | #include <linux/nls.h> | 13 | #include <linux/nls.h> |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #ifdef CONFIG_KMOD | ||
17 | #include <linux/kmod.h> | 16 | #include <linux/kmod.h> |
18 | #endif | ||
19 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
20 | 18 | ||
21 | static struct nls_table default_table; | 19 | static struct nls_table default_table; |
@@ -215,24 +213,7 @@ static struct nls_table *find_nls(char *charset) | |||
215 | 213 | ||
216 | struct nls_table *load_nls(char *charset) | 214 | struct nls_table *load_nls(char *charset) |
217 | { | 215 | { |
218 | struct nls_table *nls; | 216 | return try_then_request_module(find_nls(charset), "nls_%s", charset); |
219 | #ifdef CONFIG_KMOD | ||
220 | int ret; | ||
221 | #endif | ||
222 | |||
223 | nls = find_nls(charset); | ||
224 | if (nls) | ||
225 | return nls; | ||
226 | |||
227 | #ifdef CONFIG_KMOD | ||
228 | ret = request_module("nls_%s", charset); | ||
229 | if (ret != 0) { | ||
230 | printk("Unable to load NLS charset %s\n", charset); | ||
231 | return NULL; | ||
232 | } | ||
233 | nls = find_nls(charset); | ||
234 | #endif | ||
235 | return nls; | ||
236 | } | 217 | } |
237 | 218 | ||
238 | void unload_nls(struct nls_table *nls) | 219 | void unload_nls(struct nls_table *nls) |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, | |||
174 | // TODO: Consider moving this lot to a separate function! (AIA) | 174 | // TODO: Consider moving this lot to a separate function! (AIA) |
175 | handle_name: | 175 | handle_name: |
176 | { | 176 | { |
177 | struct dentry *real_dent, *new_dent; | ||
178 | MFT_RECORD *m; | 177 | MFT_RECORD *m; |
179 | ntfs_attr_search_ctx *ctx; | 178 | ntfs_attr_search_ctx *ctx; |
180 | ntfs_inode *ni = NTFS_I(dent_inode); | 179 | ntfs_inode *ni = NTFS_I(dent_inode); |
@@ -255,93 +254,9 @@ handle_name: | |||
255 | } | 254 | } |
256 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); | 255 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); |
257 | 256 | ||
258 | /* | 257 | dent = d_add_ci(dent, dent_inode, &nls_name); |
259 | * Note: No need for dent->d_lock lock as i_mutex is held on the | ||
260 | * parent inode. | ||
261 | */ | ||
262 | |||
263 | /* Does a dentry matching the nls_name exist already? */ | ||
264 | real_dent = d_lookup(dent->d_parent, &nls_name); | ||
265 | /* If not, create it now. */ | ||
266 | if (!real_dent) { | ||
267 | real_dent = d_alloc(dent->d_parent, &nls_name); | ||
268 | kfree(nls_name.name); | ||
269 | if (!real_dent) { | ||
270 | err = -ENOMEM; | ||
271 | goto err_out; | ||
272 | } | ||
273 | new_dent = d_splice_alias(dent_inode, real_dent); | ||
274 | if (new_dent) | ||
275 | dput(real_dent); | ||
276 | else | ||
277 | new_dent = real_dent; | ||
278 | ntfs_debug("Done. (Created new dentry.)"); | ||
279 | return new_dent; | ||
280 | } | ||
281 | kfree(nls_name.name); | 258 | kfree(nls_name.name); |
282 | /* Matching dentry exists, check if it is negative. */ | 259 | return dent; |
283 | if (real_dent->d_inode) { | ||
284 | if (unlikely(real_dent->d_inode != dent_inode)) { | ||
285 | /* This can happen because bad inodes are unhashed. */ | ||
286 | BUG_ON(!is_bad_inode(dent_inode)); | ||
287 | BUG_ON(!is_bad_inode(real_dent->d_inode)); | ||
288 | } | ||
289 | /* | ||
290 | * Already have the inode and the dentry attached, decrement | ||
291 | * the reference count to balance the ntfs_iget() we did | ||
292 | * earlier on. We found the dentry using d_lookup() so it | ||
293 | * cannot be disconnected and thus we do not need to worry | ||
294 | * about any NFS/disconnectedness issues here. | ||
295 | */ | ||
296 | iput(dent_inode); | ||
297 | ntfs_debug("Done. (Already had inode and dentry.)"); | ||
298 | return real_dent; | ||
299 | } | ||
300 | /* | ||
301 | * Negative dentry: instantiate it unless the inode is a directory and | ||
302 | * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), | ||
303 | * in which case d_move() that in place of the found dentry. | ||
304 | */ | ||
305 | if (!S_ISDIR(dent_inode->i_mode)) { | ||
306 | /* Not a directory; everything is easy. */ | ||
307 | d_instantiate(real_dent, dent_inode); | ||
308 | ntfs_debug("Done. (Already had negative file dentry.)"); | ||
309 | return real_dent; | ||
310 | } | ||
311 | spin_lock(&dcache_lock); | ||
312 | if (list_empty(&dent_inode->i_dentry)) { | ||
313 | /* | ||
314 | * Directory without a 'disconnected' dentry; we need to do | ||
315 | * d_instantiate() by hand because it takes dcache_lock which | ||
316 | * we already hold. | ||
317 | */ | ||
318 | list_add(&real_dent->d_alias, &dent_inode->i_dentry); | ||
319 | real_dent->d_inode = dent_inode; | ||
320 | spin_unlock(&dcache_lock); | ||
321 | security_d_instantiate(real_dent, dent_inode); | ||
322 | ntfs_debug("Done. (Already had negative directory dentry.)"); | ||
323 | return real_dent; | ||
324 | } | ||
325 | /* | ||
326 | * Directory with a 'disconnected' dentry; get a reference to the | ||
327 | * 'disconnected' dentry. | ||
328 | */ | ||
329 | new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, | ||
330 | d_alias); | ||
331 | dget_locked(new_dent); | ||
332 | spin_unlock(&dcache_lock); | ||
333 | /* Do security vodoo. */ | ||
334 | security_d_instantiate(real_dent, dent_inode); | ||
335 | /* Move new_dent in place of real_dent. */ | ||
336 | d_move(new_dent, real_dent); | ||
337 | /* Balance the ntfs_iget() we did above. */ | ||
338 | iput(dent_inode); | ||
339 | /* Throw away real_dent. */ | ||
340 | dput(real_dent); | ||
341 | /* Use new_dent as the actual dentry. */ | ||
342 | ntfs_debug("Done. (Already had negative, disconnected directory " | ||
343 | "dentry.)"); | ||
344 | return new_dent; | ||
345 | 260 | ||
346 | eio_err_out: | 261 | eio_err_out: |
347 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); | 262 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
@@ -113,7 +113,7 @@ typedef struct { | |||
113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
117 | */ | 117 | */ |
118 | enum { | 118 | enum { |
119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
149 | */ | 149 | */ |
150 | enum { | 150 | enum { |
151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index f6956de56fdb..589dcdfdfe3c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -34,7 +34,8 @@ ocfs2-objs := \ | |||
34 | symlink.o \ | 34 | symlink.o \ |
35 | sysfile.o \ | 35 | sysfile.o \ |
36 | uptodate.o \ | 36 | uptodate.o \ |
37 | ver.o | 37 | ver.o \ |
38 | xattr.o | ||
38 | 39 | ||
39 | ocfs2_stackglue-objs := stackglue.o | 40 | ocfs2_stackglue-objs := stackglue.o |
40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | 41 | ocfs2_stack_o2cb-objs := stack_o2cb.o |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e068..0cc2deb9394c 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -49,6 +49,340 @@ | |||
49 | 49 | ||
50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
51 | 51 | ||
52 | |||
53 | /* | ||
54 | * Operations for a specific extent tree type. | ||
55 | * | ||
56 | * To implement an on-disk btree (extent tree) type in ocfs2, add | ||
57 | * an ocfs2_extent_tree_operations structure and the matching | ||
58 | * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it | ||
59 | * for the allocation portion of the extent tree. | ||
60 | */ | ||
61 | struct ocfs2_extent_tree_operations { | ||
62 | /* | ||
63 | * last_eb_blk is the block number of the right most leaf extent | ||
64 | * block. Most on-disk structures containing an extent tree store | ||
65 | * this value for fast access. The ->eo_set_last_eb_blk() and | ||
66 | * ->eo_get_last_eb_blk() operations access this value. They are | ||
67 | * both required. | ||
68 | */ | ||
69 | void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et, | ||
70 | u64 blkno); | ||
71 | u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et); | ||
72 | |||
73 | /* | ||
74 | * The on-disk structure usually keeps track of how many total | ||
75 | * clusters are stored in this extent tree. This function updates | ||
76 | * that value. new_clusters is the delta, and must be | ||
77 | * added to the total. Required. | ||
78 | */ | ||
79 | void (*eo_update_clusters)(struct inode *inode, | ||
80 | struct ocfs2_extent_tree *et, | ||
81 | u32 new_clusters); | ||
82 | |||
83 | /* | ||
84 | * If ->eo_insert_check() exists, it is called before rec is | ||
85 | * inserted into the extent tree. It is optional. | ||
86 | */ | ||
87 | int (*eo_insert_check)(struct inode *inode, | ||
88 | struct ocfs2_extent_tree *et, | ||
89 | struct ocfs2_extent_rec *rec); | ||
90 | int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); | ||
91 | |||
92 | /* | ||
93 | * -------------------------------------------------------------- | ||
94 | * The remaining are internal to ocfs2_extent_tree and don't have | ||
95 | * accessor functions | ||
96 | */ | ||
97 | |||
98 | /* | ||
99 | * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el. | ||
100 | * It is required. | ||
101 | */ | ||
102 | void (*eo_fill_root_el)(struct ocfs2_extent_tree *et); | ||
103 | |||
104 | /* | ||
105 | * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if | ||
106 | * it exists. If it does not, et->et_max_leaf_clusters is set | ||
107 | * to 0 (unlimited). Optional. | ||
108 | */ | ||
109 | void (*eo_fill_max_leaf_clusters)(struct inode *inode, | ||
110 | struct ocfs2_extent_tree *et); | ||
111 | }; | ||
112 | |||
113 | |||
114 | /* | ||
115 | * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check | ||
116 | * in the methods. | ||
117 | */ | ||
118 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); | ||
119 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
120 | u64 blkno); | ||
121 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
122 | struct ocfs2_extent_tree *et, | ||
123 | u32 clusters); | ||
124 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
125 | struct ocfs2_extent_tree *et, | ||
126 | struct ocfs2_extent_rec *rec); | ||
127 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
128 | struct ocfs2_extent_tree *et); | ||
129 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); | ||
130 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { | ||
131 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, | ||
132 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, | ||
133 | .eo_update_clusters = ocfs2_dinode_update_clusters, | ||
134 | .eo_insert_check = ocfs2_dinode_insert_check, | ||
135 | .eo_sanity_check = ocfs2_dinode_sanity_check, | ||
136 | .eo_fill_root_el = ocfs2_dinode_fill_root_el, | ||
137 | }; | ||
138 | |||
139 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
140 | u64 blkno) | ||
141 | { | ||
142 | struct ocfs2_dinode *di = et->et_object; | ||
143 | |||
144 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
145 | di->i_last_eb_blk = cpu_to_le64(blkno); | ||
146 | } | ||
147 | |||
148 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
149 | { | ||
150 | struct ocfs2_dinode *di = et->et_object; | ||
151 | |||
152 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
153 | return le64_to_cpu(di->i_last_eb_blk); | ||
154 | } | ||
155 | |||
156 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
157 | struct ocfs2_extent_tree *et, | ||
158 | u32 clusters) | ||
159 | { | ||
160 | struct ocfs2_dinode *di = et->et_object; | ||
161 | |||
162 | le32_add_cpu(&di->i_clusters, clusters); | ||
163 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
164 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
165 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
166 | } | ||
167 | |||
168 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
169 | struct ocfs2_extent_tree *et, | ||
170 | struct ocfs2_extent_rec *rec) | ||
171 | { | ||
172 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
173 | |||
174 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
175 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
176 | (OCFS2_I(inode)->ip_clusters != rec->e_cpos), | ||
177 | "Device %s, asking for sparse allocation: inode %llu, " | ||
178 | "cpos %u, clusters %u\n", | ||
179 | osb->dev_str, | ||
180 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
181 | rec->e_cpos, | ||
182 | OCFS2_I(inode)->ip_clusters); | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
188 | struct ocfs2_extent_tree *et) | ||
189 | { | ||
190 | int ret = 0; | ||
191 | struct ocfs2_dinode *di; | ||
192 | |||
193 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
194 | |||
195 | di = et->et_object; | ||
196 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
197 | ret = -EIO; | ||
198 | ocfs2_error(inode->i_sb, | ||
199 | "Inode %llu has invalid path root", | ||
200 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
201 | } | ||
202 | |||
203 | return ret; | ||
204 | } | ||
205 | |||
206 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) | ||
207 | { | ||
208 | struct ocfs2_dinode *di = et->et_object; | ||
209 | |||
210 | et->et_root_el = &di->id2.i_list; | ||
211 | } | ||
212 | |||
213 | |||
214 | static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et) | ||
215 | { | ||
216 | struct ocfs2_xattr_value_root *xv = et->et_object; | ||
217 | |||
218 | et->et_root_el = &xv->xr_list; | ||
219 | } | ||
220 | |||
221 | static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
222 | u64 blkno) | ||
223 | { | ||
224 | struct ocfs2_xattr_value_root *xv = | ||
225 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
226 | |||
227 | xv->xr_last_eb_blk = cpu_to_le64(blkno); | ||
228 | } | ||
229 | |||
230 | static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
231 | { | ||
232 | struct ocfs2_xattr_value_root *xv = | ||
233 | (struct ocfs2_xattr_value_root *) et->et_object; | ||
234 | |||
235 | return le64_to_cpu(xv->xr_last_eb_blk); | ||
236 | } | ||
237 | |||
238 | static void ocfs2_xattr_value_update_clusters(struct inode *inode, | ||
239 | struct ocfs2_extent_tree *et, | ||
240 | u32 clusters) | ||
241 | { | ||
242 | struct ocfs2_xattr_value_root *xv = | ||
243 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
244 | |||
245 | le32_add_cpu(&xv->xr_clusters, clusters); | ||
246 | } | ||
247 | |||
248 | static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { | ||
249 | .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk, | ||
250 | .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk, | ||
251 | .eo_update_clusters = ocfs2_xattr_value_update_clusters, | ||
252 | .eo_fill_root_el = ocfs2_xattr_value_fill_root_el, | ||
253 | }; | ||
254 | |||
255 | static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et) | ||
256 | { | ||
257 | struct ocfs2_xattr_block *xb = et->et_object; | ||
258 | |||
259 | et->et_root_el = &xb->xb_attrs.xb_root.xt_list; | ||
260 | } | ||
261 | |||
262 | static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, | ||
263 | struct ocfs2_extent_tree *et) | ||
264 | { | ||
265 | et->et_max_leaf_clusters = | ||
266 | ocfs2_clusters_for_bytes(inode->i_sb, | ||
267 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE); | ||
268 | } | ||
269 | |||
270 | static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
271 | u64 blkno) | ||
272 | { | ||
273 | struct ocfs2_xattr_block *xb = et->et_object; | ||
274 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
275 | |||
276 | xt->xt_last_eb_blk = cpu_to_le64(blkno); | ||
277 | } | ||
278 | |||
279 | static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
280 | { | ||
281 | struct ocfs2_xattr_block *xb = et->et_object; | ||
282 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
283 | |||
284 | return le64_to_cpu(xt->xt_last_eb_blk); | ||
285 | } | ||
286 | |||
287 | static void ocfs2_xattr_tree_update_clusters(struct inode *inode, | ||
288 | struct ocfs2_extent_tree *et, | ||
289 | u32 clusters) | ||
290 | { | ||
291 | struct ocfs2_xattr_block *xb = et->et_object; | ||
292 | |||
293 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters); | ||
294 | } | ||
295 | |||
296 | static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { | ||
297 | .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk, | ||
298 | .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk, | ||
299 | .eo_update_clusters = ocfs2_xattr_tree_update_clusters, | ||
300 | .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el, | ||
301 | .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, | ||
302 | }; | ||
303 | |||
304 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | ||
305 | struct inode *inode, | ||
306 | struct buffer_head *bh, | ||
307 | void *obj, | ||
308 | struct ocfs2_extent_tree_operations *ops) | ||
309 | { | ||
310 | et->et_ops = ops; | ||
311 | et->et_root_bh = bh; | ||
312 | if (!obj) | ||
313 | obj = (void *)bh->b_data; | ||
314 | et->et_object = obj; | ||
315 | |||
316 | et->et_ops->eo_fill_root_el(et); | ||
317 | if (!et->et_ops->eo_fill_max_leaf_clusters) | ||
318 | et->et_max_leaf_clusters = 0; | ||
319 | else | ||
320 | et->et_ops->eo_fill_max_leaf_clusters(inode, et); | ||
321 | } | ||
322 | |||
323 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
324 | struct inode *inode, | ||
325 | struct buffer_head *bh) | ||
326 | { | ||
327 | __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); | ||
328 | } | ||
329 | |||
330 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
331 | struct inode *inode, | ||
332 | struct buffer_head *bh) | ||
333 | { | ||
334 | __ocfs2_init_extent_tree(et, inode, bh, NULL, | ||
335 | &ocfs2_xattr_tree_et_ops); | ||
336 | } | ||
337 | |||
338 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
339 | struct inode *inode, | ||
340 | struct buffer_head *bh, | ||
341 | struct ocfs2_xattr_value_root *xv) | ||
342 | { | ||
343 | __ocfs2_init_extent_tree(et, inode, bh, xv, | ||
344 | &ocfs2_xattr_value_et_ops); | ||
345 | } | ||
346 | |||
347 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
348 | u64 new_last_eb_blk) | ||
349 | { | ||
350 | et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk); | ||
351 | } | ||
352 | |||
353 | static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
354 | { | ||
355 | return et->et_ops->eo_get_last_eb_blk(et); | ||
356 | } | ||
357 | |||
358 | static inline void ocfs2_et_update_clusters(struct inode *inode, | ||
359 | struct ocfs2_extent_tree *et, | ||
360 | u32 clusters) | ||
361 | { | ||
362 | et->et_ops->eo_update_clusters(inode, et, clusters); | ||
363 | } | ||
364 | |||
365 | static inline int ocfs2_et_insert_check(struct inode *inode, | ||
366 | struct ocfs2_extent_tree *et, | ||
367 | struct ocfs2_extent_rec *rec) | ||
368 | { | ||
369 | int ret = 0; | ||
370 | |||
371 | if (et->et_ops->eo_insert_check) | ||
372 | ret = et->et_ops->eo_insert_check(inode, et, rec); | ||
373 | return ret; | ||
374 | } | ||
375 | |||
376 | static inline int ocfs2_et_sanity_check(struct inode *inode, | ||
377 | struct ocfs2_extent_tree *et) | ||
378 | { | ||
379 | int ret = 0; | ||
380 | |||
381 | if (et->et_ops->eo_sanity_check) | ||
382 | ret = et->et_ops->eo_sanity_check(inode, et); | ||
383 | return ret; | ||
384 | } | ||
385 | |||
52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 386 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
53 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | 387 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, |
54 | struct ocfs2_extent_block *eb); | 388 | struct ocfs2_extent_block *eb); |
@@ -205,17 +539,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, | |||
205 | } | 539 | } |
206 | 540 | ||
207 | /* | 541 | /* |
208 | * Allocate and initialize a new path based on a disk inode tree. | ||
209 | */ | ||
210 | static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh) | ||
211 | { | ||
212 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
213 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
214 | |||
215 | return ocfs2_new_path(di_bh, el); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Convenience function to journal all components in a path. | 542 | * Convenience function to journal all components in a path. |
220 | */ | 543 | */ |
221 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, | 544 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, |
@@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt { | |||
368 | */ | 691 | */ |
369 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 692 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
370 | struct inode *inode, | 693 | struct inode *inode, |
371 | struct ocfs2_dinode *fe) | 694 | struct ocfs2_extent_tree *et) |
372 | { | 695 | { |
373 | int retval; | 696 | int retval; |
374 | struct ocfs2_extent_list *el; | 697 | struct ocfs2_extent_list *el = NULL; |
375 | struct ocfs2_extent_block *eb; | 698 | struct ocfs2_extent_block *eb; |
376 | struct buffer_head *eb_bh = NULL; | 699 | struct buffer_head *eb_bh = NULL; |
700 | u64 last_eb_blk = 0; | ||
377 | 701 | ||
378 | mlog_entry_void(); | 702 | mlog_entry_void(); |
379 | 703 | ||
380 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 704 | el = et->et_root_el; |
381 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 705 | last_eb_blk = ocfs2_et_get_last_eb_blk(et); |
382 | retval = -EIO; | ||
383 | goto bail; | ||
384 | } | ||
385 | 706 | ||
386 | if (fe->i_last_eb_blk) { | 707 | if (last_eb_blk) { |
387 | retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 708 | retval = ocfs2_read_block(inode, last_eb_blk, |
388 | &eb_bh, OCFS2_BH_CACHED, inode); | 709 | &eb_bh); |
389 | if (retval < 0) { | 710 | if (retval < 0) { |
390 | mlog_errno(retval); | 711 | mlog_errno(retval); |
391 | goto bail; | 712 | goto bail; |
392 | } | 713 | } |
393 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 714 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
394 | el = &eb->h_list; | 715 | el = &eb->h_list; |
395 | } else | 716 | } |
396 | el = &fe->id2.i_list; | ||
397 | 717 | ||
398 | BUG_ON(el->l_tree_depth != 0); | 718 | BUG_ON(el->l_tree_depth != 0); |
399 | 719 | ||
400 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); | 720 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); |
401 | bail: | 721 | bail: |
402 | if (eb_bh) | 722 | brelse(eb_bh); |
403 | brelse(eb_bh); | ||
404 | 723 | ||
405 | mlog_exit(retval); | 724 | mlog_exit(retval); |
406 | return retval; | 725 | return retval; |
@@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
486 | bail: | 805 | bail: |
487 | if (status < 0) { | 806 | if (status < 0) { |
488 | for(i = 0; i < wanted; i++) { | 807 | for(i = 0; i < wanted; i++) { |
489 | if (bhs[i]) | 808 | brelse(bhs[i]); |
490 | brelse(bhs[i]); | ||
491 | bhs[i] = NULL; | 809 | bhs[i] = NULL; |
492 | } | 810 | } |
493 | } | 811 | } |
@@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) | |||
531 | static int ocfs2_add_branch(struct ocfs2_super *osb, | 849 | static int ocfs2_add_branch(struct ocfs2_super *osb, |
532 | handle_t *handle, | 850 | handle_t *handle, |
533 | struct inode *inode, | 851 | struct inode *inode, |
534 | struct buffer_head *fe_bh, | 852 | struct ocfs2_extent_tree *et, |
535 | struct buffer_head *eb_bh, | 853 | struct buffer_head *eb_bh, |
536 | struct buffer_head **last_eb_bh, | 854 | struct buffer_head **last_eb_bh, |
537 | struct ocfs2_alloc_context *meta_ac) | 855 | struct ocfs2_alloc_context *meta_ac) |
@@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
540 | u64 next_blkno, new_last_eb_blk; | 858 | u64 next_blkno, new_last_eb_blk; |
541 | struct buffer_head *bh; | 859 | struct buffer_head *bh; |
542 | struct buffer_head **new_eb_bhs = NULL; | 860 | struct buffer_head **new_eb_bhs = NULL; |
543 | struct ocfs2_dinode *fe; | ||
544 | struct ocfs2_extent_block *eb; | 861 | struct ocfs2_extent_block *eb; |
545 | struct ocfs2_extent_list *eb_el; | 862 | struct ocfs2_extent_list *eb_el; |
546 | struct ocfs2_extent_list *el; | 863 | struct ocfs2_extent_list *el; |
@@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
550 | 867 | ||
551 | BUG_ON(!last_eb_bh || !*last_eb_bh); | 868 | BUG_ON(!last_eb_bh || !*last_eb_bh); |
552 | 869 | ||
553 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
554 | |||
555 | if (eb_bh) { | 870 | if (eb_bh) { |
556 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 871 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
557 | el = &eb->h_list; | 872 | el = &eb->h_list; |
558 | } else | 873 | } else |
559 | el = &fe->id2.i_list; | 874 | el = et->et_root_el; |
560 | 875 | ||
561 | /* we never add a branch to a leaf. */ | 876 | /* we never add a branch to a leaf. */ |
562 | BUG_ON(!el->l_tree_depth); | 877 | BUG_ON(!el->l_tree_depth); |
@@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
646 | mlog_errno(status); | 961 | mlog_errno(status); |
647 | goto bail; | 962 | goto bail; |
648 | } | 963 | } |
649 | status = ocfs2_journal_access(handle, inode, fe_bh, | 964 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
650 | OCFS2_JOURNAL_ACCESS_WRITE); | 965 | OCFS2_JOURNAL_ACCESS_WRITE); |
651 | if (status < 0) { | 966 | if (status < 0) { |
652 | mlog_errno(status); | 967 | mlog_errno(status); |
@@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
662 | } | 977 | } |
663 | 978 | ||
664 | /* Link the new branch into the rest of the tree (el will | 979 | /* Link the new branch into the rest of the tree (el will |
665 | * either be on the fe, or the extent block passed in. */ | 980 | * either be on the root_bh, or the extent block passed in. */ |
666 | i = le16_to_cpu(el->l_next_free_rec); | 981 | i = le16_to_cpu(el->l_next_free_rec); |
667 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); | 982 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); |
668 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); | 983 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); |
@@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
671 | 986 | ||
672 | /* fe needs a new last extent block pointer, as does the | 987 | /* fe needs a new last extent block pointer, as does the |
673 | * next_leaf on the previously last-extent-block. */ | 988 | * next_leaf on the previously last-extent-block. */ |
674 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); | 989 | ocfs2_et_set_last_eb_blk(et, new_last_eb_blk); |
675 | 990 | ||
676 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; | 991 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; |
677 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); | 992 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); |
@@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
679 | status = ocfs2_journal_dirty(handle, *last_eb_bh); | 994 | status = ocfs2_journal_dirty(handle, *last_eb_bh); |
680 | if (status < 0) | 995 | if (status < 0) |
681 | mlog_errno(status); | 996 | mlog_errno(status); |
682 | status = ocfs2_journal_dirty(handle, fe_bh); | 997 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
683 | if (status < 0) | 998 | if (status < 0) |
684 | mlog_errno(status); | 999 | mlog_errno(status); |
685 | if (eb_bh) { | 1000 | if (eb_bh) { |
@@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
700 | bail: | 1015 | bail: |
701 | if (new_eb_bhs) { | 1016 | if (new_eb_bhs) { |
702 | for (i = 0; i < new_blocks; i++) | 1017 | for (i = 0; i < new_blocks; i++) |
703 | if (new_eb_bhs[i]) | 1018 | brelse(new_eb_bhs[i]); |
704 | brelse(new_eb_bhs[i]); | ||
705 | kfree(new_eb_bhs); | 1019 | kfree(new_eb_bhs); |
706 | } | 1020 | } |
707 | 1021 | ||
@@ -717,16 +1031,15 @@ bail: | |||
717 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | 1031 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, |
718 | handle_t *handle, | 1032 | handle_t *handle, |
719 | struct inode *inode, | 1033 | struct inode *inode, |
720 | struct buffer_head *fe_bh, | 1034 | struct ocfs2_extent_tree *et, |
721 | struct ocfs2_alloc_context *meta_ac, | 1035 | struct ocfs2_alloc_context *meta_ac, |
722 | struct buffer_head **ret_new_eb_bh) | 1036 | struct buffer_head **ret_new_eb_bh) |
723 | { | 1037 | { |
724 | int status, i; | 1038 | int status, i; |
725 | u32 new_clusters; | 1039 | u32 new_clusters; |
726 | struct buffer_head *new_eb_bh = NULL; | 1040 | struct buffer_head *new_eb_bh = NULL; |
727 | struct ocfs2_dinode *fe; | ||
728 | struct ocfs2_extent_block *eb; | 1041 | struct ocfs2_extent_block *eb; |
729 | struct ocfs2_extent_list *fe_el; | 1042 | struct ocfs2_extent_list *root_el; |
730 | struct ocfs2_extent_list *eb_el; | 1043 | struct ocfs2_extent_list *eb_el; |
731 | 1044 | ||
732 | mlog_entry_void(); | 1045 | mlog_entry_void(); |
@@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
746 | } | 1059 | } |
747 | 1060 | ||
748 | eb_el = &eb->h_list; | 1061 | eb_el = &eb->h_list; |
749 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1062 | root_el = et->et_root_el; |
750 | fe_el = &fe->id2.i_list; | ||
751 | 1063 | ||
752 | status = ocfs2_journal_access(handle, inode, new_eb_bh, | 1064 | status = ocfs2_journal_access(handle, inode, new_eb_bh, |
753 | OCFS2_JOURNAL_ACCESS_CREATE); | 1065 | OCFS2_JOURNAL_ACCESS_CREATE); |
@@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
756 | goto bail; | 1068 | goto bail; |
757 | } | 1069 | } |
758 | 1070 | ||
759 | /* copy the fe data into the new extent block */ | 1071 | /* copy the root extent list data into the new extent block */ |
760 | eb_el->l_tree_depth = fe_el->l_tree_depth; | 1072 | eb_el->l_tree_depth = root_el->l_tree_depth; |
761 | eb_el->l_next_free_rec = fe_el->l_next_free_rec; | 1073 | eb_el->l_next_free_rec = root_el->l_next_free_rec; |
762 | for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1074 | for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
763 | eb_el->l_recs[i] = fe_el->l_recs[i]; | 1075 | eb_el->l_recs[i] = root_el->l_recs[i]; |
764 | 1076 | ||
765 | status = ocfs2_journal_dirty(handle, new_eb_bh); | 1077 | status = ocfs2_journal_dirty(handle, new_eb_bh); |
766 | if (status < 0) { | 1078 | if (status < 0) { |
@@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
768 | goto bail; | 1080 | goto bail; |
769 | } | 1081 | } |
770 | 1082 | ||
771 | status = ocfs2_journal_access(handle, inode, fe_bh, | 1083 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
772 | OCFS2_JOURNAL_ACCESS_WRITE); | 1084 | OCFS2_JOURNAL_ACCESS_WRITE); |
773 | if (status < 0) { | 1085 | if (status < 0) { |
774 | mlog_errno(status); | 1086 | mlog_errno(status); |
@@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
777 | 1089 | ||
778 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); | 1090 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); |
779 | 1091 | ||
780 | /* update fe now */ | 1092 | /* update root_bh now */ |
781 | le16_add_cpu(&fe_el->l_tree_depth, 1); | 1093 | le16_add_cpu(&root_el->l_tree_depth, 1); |
782 | fe_el->l_recs[0].e_cpos = 0; | 1094 | root_el->l_recs[0].e_cpos = 0; |
783 | fe_el->l_recs[0].e_blkno = eb->h_blkno; | 1095 | root_el->l_recs[0].e_blkno = eb->h_blkno; |
784 | fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); | 1096 | root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); |
785 | for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1097 | for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
786 | memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); | 1098 | memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); |
787 | fe_el->l_next_free_rec = cpu_to_le16(1); | 1099 | root_el->l_next_free_rec = cpu_to_le16(1); |
788 | 1100 | ||
789 | /* If this is our 1st tree depth shift, then last_eb_blk | 1101 | /* If this is our 1st tree depth shift, then last_eb_blk |
790 | * becomes the allocated extent block */ | 1102 | * becomes the allocated extent block */ |
791 | if (fe_el->l_tree_depth == cpu_to_le16(1)) | 1103 | if (root_el->l_tree_depth == cpu_to_le16(1)) |
792 | fe->i_last_eb_blk = eb->h_blkno; | 1104 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
793 | 1105 | ||
794 | status = ocfs2_journal_dirty(handle, fe_bh); | 1106 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
795 | if (status < 0) { | 1107 | if (status < 0) { |
796 | mlog_errno(status); | 1108 | mlog_errno(status); |
797 | goto bail; | 1109 | goto bail; |
@@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
801 | new_eb_bh = NULL; | 1113 | new_eb_bh = NULL; |
802 | status = 0; | 1114 | status = 0; |
803 | bail: | 1115 | bail: |
804 | if (new_eb_bh) | 1116 | brelse(new_eb_bh); |
805 | brelse(new_eb_bh); | ||
806 | 1117 | ||
807 | mlog_exit(status); | 1118 | mlog_exit(status); |
808 | return status; | 1119 | return status; |
@@ -817,22 +1128,21 @@ bail: | |||
817 | * 1) a lowest extent block is found, then we pass it back in | 1128 | * 1) a lowest extent block is found, then we pass it back in |
818 | * *lowest_eb_bh and return '0' | 1129 | * *lowest_eb_bh and return '0' |
819 | * | 1130 | * |
820 | * 2) the search fails to find anything, but the dinode has room. We | 1131 | * 2) the search fails to find anything, but the root_el has room. We |
821 | * pass NULL back in *lowest_eb_bh, but still return '0' | 1132 | * pass NULL back in *lowest_eb_bh, but still return '0' |
822 | * | 1133 | * |
823 | * 3) the search fails to find anything AND the dinode is full, in | 1134 | * 3) the search fails to find anything AND the root_el is full, in |
824 | * which case we return > 0 | 1135 | * which case we return > 0 |
825 | * | 1136 | * |
826 | * return status < 0 indicates an error. | 1137 | * return status < 0 indicates an error. |
827 | */ | 1138 | */ |
828 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, | 1139 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, |
829 | struct inode *inode, | 1140 | struct inode *inode, |
830 | struct buffer_head *fe_bh, | 1141 | struct ocfs2_extent_tree *et, |
831 | struct buffer_head **target_bh) | 1142 | struct buffer_head **target_bh) |
832 | { | 1143 | { |
833 | int status = 0, i; | 1144 | int status = 0, i; |
834 | u64 blkno; | 1145 | u64 blkno; |
835 | struct ocfs2_dinode *fe; | ||
836 | struct ocfs2_extent_block *eb; | 1146 | struct ocfs2_extent_block *eb; |
837 | struct ocfs2_extent_list *el; | 1147 | struct ocfs2_extent_list *el; |
838 | struct buffer_head *bh = NULL; | 1148 | struct buffer_head *bh = NULL; |
@@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
842 | 1152 | ||
843 | *target_bh = NULL; | 1153 | *target_bh = NULL; |
844 | 1154 | ||
845 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1155 | el = et->et_root_el; |
846 | el = &fe->id2.i_list; | ||
847 | 1156 | ||
848 | while(le16_to_cpu(el->l_tree_depth) > 1) { | 1157 | while(le16_to_cpu(el->l_tree_depth) > 1) { |
849 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 1158 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
@@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
864 | goto bail; | 1173 | goto bail; |
865 | } | 1174 | } |
866 | 1175 | ||
867 | if (bh) { | 1176 | brelse(bh); |
868 | brelse(bh); | 1177 | bh = NULL; |
869 | bh = NULL; | ||
870 | } | ||
871 | 1178 | ||
872 | status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, | 1179 | status = ocfs2_read_block(inode, blkno, &bh); |
873 | inode); | ||
874 | if (status < 0) { | 1180 | if (status < 0) { |
875 | mlog_errno(status); | 1181 | mlog_errno(status); |
876 | goto bail; | 1182 | goto bail; |
@@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
886 | 1192 | ||
887 | if (le16_to_cpu(el->l_next_free_rec) < | 1193 | if (le16_to_cpu(el->l_next_free_rec) < |
888 | le16_to_cpu(el->l_count)) { | 1194 | le16_to_cpu(el->l_count)) { |
889 | if (lowest_bh) | 1195 | brelse(lowest_bh); |
890 | brelse(lowest_bh); | ||
891 | lowest_bh = bh; | 1196 | lowest_bh = bh; |
892 | get_bh(lowest_bh); | 1197 | get_bh(lowest_bh); |
893 | } | 1198 | } |
@@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
895 | 1200 | ||
896 | /* If we didn't find one and the fe doesn't have any room, | 1201 | /* If we didn't find one and the fe doesn't have any room, |
897 | * then return '1' */ | 1202 | * then return '1' */ |
898 | if (!lowest_bh | 1203 | el = et->et_root_el; |
899 | && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) | 1204 | if (!lowest_bh && (el->l_next_free_rec == el->l_count)) |
900 | status = 1; | 1205 | status = 1; |
901 | 1206 | ||
902 | *target_bh = lowest_bh; | 1207 | *target_bh = lowest_bh; |
903 | bail: | 1208 | bail: |
904 | if (bh) | 1209 | brelse(bh); |
905 | brelse(bh); | ||
906 | 1210 | ||
907 | mlog_exit(status); | 1211 | mlog_exit(status); |
908 | return status; | 1212 | return status; |
@@ -919,19 +1223,19 @@ bail: | |||
919 | * *last_eb_bh will be updated by ocfs2_add_branch(). | 1223 | * *last_eb_bh will be updated by ocfs2_add_branch(). |
920 | */ | 1224 | */ |
921 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | 1225 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, |
922 | struct buffer_head *di_bh, int *final_depth, | 1226 | struct ocfs2_extent_tree *et, int *final_depth, |
923 | struct buffer_head **last_eb_bh, | 1227 | struct buffer_head **last_eb_bh, |
924 | struct ocfs2_alloc_context *meta_ac) | 1228 | struct ocfs2_alloc_context *meta_ac) |
925 | { | 1229 | { |
926 | int ret, shift; | 1230 | int ret, shift; |
927 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1231 | struct ocfs2_extent_list *el = et->et_root_el; |
928 | int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); | 1232 | int depth = le16_to_cpu(el->l_tree_depth); |
929 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
930 | struct buffer_head *bh = NULL; | 1234 | struct buffer_head *bh = NULL; |
931 | 1235 | ||
932 | BUG_ON(meta_ac == NULL); | 1236 | BUG_ON(meta_ac == NULL); |
933 | 1237 | ||
934 | shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); | 1238 | shift = ocfs2_find_branch_target(osb, inode, et, &bh); |
935 | if (shift < 0) { | 1239 | if (shift < 0) { |
936 | ret = shift; | 1240 | ret = shift; |
937 | mlog_errno(ret); | 1241 | mlog_errno(ret); |
@@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
948 | /* ocfs2_shift_tree_depth will return us a buffer with | 1252 | /* ocfs2_shift_tree_depth will return us a buffer with |
949 | * the new extent block (so we can pass that to | 1253 | * the new extent block (so we can pass that to |
950 | * ocfs2_add_branch). */ | 1254 | * ocfs2_add_branch). */ |
951 | ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, | 1255 | ret = ocfs2_shift_tree_depth(osb, handle, inode, et, |
952 | meta_ac, &bh); | 1256 | meta_ac, &bh); |
953 | if (ret < 0) { | 1257 | if (ret < 0) { |
954 | mlog_errno(ret); | 1258 | mlog_errno(ret); |
@@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
975 | /* call ocfs2_add_branch to add the final part of the tree with | 1279 | /* call ocfs2_add_branch to add the final part of the tree with |
976 | * the new data. */ | 1280 | * the new data. */ |
977 | mlog(0, "add branch. bh = %p\n", bh); | 1281 | mlog(0, "add branch. bh = %p\n", bh); |
978 | ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, | 1282 | ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, |
979 | meta_ac); | 1283 | meta_ac); |
980 | if (ret < 0) { | 1284 | if (ret < 0) { |
981 | mlog_errno(ret); | 1285 | mlog_errno(ret); |
@@ -990,15 +1294,6 @@ out: | |||
990 | } | 1294 | } |
991 | 1295 | ||
992 | /* | 1296 | /* |
993 | * This is only valid for leaf nodes, which are the only ones that can | ||
994 | * have empty extents anyway. | ||
995 | */ | ||
996 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
997 | { | ||
998 | return !rec->e_leaf_clusters; | ||
999 | } | ||
1000 | |||
1001 | /* | ||
1002 | * This function will discard the rightmost extent record. | 1297 | * This function will discard the rightmost extent record. |
1003 | */ | 1298 | */ |
1004 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) | 1299 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) |
@@ -1245,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1245 | 1540 | ||
1246 | brelse(bh); | 1541 | brelse(bh); |
1247 | bh = NULL; | 1542 | bh = NULL; |
1248 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, | 1543 | ret = ocfs2_read_block(inode, blkno, &bh); |
1249 | &bh, OCFS2_BH_CACHED, inode); | ||
1250 | if (ret) { | 1544 | if (ret) { |
1251 | mlog_errno(ret); | 1545 | mlog_errno(ret); |
1252 | goto out; | 1546 | goto out; |
@@ -2067,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2067 | struct ocfs2_path *right_path, | 2361 | struct ocfs2_path *right_path, |
2068 | int subtree_index, | 2362 | int subtree_index, |
2069 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2363 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2070 | int *deleted) | 2364 | int *deleted, |
2365 | struct ocfs2_extent_tree *et) | ||
2071 | { | 2366 | { |
2072 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | 2367 | int ret, i, del_right_subtree = 0, right_has_empty = 0; |
2073 | struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); | 2368 | struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); |
2074 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2075 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; | 2369 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; |
2076 | struct ocfs2_extent_block *eb; | 2370 | struct ocfs2_extent_block *eb; |
2077 | 2371 | ||
@@ -2123,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2123 | * We have to update i_last_eb_blk during the meta | 2417 | * We have to update i_last_eb_blk during the meta |
2124 | * data delete. | 2418 | * data delete. |
2125 | */ | 2419 | */ |
2126 | ret = ocfs2_journal_access(handle, inode, di_bh, | 2420 | ret = ocfs2_journal_access(handle, inode, et_root_bh, |
2127 | OCFS2_JOURNAL_ACCESS_WRITE); | 2421 | OCFS2_JOURNAL_ACCESS_WRITE); |
2128 | if (ret) { | 2422 | if (ret) { |
2129 | mlog_errno(ret); | 2423 | mlog_errno(ret); |
@@ -2198,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2198 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2492 | ocfs2_update_edge_lengths(inode, handle, left_path); |
2199 | 2493 | ||
2200 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2494 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
2201 | di->i_last_eb_blk = eb->h_blkno; | 2495 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
2202 | 2496 | ||
2203 | /* | 2497 | /* |
2204 | * Removal of the extent in the left leaf was skipped | 2498 | * Removal of the extent in the left leaf was skipped |
@@ -2208,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2208 | if (right_has_empty) | 2502 | if (right_has_empty) |
2209 | ocfs2_remove_empty_extent(left_leaf_el); | 2503 | ocfs2_remove_empty_extent(left_leaf_el); |
2210 | 2504 | ||
2211 | ret = ocfs2_journal_dirty(handle, di_bh); | 2505 | ret = ocfs2_journal_dirty(handle, et_root_bh); |
2212 | if (ret) | 2506 | if (ret) |
2213 | mlog_errno(ret); | 2507 | mlog_errno(ret); |
2214 | 2508 | ||
@@ -2331,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2331 | handle_t *handle, int orig_credits, | 2625 | handle_t *handle, int orig_credits, |
2332 | struct ocfs2_path *path, | 2626 | struct ocfs2_path *path, |
2333 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2627 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2334 | struct ocfs2_path **empty_extent_path) | 2628 | struct ocfs2_path **empty_extent_path, |
2629 | struct ocfs2_extent_tree *et) | ||
2335 | { | 2630 | { |
2336 | int ret, subtree_root, deleted; | 2631 | int ret, subtree_root, deleted; |
2337 | u32 right_cpos; | 2632 | u32 right_cpos; |
@@ -2404,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2404 | 2699 | ||
2405 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | 2700 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, |
2406 | right_path, subtree_root, | 2701 | right_path, subtree_root, |
2407 | dealloc, &deleted); | 2702 | dealloc, &deleted, et); |
2408 | if (ret == -EAGAIN) { | 2703 | if (ret == -EAGAIN) { |
2409 | /* | 2704 | /* |
2410 | * The rotation has to temporarily stop due to | 2705 | * The rotation has to temporarily stop due to |
@@ -2447,29 +2742,20 @@ out: | |||
2447 | } | 2742 | } |
2448 | 2743 | ||
2449 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | 2744 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, |
2450 | struct ocfs2_path *path, | 2745 | struct ocfs2_path *path, |
2451 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2746 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2747 | struct ocfs2_extent_tree *et) | ||
2452 | { | 2748 | { |
2453 | int ret, subtree_index; | 2749 | int ret, subtree_index; |
2454 | u32 cpos; | 2750 | u32 cpos; |
2455 | struct ocfs2_path *left_path = NULL; | 2751 | struct ocfs2_path *left_path = NULL; |
2456 | struct ocfs2_dinode *di; | ||
2457 | struct ocfs2_extent_block *eb; | 2752 | struct ocfs2_extent_block *eb; |
2458 | struct ocfs2_extent_list *el; | 2753 | struct ocfs2_extent_list *el; |
2459 | 2754 | ||
2460 | /* | ||
2461 | * XXX: This code assumes that the root is an inode, which is | ||
2462 | * true for now but may change as tree code gets generic. | ||
2463 | */ | ||
2464 | di = (struct ocfs2_dinode *)path_root_bh(path)->b_data; | ||
2465 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
2466 | ret = -EIO; | ||
2467 | ocfs2_error(inode->i_sb, | ||
2468 | "Inode %llu has invalid path root", | ||
2469 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
2470 | goto out; | ||
2471 | } | ||
2472 | 2755 | ||
2756 | ret = ocfs2_et_sanity_check(inode, et); | ||
2757 | if (ret) | ||
2758 | goto out; | ||
2473 | /* | 2759 | /* |
2474 | * There's two ways we handle this depending on | 2760 | * There's two ways we handle this depending on |
2475 | * whether path is the only existing one. | 2761 | * whether path is the only existing one. |
@@ -2526,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
2526 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2812 | ocfs2_update_edge_lengths(inode, handle, left_path); |
2527 | 2813 | ||
2528 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2814 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
2529 | di->i_last_eb_blk = eb->h_blkno; | 2815 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
2530 | } else { | 2816 | } else { |
2531 | /* | 2817 | /* |
2532 | * 'path' is also the leftmost path which | 2818 | * 'path' is also the leftmost path which |
@@ -2537,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
2537 | */ | 2823 | */ |
2538 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | 2824 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); |
2539 | 2825 | ||
2540 | el = &di->id2.i_list; | 2826 | el = et->et_root_el; |
2541 | el->l_tree_depth = 0; | 2827 | el->l_tree_depth = 0; |
2542 | el->l_next_free_rec = 0; | 2828 | el->l_next_free_rec = 0; |
2543 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | 2829 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); |
2544 | 2830 | ||
2545 | di->i_last_eb_blk = 0; | 2831 | ocfs2_et_set_last_eb_blk(et, 0); |
2546 | } | 2832 | } |
2547 | 2833 | ||
2548 | ocfs2_journal_dirty(handle, path_root_bh(path)); | 2834 | ocfs2_journal_dirty(handle, path_root_bh(path)); |
@@ -2570,7 +2856,8 @@ out: | |||
2570 | */ | 2856 | */ |
2571 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | 2857 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, |
2572 | struct ocfs2_path *path, | 2858 | struct ocfs2_path *path, |
2573 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2859 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2860 | struct ocfs2_extent_tree *et) | ||
2574 | { | 2861 | { |
2575 | int ret, orig_credits = handle->h_buffer_credits; | 2862 | int ret, orig_credits = handle->h_buffer_credits; |
2576 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | 2863 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; |
@@ -2584,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | |||
2584 | if (path->p_tree_depth == 0) { | 2871 | if (path->p_tree_depth == 0) { |
2585 | rightmost_no_delete: | 2872 | rightmost_no_delete: |
2586 | /* | 2873 | /* |
2587 | * In-inode extents. This is trivially handled, so do | 2874 | * Inline extents. This is trivially handled, so do |
2588 | * it up front. | 2875 | * it up front. |
2589 | */ | 2876 | */ |
2590 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | 2877 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, |
@@ -2638,7 +2925,7 @@ rightmost_no_delete: | |||
2638 | */ | 2925 | */ |
2639 | 2926 | ||
2640 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | 2927 | ret = ocfs2_remove_rightmost_path(inode, handle, path, |
2641 | dealloc); | 2928 | dealloc, et); |
2642 | if (ret) | 2929 | if (ret) |
2643 | mlog_errno(ret); | 2930 | mlog_errno(ret); |
2644 | goto out; | 2931 | goto out; |
@@ -2650,7 +2937,7 @@ rightmost_no_delete: | |||
2650 | */ | 2937 | */ |
2651 | try_rotate: | 2938 | try_rotate: |
2652 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | 2939 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, |
2653 | dealloc, &restart_path); | 2940 | dealloc, &restart_path, et); |
2654 | if (ret && ret != -EAGAIN) { | 2941 | if (ret && ret != -EAGAIN) { |
2655 | mlog_errno(ret); | 2942 | mlog_errno(ret); |
2656 | goto out; | 2943 | goto out; |
@@ -2662,7 +2949,7 @@ try_rotate: | |||
2662 | 2949 | ||
2663 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | 2950 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, |
2664 | tmp_path, dealloc, | 2951 | tmp_path, dealloc, |
2665 | &restart_path); | 2952 | &restart_path, et); |
2666 | if (ret && ret != -EAGAIN) { | 2953 | if (ret && ret != -EAGAIN) { |
2667 | mlog_errno(ret); | 2954 | mlog_errno(ret); |
2668 | goto out; | 2955 | goto out; |
@@ -2948,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
2948 | handle_t *handle, | 3235 | handle_t *handle, |
2949 | struct ocfs2_extent_rec *split_rec, | 3236 | struct ocfs2_extent_rec *split_rec, |
2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3237 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3238 | struct ocfs2_extent_tree *et, | ||
2951 | int index) | 3239 | int index) |
2952 | { | 3240 | { |
2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; | 3241 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
@@ -3068,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | 3356 | le16_to_cpu(el->l_next_free_rec) == 1) { |
3069 | 3357 | ||
3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | 3358 | ret = ocfs2_remove_rightmost_path(inode, handle, |
3071 | right_path, dealloc); | 3359 | right_path, |
3360 | dealloc, et); | ||
3072 | if (ret) { | 3361 | if (ret) { |
3073 | mlog_errno(ret); | 3362 | mlog_errno(ret); |
3074 | goto out; | 3363 | goto out; |
@@ -3095,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3095 | int split_index, | 3384 | int split_index, |
3096 | struct ocfs2_extent_rec *split_rec, | 3385 | struct ocfs2_extent_rec *split_rec, |
3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3386 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3098 | struct ocfs2_merge_ctxt *ctxt) | 3387 | struct ocfs2_merge_ctxt *ctxt, |
3388 | struct ocfs2_extent_tree *et) | ||
3099 | 3389 | ||
3100 | { | 3390 | { |
3101 | int ret = 0; | 3391 | int ret = 0; |
@@ -3113,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3113 | * illegal. | 3403 | * illegal. |
3114 | */ | 3404 | */ |
3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3405 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3116 | dealloc); | 3406 | dealloc, et); |
3117 | if (ret) { | 3407 | if (ret) { |
3118 | mlog_errno(ret); | 3408 | mlog_errno(ret); |
3119 | goto out; | 3409 | goto out; |
@@ -3156,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3446 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
3157 | 3447 | ||
3158 | /* The merge left us with an empty extent, remove it. */ | 3448 | /* The merge left us with an empty extent, remove it. */ |
3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 3449 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3450 | dealloc, et); | ||
3160 | if (ret) { | 3451 | if (ret) { |
3161 | mlog_errno(ret); | 3452 | mlog_errno(ret); |
3162 | goto out; | 3453 | goto out; |
@@ -3170,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3170 | */ | 3461 | */ |
3171 | ret = ocfs2_merge_rec_left(inode, path, | 3462 | ret = ocfs2_merge_rec_left(inode, path, |
3172 | handle, rec, | 3463 | handle, rec, |
3173 | dealloc, | 3464 | dealloc, et, |
3174 | split_index); | 3465 | split_index); |
3175 | 3466 | ||
3176 | if (ret) { | 3467 | if (ret) { |
@@ -3179,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3179 | } | 3470 | } |
3180 | 3471 | ||
3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3472 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3182 | dealloc); | 3473 | dealloc, et); |
3183 | /* | 3474 | /* |
3184 | * Error from this last rotate is not critical, so | 3475 | * Error from this last rotate is not critical, so |
3185 | * print but don't bubble it up. | 3476 | * print but don't bubble it up. |
@@ -3199,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3199 | ret = ocfs2_merge_rec_left(inode, | 3490 | ret = ocfs2_merge_rec_left(inode, |
3200 | path, | 3491 | path, |
3201 | handle, split_rec, | 3492 | handle, split_rec, |
3202 | dealloc, | 3493 | dealloc, et, |
3203 | split_index); | 3494 | split_index); |
3204 | if (ret) { | 3495 | if (ret) { |
3205 | mlog_errno(ret); | 3496 | mlog_errno(ret); |
@@ -3222,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3222 | * our leaf. Try to rotate it away. | 3513 | * our leaf. Try to rotate it away. |
3223 | */ | 3514 | */ |
3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3515 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
3225 | dealloc); | 3516 | dealloc, et); |
3226 | if (ret) | 3517 | if (ret) |
3227 | mlog_errno(ret); | 3518 | mlog_errno(ret); |
3228 | ret = 0; | 3519 | ret = 0; |
@@ -3356,16 +3647,6 @@ rotate: | |||
3356 | ocfs2_rotate_leaf(el, insert_rec); | 3647 | ocfs2_rotate_leaf(el, insert_rec); |
3357 | } | 3648 | } |
3358 | 3649 | ||
3359 | static inline void ocfs2_update_dinode_clusters(struct inode *inode, | ||
3360 | struct ocfs2_dinode *di, | ||
3361 | u32 clusters) | ||
3362 | { | ||
3363 | le32_add_cpu(&di->i_clusters, clusters); | ||
3364 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
3365 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
3366 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
3367 | } | ||
3368 | |||
3369 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | 3650 | static void ocfs2_adjust_rightmost_records(struct inode *inode, |
3370 | handle_t *handle, | 3651 | handle_t *handle, |
3371 | struct ocfs2_path *path, | 3652 | struct ocfs2_path *path, |
@@ -3567,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode, | |||
3567 | } | 3848 | } |
3568 | 3849 | ||
3569 | /* | 3850 | /* |
3570 | * This function only does inserts on an allocation b-tree. For dinode | 3851 | * This function only does inserts on an allocation b-tree. For tree |
3571 | * lists, ocfs2_insert_at_leaf() is called directly. | 3852 | * depth = 0, ocfs2_insert_at_leaf() is called directly. |
3572 | * | 3853 | * |
3573 | * right_path is the path we want to do the actual insert | 3854 | * right_path is the path we want to do the actual insert |
3574 | * in. left_path should only be passed in if we need to update that | 3855 | * in. left_path should only be passed in if we need to update that |
@@ -3665,7 +3946,7 @@ out: | |||
3665 | 3946 | ||
3666 | static int ocfs2_do_insert_extent(struct inode *inode, | 3947 | static int ocfs2_do_insert_extent(struct inode *inode, |
3667 | handle_t *handle, | 3948 | handle_t *handle, |
3668 | struct buffer_head *di_bh, | 3949 | struct ocfs2_extent_tree *et, |
3669 | struct ocfs2_extent_rec *insert_rec, | 3950 | struct ocfs2_extent_rec *insert_rec, |
3670 | struct ocfs2_insert_type *type) | 3951 | struct ocfs2_insert_type *type) |
3671 | { | 3952 | { |
@@ -3673,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3673 | u32 cpos; | 3954 | u32 cpos; |
3674 | struct ocfs2_path *right_path = NULL; | 3955 | struct ocfs2_path *right_path = NULL; |
3675 | struct ocfs2_path *left_path = NULL; | 3956 | struct ocfs2_path *left_path = NULL; |
3676 | struct ocfs2_dinode *di; | ||
3677 | struct ocfs2_extent_list *el; | 3957 | struct ocfs2_extent_list *el; |
3678 | 3958 | ||
3679 | di = (struct ocfs2_dinode *) di_bh->b_data; | 3959 | el = et->et_root_el; |
3680 | el = &di->id2.i_list; | ||
3681 | 3960 | ||
3682 | ret = ocfs2_journal_access(handle, inode, di_bh, | 3961 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
3683 | OCFS2_JOURNAL_ACCESS_WRITE); | 3962 | OCFS2_JOURNAL_ACCESS_WRITE); |
3684 | if (ret) { | 3963 | if (ret) { |
3685 | mlog_errno(ret); | 3964 | mlog_errno(ret); |
@@ -3691,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3691 | goto out_update_clusters; | 3970 | goto out_update_clusters; |
3692 | } | 3971 | } |
3693 | 3972 | ||
3694 | right_path = ocfs2_new_inode_path(di_bh); | 3973 | right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
3695 | if (!right_path) { | 3974 | if (!right_path) { |
3696 | ret = -ENOMEM; | 3975 | ret = -ENOMEM; |
3697 | mlog_errno(ret); | 3976 | mlog_errno(ret); |
@@ -3741,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3741 | * ocfs2_rotate_tree_right() might have extended the | 4020 | * ocfs2_rotate_tree_right() might have extended the |
3742 | * transaction without re-journaling our tree root. | 4021 | * transaction without re-journaling our tree root. |
3743 | */ | 4022 | */ |
3744 | ret = ocfs2_journal_access(handle, inode, di_bh, | 4023 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
3745 | OCFS2_JOURNAL_ACCESS_WRITE); | 4024 | OCFS2_JOURNAL_ACCESS_WRITE); |
3746 | if (ret) { | 4025 | if (ret) { |
3747 | mlog_errno(ret); | 4026 | mlog_errno(ret); |
@@ -3766,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3766 | 4045 | ||
3767 | out_update_clusters: | 4046 | out_update_clusters: |
3768 | if (type->ins_split == SPLIT_NONE) | 4047 | if (type->ins_split == SPLIT_NONE) |
3769 | ocfs2_update_dinode_clusters(inode, di, | 4048 | ocfs2_et_update_clusters(inode, et, |
3770 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 4049 | le16_to_cpu(insert_rec->e_leaf_clusters)); |
3771 | 4050 | ||
3772 | ret = ocfs2_journal_dirty(handle, di_bh); | 4051 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); |
3773 | if (ret) | 4052 | if (ret) |
3774 | mlog_errno(ret); | 4053 | mlog_errno(ret); |
3775 | 4054 | ||
@@ -3899,7 +4178,8 @@ out: | |||
3899 | static void ocfs2_figure_contig_type(struct inode *inode, | 4178 | static void ocfs2_figure_contig_type(struct inode *inode, |
3900 | struct ocfs2_insert_type *insert, | 4179 | struct ocfs2_insert_type *insert, |
3901 | struct ocfs2_extent_list *el, | 4180 | struct ocfs2_extent_list *el, |
3902 | struct ocfs2_extent_rec *insert_rec) | 4181 | struct ocfs2_extent_rec *insert_rec, |
4182 | struct ocfs2_extent_tree *et) | ||
3903 | { | 4183 | { |
3904 | int i; | 4184 | int i; |
3905 | enum ocfs2_contig_type contig_type = CONTIG_NONE; | 4185 | enum ocfs2_contig_type contig_type = CONTIG_NONE; |
@@ -3915,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
3915 | } | 4195 | } |
3916 | } | 4196 | } |
3917 | insert->ins_contig = contig_type; | 4197 | insert->ins_contig = contig_type; |
4198 | |||
4199 | if (insert->ins_contig != CONTIG_NONE) { | ||
4200 | struct ocfs2_extent_rec *rec = | ||
4201 | &el->l_recs[insert->ins_contig_index]; | ||
4202 | unsigned int len = le16_to_cpu(rec->e_leaf_clusters) + | ||
4203 | le16_to_cpu(insert_rec->e_leaf_clusters); | ||
4204 | |||
4205 | /* | ||
4206 | * Caller might want us to limit the size of extents, don't | ||
4207 | * calculate contiguousness if we might exceed that limit. | ||
4208 | */ | ||
4209 | if (et->et_max_leaf_clusters && | ||
4210 | (len > et->et_max_leaf_clusters)) | ||
4211 | insert->ins_contig = CONTIG_NONE; | ||
4212 | } | ||
3918 | } | 4213 | } |
3919 | 4214 | ||
3920 | /* | 4215 | /* |
@@ -3923,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
3923 | * ocfs2_figure_appending_type() will figure out whether we'll have to | 4218 | * ocfs2_figure_appending_type() will figure out whether we'll have to |
3924 | * insert at the tail of the rightmost leaf. | 4219 | * insert at the tail of the rightmost leaf. |
3925 | * | 4220 | * |
3926 | * This should also work against the dinode list for tree's with 0 | 4221 | * This should also work against the root extent list for tree's with 0 |
3927 | * depth. If we consider the dinode list to be the rightmost leaf node | 4222 | * depth. If we consider the root extent list to be the rightmost leaf node |
3928 | * then the logic here makes sense. | 4223 | * then the logic here makes sense. |
3929 | */ | 4224 | */ |
3930 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, | 4225 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, |
@@ -3975,14 +4270,13 @@ set_tail_append: | |||
3975 | * structure. | 4270 | * structure. |
3976 | */ | 4271 | */ |
3977 | static int ocfs2_figure_insert_type(struct inode *inode, | 4272 | static int ocfs2_figure_insert_type(struct inode *inode, |
3978 | struct buffer_head *di_bh, | 4273 | struct ocfs2_extent_tree *et, |
3979 | struct buffer_head **last_eb_bh, | 4274 | struct buffer_head **last_eb_bh, |
3980 | struct ocfs2_extent_rec *insert_rec, | 4275 | struct ocfs2_extent_rec *insert_rec, |
3981 | int *free_records, | 4276 | int *free_records, |
3982 | struct ocfs2_insert_type *insert) | 4277 | struct ocfs2_insert_type *insert) |
3983 | { | 4278 | { |
3984 | int ret; | 4279 | int ret; |
3985 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3986 | struct ocfs2_extent_block *eb; | 4280 | struct ocfs2_extent_block *eb; |
3987 | struct ocfs2_extent_list *el; | 4281 | struct ocfs2_extent_list *el; |
3988 | struct ocfs2_path *path = NULL; | 4282 | struct ocfs2_path *path = NULL; |
@@ -3990,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
3990 | 4284 | ||
3991 | insert->ins_split = SPLIT_NONE; | 4285 | insert->ins_split = SPLIT_NONE; |
3992 | 4286 | ||
3993 | el = &di->id2.i_list; | 4287 | el = et->et_root_el; |
3994 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); | 4288 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); |
3995 | 4289 | ||
3996 | if (el->l_tree_depth) { | 4290 | if (el->l_tree_depth) { |
@@ -4000,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4000 | * ocfs2_figure_insert_type() and ocfs2_add_branch() | 4294 | * ocfs2_figure_insert_type() and ocfs2_add_branch() |
4001 | * may want it later. | 4295 | * may want it later. |
4002 | */ | 4296 | */ |
4003 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4297 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); |
4004 | le64_to_cpu(di->i_last_eb_blk), &bh, | ||
4005 | OCFS2_BH_CACHED, inode); | ||
4006 | if (ret) { | 4298 | if (ret) { |
4007 | mlog_exit(ret); | 4299 | mlog_exit(ret); |
4008 | goto out; | 4300 | goto out; |
@@ -4023,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4023 | le16_to_cpu(el->l_next_free_rec); | 4315 | le16_to_cpu(el->l_next_free_rec); |
4024 | 4316 | ||
4025 | if (!insert->ins_tree_depth) { | 4317 | if (!insert->ins_tree_depth) { |
4026 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4318 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
4027 | ocfs2_figure_appending_type(insert, el, insert_rec); | 4319 | ocfs2_figure_appending_type(insert, el, insert_rec); |
4028 | return 0; | 4320 | return 0; |
4029 | } | 4321 | } |
4030 | 4322 | ||
4031 | path = ocfs2_new_inode_path(di_bh); | 4323 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4032 | if (!path) { | 4324 | if (!path) { |
4033 | ret = -ENOMEM; | 4325 | ret = -ENOMEM; |
4034 | mlog_errno(ret); | 4326 | mlog_errno(ret); |
@@ -4057,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4057 | * into two types of appends: simple record append, or a | 4349 | * into two types of appends: simple record append, or a |
4058 | * rotate inside the tail leaf. | 4350 | * rotate inside the tail leaf. |
4059 | */ | 4351 | */ |
4060 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4352 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
4061 | 4353 | ||
4062 | /* | 4354 | /* |
4063 | * The insert code isn't quite ready to deal with all cases of | 4355 | * The insert code isn't quite ready to deal with all cases of |
@@ -4078,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4078 | * the case that we're doing a tail append, so maybe we can | 4370 | * the case that we're doing a tail append, so maybe we can |
4079 | * take advantage of that information somehow. | 4371 | * take advantage of that information somehow. |
4080 | */ | 4372 | */ |
4081 | if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { | 4373 | if (ocfs2_et_get_last_eb_blk(et) == |
4374 | path_leaf_bh(path)->b_blocknr) { | ||
4082 | /* | 4375 | /* |
4083 | * Ok, ocfs2_find_path() returned us the rightmost | 4376 | * Ok, ocfs2_find_path() returned us the rightmost |
4084 | * tree path. This might be an appending insert. There are | 4377 | * tree path. This might be an appending insert. There are |
@@ -4108,7 +4401,7 @@ out: | |||
4108 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 4401 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
4109 | handle_t *handle, | 4402 | handle_t *handle, |
4110 | struct inode *inode, | 4403 | struct inode *inode, |
4111 | struct buffer_head *fe_bh, | 4404 | struct ocfs2_extent_tree *et, |
4112 | u32 cpos, | 4405 | u32 cpos, |
4113 | u64 start_blk, | 4406 | u64 start_blk, |
4114 | u32 new_clusters, | 4407 | u32 new_clusters, |
@@ -4121,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4121 | struct ocfs2_insert_type insert = {0, }; | 4414 | struct ocfs2_insert_type insert = {0, }; |
4122 | struct ocfs2_extent_rec rec; | 4415 | struct ocfs2_extent_rec rec; |
4123 | 4416 | ||
4124 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
4125 | |||
4126 | mlog(0, "add %u clusters at position %u to inode %llu\n", | 4417 | mlog(0, "add %u clusters at position %u to inode %llu\n", |
4127 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4418 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
4128 | 4419 | ||
4129 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
4130 | (OCFS2_I(inode)->ip_clusters != cpos), | ||
4131 | "Device %s, asking for sparse allocation: inode %llu, " | ||
4132 | "cpos %u, clusters %u\n", | ||
4133 | osb->dev_str, | ||
4134 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, | ||
4135 | OCFS2_I(inode)->ip_clusters); | ||
4136 | |||
4137 | memset(&rec, 0, sizeof(rec)); | 4420 | memset(&rec, 0, sizeof(rec)); |
4138 | rec.e_cpos = cpu_to_le32(cpos); | 4421 | rec.e_cpos = cpu_to_le32(cpos); |
4139 | rec.e_blkno = cpu_to_le64(start_blk); | 4422 | rec.e_blkno = cpu_to_le64(start_blk); |
4140 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 4423 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
4141 | rec.e_flags = flags; | 4424 | rec.e_flags = flags; |
4425 | status = ocfs2_et_insert_check(inode, et, &rec); | ||
4426 | if (status) { | ||
4427 | mlog_errno(status); | ||
4428 | goto bail; | ||
4429 | } | ||
4142 | 4430 | ||
4143 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, | 4431 | status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, |
4144 | &free_records, &insert); | 4432 | &free_records, &insert); |
4145 | if (status < 0) { | 4433 | if (status < 0) { |
4146 | mlog_errno(status); | 4434 | mlog_errno(status); |
@@ -4154,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4154 | free_records, insert.ins_tree_depth); | 4442 | free_records, insert.ins_tree_depth); |
4155 | 4443 | ||
4156 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { | 4444 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { |
4157 | status = ocfs2_grow_tree(inode, handle, fe_bh, | 4445 | status = ocfs2_grow_tree(inode, handle, et, |
4158 | &insert.ins_tree_depth, &last_eb_bh, | 4446 | &insert.ins_tree_depth, &last_eb_bh, |
4159 | meta_ac); | 4447 | meta_ac); |
4160 | if (status) { | 4448 | if (status) { |
@@ -4164,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4164 | } | 4452 | } |
4165 | 4453 | ||
4166 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 4454 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
4167 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); | 4455 | status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); |
4168 | if (status < 0) | 4456 | if (status < 0) |
4169 | mlog_errno(status); | 4457 | mlog_errno(status); |
4170 | else | 4458 | else if (et->et_ops == &ocfs2_dinode_et_ops) |
4171 | ocfs2_extent_map_insert_rec(inode, &rec); | 4459 | ocfs2_extent_map_insert_rec(inode, &rec); |
4172 | 4460 | ||
4173 | bail: | 4461 | bail: |
4174 | if (last_eb_bh) | 4462 | brelse(last_eb_bh); |
4175 | brelse(last_eb_bh); | 4463 | |
4464 | mlog_exit(status); | ||
4465 | return status; | ||
4466 | } | ||
4467 | |||
4468 | /* | ||
4469 | * Allcate and add clusters into the extent b-tree. | ||
4470 | * The new clusters(clusters_to_add) will be inserted at logical_offset. | ||
4471 | * The extent b-tree's root is specified by et, and | ||
4472 | * it is not limited to the file storage. Any extent tree can use this | ||
4473 | * function if it implements the proper ocfs2_extent_tree. | ||
4474 | */ | ||
4475 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
4476 | struct inode *inode, | ||
4477 | u32 *logical_offset, | ||
4478 | u32 clusters_to_add, | ||
4479 | int mark_unwritten, | ||
4480 | struct ocfs2_extent_tree *et, | ||
4481 | handle_t *handle, | ||
4482 | struct ocfs2_alloc_context *data_ac, | ||
4483 | struct ocfs2_alloc_context *meta_ac, | ||
4484 | enum ocfs2_alloc_restarted *reason_ret) | ||
4485 | { | ||
4486 | int status = 0; | ||
4487 | int free_extents; | ||
4488 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
4489 | u32 bit_off, num_bits; | ||
4490 | u64 block; | ||
4491 | u8 flags = 0; | ||
4492 | |||
4493 | BUG_ON(!clusters_to_add); | ||
4494 | |||
4495 | if (mark_unwritten) | ||
4496 | flags = OCFS2_EXT_UNWRITTEN; | ||
4497 | |||
4498 | free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
4499 | if (free_extents < 0) { | ||
4500 | status = free_extents; | ||
4501 | mlog_errno(status); | ||
4502 | goto leave; | ||
4503 | } | ||
4504 | |||
4505 | /* there are two cases which could cause us to EAGAIN in the | ||
4506 | * we-need-more-metadata case: | ||
4507 | * 1) we haven't reserved *any* | ||
4508 | * 2) we are so fragmented, we've needed to add metadata too | ||
4509 | * many times. */ | ||
4510 | if (!free_extents && !meta_ac) { | ||
4511 | mlog(0, "we haven't reserved any metadata!\n"); | ||
4512 | status = -EAGAIN; | ||
4513 | reason = RESTART_META; | ||
4514 | goto leave; | ||
4515 | } else if ((!free_extents) | ||
4516 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
4517 | < ocfs2_extend_meta_needed(et->et_root_el))) { | ||
4518 | mlog(0, "filesystem is really fragmented...\n"); | ||
4519 | status = -EAGAIN; | ||
4520 | reason = RESTART_META; | ||
4521 | goto leave; | ||
4522 | } | ||
4523 | |||
4524 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
4525 | clusters_to_add, &bit_off, &num_bits); | ||
4526 | if (status < 0) { | ||
4527 | if (status != -ENOSPC) | ||
4528 | mlog_errno(status); | ||
4529 | goto leave; | ||
4530 | } | ||
4176 | 4531 | ||
4532 | BUG_ON(num_bits > clusters_to_add); | ||
4533 | |||
4534 | /* reserve our write early -- insert_extent may update the inode */ | ||
4535 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, | ||
4536 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4537 | if (status < 0) { | ||
4538 | mlog_errno(status); | ||
4539 | goto leave; | ||
4540 | } | ||
4541 | |||
4542 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
4543 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
4544 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
4545 | status = ocfs2_insert_extent(osb, handle, inode, et, | ||
4546 | *logical_offset, block, | ||
4547 | num_bits, flags, meta_ac); | ||
4548 | if (status < 0) { | ||
4549 | mlog_errno(status); | ||
4550 | goto leave; | ||
4551 | } | ||
4552 | |||
4553 | status = ocfs2_journal_dirty(handle, et->et_root_bh); | ||
4554 | if (status < 0) { | ||
4555 | mlog_errno(status); | ||
4556 | goto leave; | ||
4557 | } | ||
4558 | |||
4559 | clusters_to_add -= num_bits; | ||
4560 | *logical_offset += num_bits; | ||
4561 | |||
4562 | if (clusters_to_add) { | ||
4563 | mlog(0, "need to alloc once more, wanted = %u\n", | ||
4564 | clusters_to_add); | ||
4565 | status = -EAGAIN; | ||
4566 | reason = RESTART_TRANS; | ||
4567 | } | ||
4568 | |||
4569 | leave: | ||
4177 | mlog_exit(status); | 4570 | mlog_exit(status); |
4571 | if (reason_ret) | ||
4572 | *reason_ret = reason; | ||
4178 | return status; | 4573 | return status; |
4179 | } | 4574 | } |
4180 | 4575 | ||
@@ -4201,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb, | |||
4201 | static int ocfs2_split_and_insert(struct inode *inode, | 4596 | static int ocfs2_split_and_insert(struct inode *inode, |
4202 | handle_t *handle, | 4597 | handle_t *handle, |
4203 | struct ocfs2_path *path, | 4598 | struct ocfs2_path *path, |
4204 | struct buffer_head *di_bh, | 4599 | struct ocfs2_extent_tree *et, |
4205 | struct buffer_head **last_eb_bh, | 4600 | struct buffer_head **last_eb_bh, |
4206 | int split_index, | 4601 | int split_index, |
4207 | struct ocfs2_extent_rec *orig_split_rec, | 4602 | struct ocfs2_extent_rec *orig_split_rec, |
@@ -4215,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode, | |||
4215 | struct ocfs2_extent_rec split_rec = *orig_split_rec; | 4610 | struct ocfs2_extent_rec split_rec = *orig_split_rec; |
4216 | struct ocfs2_insert_type insert; | 4611 | struct ocfs2_insert_type insert; |
4217 | struct ocfs2_extent_block *eb; | 4612 | struct ocfs2_extent_block *eb; |
4218 | struct ocfs2_dinode *di; | ||
4219 | 4613 | ||
4220 | leftright: | 4614 | leftright: |
4221 | /* | 4615 | /* |
@@ -4224,8 +4618,7 @@ leftright: | |||
4224 | */ | 4618 | */ |
4225 | rec = path_leaf_el(path)->l_recs[split_index]; | 4619 | rec = path_leaf_el(path)->l_recs[split_index]; |
4226 | 4620 | ||
4227 | di = (struct ocfs2_dinode *)di_bh->b_data; | 4621 | rightmost_el = et->et_root_el; |
4228 | rightmost_el = &di->id2.i_list; | ||
4229 | 4622 | ||
4230 | depth = le16_to_cpu(rightmost_el->l_tree_depth); | 4623 | depth = le16_to_cpu(rightmost_el->l_tree_depth); |
4231 | if (depth) { | 4624 | if (depth) { |
@@ -4236,8 +4629,8 @@ leftright: | |||
4236 | 4629 | ||
4237 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4630 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
4238 | le16_to_cpu(rightmost_el->l_count)) { | 4631 | le16_to_cpu(rightmost_el->l_count)) { |
4239 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, | 4632 | ret = ocfs2_grow_tree(inode, handle, et, |
4240 | meta_ac); | 4633 | &depth, last_eb_bh, meta_ac); |
4241 | if (ret) { | 4634 | if (ret) { |
4242 | mlog_errno(ret); | 4635 | mlog_errno(ret); |
4243 | goto out; | 4636 | goto out; |
@@ -4274,8 +4667,7 @@ leftright: | |||
4274 | do_leftright = 1; | 4667 | do_leftright = 1; |
4275 | } | 4668 | } |
4276 | 4669 | ||
4277 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, | 4670 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
4278 | &insert); | ||
4279 | if (ret) { | 4671 | if (ret) { |
4280 | mlog_errno(ret); | 4672 | mlog_errno(ret); |
4281 | goto out; | 4673 | goto out; |
@@ -4317,8 +4709,9 @@ out: | |||
4317 | * of the tree is required. All other cases will degrade into a less | 4709 | * of the tree is required. All other cases will degrade into a less |
4318 | * optimal tree layout. | 4710 | * optimal tree layout. |
4319 | * | 4711 | * |
4320 | * last_eb_bh should be the rightmost leaf block for any inode with a | 4712 | * last_eb_bh should be the rightmost leaf block for any extent |
4321 | * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. | 4713 | * btree. Since a split may grow the tree or a merge might shrink it, |
4714 | * the caller cannot trust the contents of that buffer after this call. | ||
4322 | * | 4715 | * |
4323 | * This code is optimized for readability - several passes might be | 4716 | * This code is optimized for readability - several passes might be |
4324 | * made over certain portions of the tree. All of those blocks will | 4717 | * made over certain portions of the tree. All of those blocks will |
@@ -4326,7 +4719,7 @@ out: | |||
4326 | * extra overhead is not expressed in terms of disk reads. | 4719 | * extra overhead is not expressed in terms of disk reads. |
4327 | */ | 4720 | */ |
4328 | static int __ocfs2_mark_extent_written(struct inode *inode, | 4721 | static int __ocfs2_mark_extent_written(struct inode *inode, |
4329 | struct buffer_head *di_bh, | 4722 | struct ocfs2_extent_tree *et, |
4330 | handle_t *handle, | 4723 | handle_t *handle, |
4331 | struct ocfs2_path *path, | 4724 | struct ocfs2_path *path, |
4332 | int split_index, | 4725 | int split_index, |
@@ -4366,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4366 | */ | 4759 | */ |
4367 | if (path->p_tree_depth) { | 4760 | if (path->p_tree_depth) { |
4368 | struct ocfs2_extent_block *eb; | 4761 | struct ocfs2_extent_block *eb; |
4369 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4370 | 4762 | ||
4371 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4763 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
4372 | le64_to_cpu(di->i_last_eb_blk), | 4764 | &last_eb_bh); |
4373 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4374 | if (ret) { | 4765 | if (ret) { |
4375 | mlog_exit(ret); | 4766 | mlog_exit(ret); |
4376 | goto out; | 4767 | goto out; |
@@ -4403,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4403 | if (ctxt.c_split_covers_rec) | 4794 | if (ctxt.c_split_covers_rec) |
4404 | el->l_recs[split_index] = *split_rec; | 4795 | el->l_recs[split_index] = *split_rec; |
4405 | else | 4796 | else |
4406 | ret = ocfs2_split_and_insert(inode, handle, path, di_bh, | 4797 | ret = ocfs2_split_and_insert(inode, handle, path, et, |
4407 | &last_eb_bh, split_index, | 4798 | &last_eb_bh, split_index, |
4408 | split_rec, meta_ac); | 4799 | split_rec, meta_ac); |
4409 | if (ret) | 4800 | if (ret) |
@@ -4411,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4411 | } else { | 4802 | } else { |
4412 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | 4803 | ret = ocfs2_try_to_merge_extent(inode, handle, path, |
4413 | split_index, split_rec, | 4804 | split_index, split_rec, |
4414 | dealloc, &ctxt); | 4805 | dealloc, &ctxt, et); |
4415 | if (ret) | 4806 | if (ret) |
4416 | mlog_errno(ret); | 4807 | mlog_errno(ret); |
4417 | } | 4808 | } |
@@ -4429,7 +4820,8 @@ out: | |||
4429 | * | 4820 | * |
4430 | * The caller is responsible for passing down meta_ac if we'll need it. | 4821 | * The caller is responsible for passing down meta_ac if we'll need it. |
4431 | */ | 4822 | */ |
4432 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 4823 | int ocfs2_mark_extent_written(struct inode *inode, |
4824 | struct ocfs2_extent_tree *et, | ||
4433 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 4825 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
4434 | struct ocfs2_alloc_context *meta_ac, | 4826 | struct ocfs2_alloc_context *meta_ac, |
4435 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 4827 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
@@ -4455,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
4455 | /* | 4847 | /* |
4456 | * XXX: This should be fixed up so that we just re-insert the | 4848 | * XXX: This should be fixed up so that we just re-insert the |
4457 | * next extent records. | 4849 | * next extent records. |
4850 | * | ||
4851 | * XXX: This is a hack on the extent tree, maybe it should be | ||
4852 | * an op? | ||
4458 | */ | 4853 | */ |
4459 | ocfs2_extent_map_trunc(inode, 0); | 4854 | if (et->et_ops == &ocfs2_dinode_et_ops) |
4855 | ocfs2_extent_map_trunc(inode, 0); | ||
4460 | 4856 | ||
4461 | left_path = ocfs2_new_inode_path(di_bh); | 4857 | left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4462 | if (!left_path) { | 4858 | if (!left_path) { |
4463 | ret = -ENOMEM; | 4859 | ret = -ENOMEM; |
4464 | mlog_errno(ret); | 4860 | mlog_errno(ret); |
@@ -4489,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
4489 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | 4885 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; |
4490 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | 4886 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; |
4491 | 4887 | ||
4492 | ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, | 4888 | ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, |
4493 | index, &split_rec, meta_ac, dealloc); | 4889 | index, &split_rec, meta_ac, |
4890 | dealloc); | ||
4494 | if (ret) | 4891 | if (ret) |
4495 | mlog_errno(ret); | 4892 | mlog_errno(ret); |
4496 | 4893 | ||
@@ -4499,13 +4896,12 @@ out: | |||
4499 | return ret; | 4896 | return ret; |
4500 | } | 4897 | } |
4501 | 4898 | ||
4502 | static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | 4899 | static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, |
4503 | handle_t *handle, struct ocfs2_path *path, | 4900 | handle_t *handle, struct ocfs2_path *path, |
4504 | int index, u32 new_range, | 4901 | int index, u32 new_range, |
4505 | struct ocfs2_alloc_context *meta_ac) | 4902 | struct ocfs2_alloc_context *meta_ac) |
4506 | { | 4903 | { |
4507 | int ret, depth, credits = handle->h_buffer_credits; | 4904 | int ret, depth, credits = handle->h_buffer_credits; |
4508 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
4509 | struct buffer_head *last_eb_bh = NULL; | 4905 | struct buffer_head *last_eb_bh = NULL; |
4510 | struct ocfs2_extent_block *eb; | 4906 | struct ocfs2_extent_block *eb; |
4511 | struct ocfs2_extent_list *rightmost_el, *el; | 4907 | struct ocfs2_extent_list *rightmost_el, *el; |
@@ -4522,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4522 | 4918 | ||
4523 | depth = path->p_tree_depth; | 4919 | depth = path->p_tree_depth; |
4524 | if (depth > 0) { | 4920 | if (depth > 0) { |
4525 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4921 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
4526 | le64_to_cpu(di->i_last_eb_blk), | 4922 | &last_eb_bh); |
4527 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
4528 | if (ret < 0) { | 4923 | if (ret < 0) { |
4529 | mlog_errno(ret); | 4924 | mlog_errno(ret); |
4530 | goto out; | 4925 | goto out; |
@@ -4535,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4535 | } else | 4930 | } else |
4536 | rightmost_el = path_leaf_el(path); | 4931 | rightmost_el = path_leaf_el(path); |
4537 | 4932 | ||
4538 | credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); | 4933 | credits += path->p_tree_depth + |
4934 | ocfs2_extend_meta_needed(et->et_root_el); | ||
4539 | ret = ocfs2_extend_trans(handle, credits); | 4935 | ret = ocfs2_extend_trans(handle, credits); |
4540 | if (ret) { | 4936 | if (ret) { |
4541 | mlog_errno(ret); | 4937 | mlog_errno(ret); |
@@ -4544,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4544 | 4940 | ||
4545 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4941 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
4546 | le16_to_cpu(rightmost_el->l_count)) { | 4942 | le16_to_cpu(rightmost_el->l_count)) { |
4547 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, | 4943 | ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, |
4548 | meta_ac); | 4944 | meta_ac); |
4549 | if (ret) { | 4945 | if (ret) { |
4550 | mlog_errno(ret); | 4946 | mlog_errno(ret); |
@@ -4558,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
4558 | insert.ins_split = SPLIT_RIGHT; | 4954 | insert.ins_split = SPLIT_RIGHT; |
4559 | insert.ins_tree_depth = depth; | 4955 | insert.ins_tree_depth = depth; |
4560 | 4956 | ||
4561 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); | 4957 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
4562 | if (ret) | 4958 | if (ret) |
4563 | mlog_errno(ret); | 4959 | mlog_errno(ret); |
4564 | 4960 | ||
@@ -4570,7 +4966,8 @@ out: | |||
4570 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | 4966 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, |
4571 | struct ocfs2_path *path, int index, | 4967 | struct ocfs2_path *path, int index, |
4572 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 4968 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
4573 | u32 cpos, u32 len) | 4969 | u32 cpos, u32 len, |
4970 | struct ocfs2_extent_tree *et) | ||
4574 | { | 4971 | { |
4575 | int ret; | 4972 | int ret; |
4576 | u32 left_cpos, rec_range, trunc_range; | 4973 | u32 left_cpos, rec_range, trunc_range; |
@@ -4582,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
4582 | struct ocfs2_extent_block *eb; | 4979 | struct ocfs2_extent_block *eb; |
4583 | 4980 | ||
4584 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { | 4981 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { |
4585 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 4982 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
4586 | if (ret) { | 4983 | if (ret) { |
4587 | mlog_errno(ret); | 4984 | mlog_errno(ret); |
4588 | goto out; | 4985 | goto out; |
@@ -4713,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
4713 | 5110 | ||
4714 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); | 5111 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); |
4715 | 5112 | ||
4716 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 5113 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
4717 | if (ret) { | 5114 | if (ret) { |
4718 | mlog_errno(ret); | 5115 | mlog_errno(ret); |
4719 | goto out; | 5116 | goto out; |
@@ -4724,7 +5121,8 @@ out: | |||
4724 | return ret; | 5121 | return ret; |
4725 | } | 5122 | } |
4726 | 5123 | ||
4727 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 5124 | int ocfs2_remove_extent(struct inode *inode, |
5125 | struct ocfs2_extent_tree *et, | ||
4728 | u32 cpos, u32 len, handle_t *handle, | 5126 | u32 cpos, u32 len, handle_t *handle, |
4729 | struct ocfs2_alloc_context *meta_ac, | 5127 | struct ocfs2_alloc_context *meta_ac, |
4730 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 5128 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
@@ -4733,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4733 | u32 rec_range, trunc_range; | 5131 | u32 rec_range, trunc_range; |
4734 | struct ocfs2_extent_rec *rec; | 5132 | struct ocfs2_extent_rec *rec; |
4735 | struct ocfs2_extent_list *el; | 5133 | struct ocfs2_extent_list *el; |
4736 | struct ocfs2_path *path; | 5134 | struct ocfs2_path *path = NULL; |
4737 | 5135 | ||
4738 | ocfs2_extent_map_trunc(inode, 0); | 5136 | ocfs2_extent_map_trunc(inode, 0); |
4739 | 5137 | ||
4740 | path = ocfs2_new_inode_path(di_bh); | 5138 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
4741 | if (!path) { | 5139 | if (!path) { |
4742 | ret = -ENOMEM; | 5140 | ret = -ENOMEM; |
4743 | mlog_errno(ret); | 5141 | mlog_errno(ret); |
@@ -4790,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4790 | 5188 | ||
4791 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | 5189 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { |
4792 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5190 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
4793 | cpos, len); | 5191 | cpos, len, et); |
4794 | if (ret) { | 5192 | if (ret) { |
4795 | mlog_errno(ret); | 5193 | mlog_errno(ret); |
4796 | goto out; | 5194 | goto out; |
4797 | } | 5195 | } |
4798 | } else { | 5196 | } else { |
4799 | ret = ocfs2_split_tree(inode, di_bh, handle, path, index, | 5197 | ret = ocfs2_split_tree(inode, et, handle, path, index, |
4800 | trunc_range, meta_ac); | 5198 | trunc_range, meta_ac); |
4801 | if (ret) { | 5199 | if (ret) { |
4802 | mlog_errno(ret); | 5200 | mlog_errno(ret); |
@@ -4845,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
4845 | } | 5243 | } |
4846 | 5244 | ||
4847 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5245 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
4848 | cpos, len); | 5246 | cpos, len, et); |
4849 | if (ret) { | 5247 | if (ret) { |
4850 | mlog_errno(ret); | 5248 | mlog_errno(ret); |
4851 | goto out; | 5249 | goto out; |
@@ -5188,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, | |||
5188 | goto bail; | 5586 | goto bail; |
5189 | } | 5587 | } |
5190 | 5588 | ||
5191 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 5589 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
5192 | OCFS2_BH_CACHED, inode); | ||
5193 | if (status < 0) { | 5590 | if (status < 0) { |
5194 | iput(inode); | 5591 | iput(inode); |
5195 | mlog_errno(status); | 5592 | mlog_errno(status); |
@@ -5264,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
5264 | bail: | 5661 | bail: |
5265 | if (tl_inode) | 5662 | if (tl_inode) |
5266 | iput(tl_inode); | 5663 | iput(tl_inode); |
5267 | if (tl_bh) | 5664 | brelse(tl_bh); |
5268 | brelse(tl_bh); | ||
5269 | 5665 | ||
5270 | if (status < 0 && (*tl_copy)) { | 5666 | if (status < 0 && (*tl_copy)) { |
5271 | kfree(*tl_copy); | 5667 | kfree(*tl_copy); |
@@ -6008,20 +6404,13 @@ bail: | |||
6008 | return status; | 6404 | return status; |
6009 | } | 6405 | } |
6010 | 6406 | ||
6011 | static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) | 6407 | static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) |
6012 | { | 6408 | { |
6013 | set_buffer_uptodate(bh); | 6409 | set_buffer_uptodate(bh); |
6014 | mark_buffer_dirty(bh); | 6410 | mark_buffer_dirty(bh); |
6015 | return 0; | 6411 | return 0; |
6016 | } | 6412 | } |
6017 | 6413 | ||
6018 | static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | ||
6019 | { | ||
6020 | set_buffer_uptodate(bh); | ||
6021 | mark_buffer_dirty(bh); | ||
6022 | return ocfs2_journal_dirty_data(handle, bh); | ||
6023 | } | ||
6024 | |||
6025 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | 6414 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, |
6026 | unsigned int from, unsigned int to, | 6415 | unsigned int from, unsigned int to, |
6027 | struct page *page, int zero, u64 *phys) | 6416 | struct page *page, int zero, u64 *phys) |
@@ -6040,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | |||
6040 | * here if they aren't - ocfs2_map_page_blocks() | 6429 | * here if they aren't - ocfs2_map_page_blocks() |
6041 | * might've skipped some | 6430 | * might've skipped some |
6042 | */ | 6431 | */ |
6043 | if (ocfs2_should_order_data(inode)) { | 6432 | ret = walk_page_buffers(handle, page_buffers(page), |
6044 | ret = walk_page_buffers(handle, | 6433 | from, to, &partial, |
6045 | page_buffers(page), | 6434 | ocfs2_zero_func); |
6046 | from, to, &partial, | 6435 | if (ret < 0) |
6047 | ocfs2_ordered_zero_func); | 6436 | mlog_errno(ret); |
6048 | if (ret < 0) | 6437 | else if (ocfs2_should_order_data(inode)) { |
6049 | mlog_errno(ret); | 6438 | ret = ocfs2_jbd2_file_inode(handle, inode); |
6050 | } else { | 6439 | #ifdef CONFIG_OCFS2_COMPAT_JBD |
6051 | ret = walk_page_buffers(handle, page_buffers(page), | 6440 | ret = walk_page_buffers(handle, page_buffers(page), |
6052 | from, to, &partial, | 6441 | from, to, &partial, |
6053 | ocfs2_writeback_zero_func); | 6442 | ocfs2_journal_dirty_data); |
6443 | #endif | ||
6054 | if (ret < 0) | 6444 | if (ret < 0) |
6055 | mlog_errno(ret); | 6445 | mlog_errno(ret); |
6056 | } | 6446 | } |
@@ -6215,20 +6605,29 @@ out: | |||
6215 | return ret; | 6605 | return ret; |
6216 | } | 6606 | } |
6217 | 6607 | ||
6218 | static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) | 6608 | static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode, |
6609 | struct ocfs2_dinode *di) | ||
6219 | { | 6610 | { |
6220 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; | 6611 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; |
6612 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
6221 | 6613 | ||
6222 | memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); | 6614 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) |
6615 | memset(&di->id2, 0, blocksize - | ||
6616 | offsetof(struct ocfs2_dinode, id2) - | ||
6617 | xattrsize); | ||
6618 | else | ||
6619 | memset(&di->id2, 0, blocksize - | ||
6620 | offsetof(struct ocfs2_dinode, id2)); | ||
6223 | } | 6621 | } |
6224 | 6622 | ||
6225 | void ocfs2_dinode_new_extent_list(struct inode *inode, | 6623 | void ocfs2_dinode_new_extent_list(struct inode *inode, |
6226 | struct ocfs2_dinode *di) | 6624 | struct ocfs2_dinode *di) |
6227 | { | 6625 | { |
6228 | ocfs2_zero_dinode_id2(inode, di); | 6626 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
6229 | di->id2.i_list.l_tree_depth = 0; | 6627 | di->id2.i_list.l_tree_depth = 0; |
6230 | di->id2.i_list.l_next_free_rec = 0; | 6628 | di->id2.i_list.l_next_free_rec = 0; |
6231 | di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); | 6629 | di->id2.i_list.l_count = cpu_to_le16( |
6630 | ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di)); | ||
6232 | } | 6631 | } |
6233 | 6632 | ||
6234 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | 6633 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) |
@@ -6245,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | |||
6245 | * We clear the entire i_data structure here so that all | 6644 | * We clear the entire i_data structure here so that all |
6246 | * fields can be properly initialized. | 6645 | * fields can be properly initialized. |
6247 | */ | 6646 | */ |
6248 | ocfs2_zero_dinode_id2(inode, di); | 6647 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
6249 | 6648 | ||
6250 | idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); | 6649 | idata->id_count = cpu_to_le16( |
6650 | ocfs2_max_inline_data_with_xattr(inode->i_sb, di)); | ||
6251 | } | 6651 | } |
6252 | 6652 | ||
6253 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, | 6653 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, |
@@ -6262,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6262 | struct ocfs2_alloc_context *data_ac = NULL; | 6662 | struct ocfs2_alloc_context *data_ac = NULL; |
6263 | struct page **pages = NULL; | 6663 | struct page **pages = NULL; |
6264 | loff_t end = osb->s_clustersize; | 6664 | loff_t end = osb->s_clustersize; |
6665 | struct ocfs2_extent_tree et; | ||
6265 | 6666 | ||
6266 | has_data = i_size_read(inode) ? 1 : 0; | 6667 | has_data = i_size_read(inode) ? 1 : 0; |
6267 | 6668 | ||
@@ -6361,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6361 | * this proves to be false, we could always re-build | 6762 | * this proves to be false, we could always re-build |
6362 | * the in-inode data from our pages. | 6763 | * the in-inode data from our pages. |
6363 | */ | 6764 | */ |
6364 | ret = ocfs2_insert_extent(osb, handle, inode, di_bh, | 6765 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
6766 | ret = ocfs2_insert_extent(osb, handle, inode, &et, | ||
6365 | 0, block, 1, 0, NULL); | 6767 | 0, block, 1, 0, NULL); |
6366 | if (ret) { | 6768 | if (ret) { |
6367 | mlog_errno(ret); | 6769 | mlog_errno(ret); |
@@ -6404,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
6404 | handle_t *handle = NULL; | 6806 | handle_t *handle = NULL; |
6405 | struct inode *tl_inode = osb->osb_tl_inode; | 6807 | struct inode *tl_inode = osb->osb_tl_inode; |
6406 | struct ocfs2_path *path = NULL; | 6808 | struct ocfs2_path *path = NULL; |
6809 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | ||
6407 | 6810 | ||
6408 | mlog_entry_void(); | 6811 | mlog_entry_void(); |
6409 | 6812 | ||
6410 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, | 6813 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, |
6411 | i_size_read(inode)); | 6814 | i_size_read(inode)); |
6412 | 6815 | ||
6413 | path = ocfs2_new_inode_path(fe_bh); | 6816 | path = ocfs2_new_path(fe_bh, &di->id2.i_list); |
6414 | if (!path) { | 6817 | if (!path) { |
6415 | status = -ENOMEM; | 6818 | status = -ENOMEM; |
6416 | mlog_errno(status); | 6819 | mlog_errno(status); |
@@ -6581,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
6581 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | 6984 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); |
6582 | 6985 | ||
6583 | if (fe->id2.i_list.l_tree_depth) { | 6986 | if (fe->id2.i_list.l_tree_depth) { |
6584 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 6987 | status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), |
6585 | &last_eb_bh, OCFS2_BH_CACHED, inode); | 6988 | &last_eb_bh); |
6586 | if (status < 0) { | 6989 | if (status < 0) { |
6587 | mlog_errno(status); | 6990 | mlog_errno(status); |
6588 | goto bail; | 6991 | goto bail; |
@@ -6695,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) | |||
6695 | mlog(ML_NOTICE, | 7098 | mlog(ML_NOTICE, |
6696 | "Truncate completion has non-empty dealloc context\n"); | 7099 | "Truncate completion has non-empty dealloc context\n"); |
6697 | 7100 | ||
6698 | if (tc->tc_last_eb_bh) | 7101 | brelse(tc->tc_last_eb_bh); |
6699 | brelse(tc->tc_last_eb_bh); | ||
6700 | 7102 | ||
6701 | kfree(tc); | 7103 | kfree(tc); |
6702 | } | 7104 | } |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd8011..70257c84cfbe 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -26,30 +26,102 @@ | |||
26 | #ifndef OCFS2_ALLOC_H | 26 | #ifndef OCFS2_ALLOC_H |
27 | #define OCFS2_ALLOC_H | 27 | #define OCFS2_ALLOC_H |
28 | 28 | ||
29 | |||
30 | /* | ||
31 | * For xattr tree leaf, we limit the leaf byte size to be 64K. | ||
32 | */ | ||
33 | #define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536 | ||
34 | |||
35 | /* | ||
36 | * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract | ||
37 | * the b-tree operations in ocfs2. Now all the b-tree operations are not | ||
38 | * limited to ocfs2_dinode only. Any data which need to allocate clusters | ||
39 | * to store can use b-tree. And it only needs to implement its ocfs2_extent_tree | ||
40 | * and operation. | ||
41 | * | ||
42 | * ocfs2_extent_tree becomes the first-class object for extent tree | ||
43 | * manipulation. Callers of the alloc.c code need to fill it via one of | ||
44 | * the ocfs2_init_*_extent_tree() operations below. | ||
45 | * | ||
46 | * ocfs2_extent_tree contains info for the root of the b-tree, it must have a | ||
47 | * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree | ||
48 | * functions. | ||
49 | * ocfs2_extent_tree_operations abstract the normal operations we do for | ||
50 | * the root of extent b-tree. | ||
51 | */ | ||
52 | struct ocfs2_extent_tree_operations; | ||
53 | struct ocfs2_extent_tree { | ||
54 | struct ocfs2_extent_tree_operations *et_ops; | ||
55 | struct buffer_head *et_root_bh; | ||
56 | struct ocfs2_extent_list *et_root_el; | ||
57 | void *et_object; | ||
58 | unsigned int et_max_leaf_clusters; | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the | ||
63 | * specified object buffer. | ||
64 | */ | ||
65 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
66 | struct inode *inode, | ||
67 | struct buffer_head *bh); | ||
68 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
69 | struct inode *inode, | ||
70 | struct buffer_head *bh); | ||
71 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
72 | struct inode *inode, | ||
73 | struct buffer_head *bh, | ||
74 | struct ocfs2_xattr_value_root *xv); | ||
75 | |||
29 | struct ocfs2_alloc_context; | 76 | struct ocfs2_alloc_context; |
30 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 77 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
31 | handle_t *handle, | 78 | handle_t *handle, |
32 | struct inode *inode, | 79 | struct inode *inode, |
33 | struct buffer_head *fe_bh, | 80 | struct ocfs2_extent_tree *et, |
34 | u32 cpos, | 81 | u32 cpos, |
35 | u64 start_blk, | 82 | u64 start_blk, |
36 | u32 new_clusters, | 83 | u32 new_clusters, |
37 | u8 flags, | 84 | u8 flags, |
38 | struct ocfs2_alloc_context *meta_ac); | 85 | struct ocfs2_alloc_context *meta_ac); |
86 | |||
87 | enum ocfs2_alloc_restarted { | ||
88 | RESTART_NONE = 0, | ||
89 | RESTART_TRANS, | ||
90 | RESTART_META | ||
91 | }; | ||
92 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
93 | struct inode *inode, | ||
94 | u32 *logical_offset, | ||
95 | u32 clusters_to_add, | ||
96 | int mark_unwritten, | ||
97 | struct ocfs2_extent_tree *et, | ||
98 | handle_t *handle, | ||
99 | struct ocfs2_alloc_context *data_ac, | ||
100 | struct ocfs2_alloc_context *meta_ac, | ||
101 | enum ocfs2_alloc_restarted *reason_ret); | ||
39 | struct ocfs2_cached_dealloc_ctxt; | 102 | struct ocfs2_cached_dealloc_ctxt; |
40 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 103 | int ocfs2_mark_extent_written(struct inode *inode, |
104 | struct ocfs2_extent_tree *et, | ||
41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 105 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
42 | struct ocfs2_alloc_context *meta_ac, | 106 | struct ocfs2_alloc_context *meta_ac, |
43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 107 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 108 | int ocfs2_remove_extent(struct inode *inode, |
109 | struct ocfs2_extent_tree *et, | ||
45 | u32 cpos, u32 len, handle_t *handle, | 110 | u32 cpos, u32 len, handle_t *handle, |
46 | struct ocfs2_alloc_context *meta_ac, | 111 | struct ocfs2_alloc_context *meta_ac, |
47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 112 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 113 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
49 | struct inode *inode, | 114 | struct inode *inode, |
50 | struct ocfs2_dinode *fe); | 115 | struct ocfs2_extent_tree *et); |
51 | /* how many new metadata chunks would an allocation need at maximum? */ | 116 | |
52 | static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | 117 | /* |
118 | * how many new metadata chunks would an allocation need at maximum? | ||
119 | * | ||
120 | * Please note that the caller must make sure that root_el is the root | ||
121 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
122 | * the result may be wrong. | ||
123 | */ | ||
124 | static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el) | ||
53 | { | 125 | { |
54 | /* | 126 | /* |
55 | * Rather than do all the work of determining how much we need | 127 | * Rather than do all the work of determining how much we need |
@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | |||
59 | * new tree_depth==0 extent_block, and one block at the new | 131 | * new tree_depth==0 extent_block, and one block at the new |
60 | * top-of-the tree. | 132 | * top-of-the tree. |
61 | */ | 133 | */ |
62 | return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; | 134 | return le16_to_cpu(root_el->l_tree_depth) + 2; |
63 | } | 135 | } |
64 | 136 | ||
65 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); | 137 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); |
@@ -146,4 +218,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, | |||
146 | return le16_to_cpu(rec->e_leaf_clusters); | 218 | return le16_to_cpu(rec->e_leaf_clusters); |
147 | } | 219 | } |
148 | 220 | ||
221 | /* | ||
222 | * This is only valid for leaf nodes, which are the only ones that can | ||
223 | * have empty extents anyway. | ||
224 | */ | ||
225 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
226 | { | ||
227 | return !rec->e_leaf_clusters; | ||
228 | } | ||
229 | |||
149 | #endif /* OCFS2_ALLOC_H */ | 230 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..c22543b33420 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
68 | goto bail; | 68 | goto bail; |
69 | } | 69 | } |
70 | 70 | ||
71 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 71 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
72 | OCFS2_I(inode)->ip_blkno, | ||
73 | &bh, OCFS2_BH_CACHED, inode); | ||
74 | if (status < 0) { | 72 | if (status < 0) { |
75 | mlog_errno(status); | 73 | mlog_errno(status); |
76 | goto bail; | 74 | goto bail; |
@@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
128 | err = 0; | 126 | err = 0; |
129 | 127 | ||
130 | bail: | 128 | bail: |
131 | if (bh) | 129 | brelse(bh); |
132 | brelse(bh); | ||
133 | 130 | ||
134 | mlog_exit(err); | 131 | mlog_exit(err); |
135 | return err; | 132 | return err; |
@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) | |||
261 | { | 258 | { |
262 | int ret; | 259 | int ret; |
263 | struct buffer_head *di_bh = NULL; | 260 | struct buffer_head *di_bh = NULL; |
264 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
265 | 261 | ||
266 | BUG_ON(!PageLocked(page)); | 262 | BUG_ON(!PageLocked(page)); |
267 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); | 263 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); |
268 | 264 | ||
269 | ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, | 265 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
270 | OCFS2_BH_CACHED, inode); | ||
271 | if (ret) { | 266 | if (ret) { |
272 | mlog_errno(ret); | 267 | mlog_errno(ret); |
273 | goto out; | 268 | goto out; |
@@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
485 | } | 480 | } |
486 | 481 | ||
487 | if (ocfs2_should_order_data(inode)) { | 482 | if (ocfs2_should_order_data(inode)) { |
483 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
484 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
488 | ret = walk_page_buffers(handle, | 485 | ret = walk_page_buffers(handle, |
489 | page_buffers(page), | 486 | page_buffers(page), |
490 | from, to, NULL, | 487 | from, to, NULL, |
491 | ocfs2_journal_dirty_data); | 488 | ocfs2_journal_dirty_data); |
492 | if (ret < 0) | 489 | #endif |
490 | if (ret < 0) | ||
493 | mlog_errno(ret); | 491 | mlog_errno(ret); |
494 | } | 492 | } |
495 | out: | 493 | out: |
@@ -594,7 +592,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
594 | goto bail; | 592 | goto bail; |
595 | } | 593 | } |
596 | 594 | ||
597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 595 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { |
598 | ocfs2_error(inode->i_sb, | 596 | ocfs2_error(inode->i_sb, |
599 | "Inode %llu has a hole at block %llu\n", | 597 | "Inode %llu has a hole at block %llu\n", |
600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 598 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | |||
669 | { | 667 | { |
670 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 668 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
671 | 669 | ||
672 | journal_invalidatepage(journal, page, offset); | 670 | jbd2_journal_invalidatepage(journal, page, offset); |
673 | } | 671 | } |
674 | 672 | ||
675 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 673 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
678 | 676 | ||
679 | if (!page_has_buffers(page)) | 677 | if (!page_has_buffers(page)) |
680 | return 0; | 678 | return 0; |
681 | return journal_try_to_free_buffers(journal, page, wait); | 679 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
682 | } | 680 | } |
683 | 681 | ||
684 | static ssize_t ocfs2_direct_IO(int rw, | 682 | static ssize_t ocfs2_direct_IO(int rw, |
@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode, | |||
1074 | tmppage = wc->w_pages[i]; | 1072 | tmppage = wc->w_pages[i]; |
1075 | 1073 | ||
1076 | if (page_has_buffers(tmppage)) { | 1074 | if (page_has_buffers(tmppage)) { |
1077 | if (ocfs2_should_order_data(inode)) | 1075 | if (ocfs2_should_order_data(inode)) { |
1076 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
1077 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
1078 | walk_page_buffers(wc->w_handle, | 1078 | walk_page_buffers(wc->w_handle, |
1079 | page_buffers(tmppage), | 1079 | page_buffers(tmppage), |
1080 | from, to, NULL, | 1080 | from, to, NULL, |
1081 | ocfs2_journal_dirty_data); | 1081 | ocfs2_journal_dirty_data); |
1082 | #endif | ||
1083 | } | ||
1082 | 1084 | ||
1083 | block_commit_write(tmppage, from, to); | 1085 | block_commit_write(tmppage, from, to); |
1084 | } | 1086 | } |
@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1242 | int ret, i, new, should_zero = 0; | 1244 | int ret, i, new, should_zero = 0; |
1243 | u64 v_blkno, p_blkno; | 1245 | u64 v_blkno, p_blkno; |
1244 | struct inode *inode = mapping->host; | 1246 | struct inode *inode = mapping->host; |
1247 | struct ocfs2_extent_tree et; | ||
1245 | 1248 | ||
1246 | new = phys == 0 ? 1 : 0; | 1249 | new = phys == 0 ? 1 : 0; |
1247 | if (new || unwritten) | 1250 | if (new || unwritten) |
@@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1255 | * any additional semaphores or cluster locks. | 1258 | * any additional semaphores or cluster locks. |
1256 | */ | 1259 | */ |
1257 | tmp_pos = cpos; | 1260 | tmp_pos = cpos; |
1258 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1261 | ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, |
1259 | &tmp_pos, 1, 0, wc->w_di_bh, | 1262 | &tmp_pos, 1, 0, wc->w_di_bh, |
1260 | wc->w_handle, data_ac, | 1263 | wc->w_handle, data_ac, |
1261 | meta_ac, NULL); | 1264 | meta_ac, NULL); |
1262 | /* | 1265 | /* |
1263 | * This shouldn't happen because we must have already | 1266 | * This shouldn't happen because we must have already |
1264 | * calculated the correct meta data allocation required. The | 1267 | * calculated the correct meta data allocation required. The |
@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1276 | goto out; | 1279 | goto out; |
1277 | } | 1280 | } |
1278 | } else if (unwritten) { | 1281 | } else if (unwritten) { |
1279 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | 1282 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); |
1283 | ret = ocfs2_mark_extent_written(inode, &et, | ||
1280 | wc->w_handle, cpos, 1, phys, | 1284 | wc->w_handle, cpos, 1, phys, |
1281 | meta_ac, &wc->w_dealloc); | 1285 | meta_ac, &wc->w_dealloc); |
1282 | if (ret < 0) { | 1286 | if (ret < 0) { |
@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1665 | struct ocfs2_alloc_context *data_ac = NULL; | 1669 | struct ocfs2_alloc_context *data_ac = NULL; |
1666 | struct ocfs2_alloc_context *meta_ac = NULL; | 1670 | struct ocfs2_alloc_context *meta_ac = NULL; |
1667 | handle_t *handle; | 1671 | handle_t *handle; |
1672 | struct ocfs2_extent_tree et; | ||
1668 | 1673 | ||
1669 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); | 1674 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); |
1670 | if (ret) { | 1675 | if (ret) { |
@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1712 | * ocfs2_lock_allocators(). It greatly over-estimates | 1717 | * ocfs2_lock_allocators(). It greatly over-estimates |
1713 | * the work to be done. | 1718 | * the work to be done. |
1714 | */ | 1719 | */ |
1715 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | 1720 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u," |
1716 | extents_to_split, &data_ac, &meta_ac); | 1721 | " clusters_to_add = %u, extents_to_split = %u\n", |
1722 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1723 | (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), | ||
1724 | clusters_to_alloc, extents_to_split); | ||
1725 | |||
1726 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); | ||
1727 | ret = ocfs2_lock_allocators(inode, &et, | ||
1728 | clusters_to_alloc, extents_to_split, | ||
1729 | &data_ac, &meta_ac); | ||
1717 | if (ret) { | 1730 | if (ret) { |
1718 | mlog_errno(ret); | 1731 | mlog_errno(ret); |
1719 | goto out; | 1732 | goto out; |
1720 | } | 1733 | } |
1721 | 1734 | ||
1722 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, | 1735 | credits = ocfs2_calc_extend_credits(inode->i_sb, |
1736 | &di->id2.i_list, | ||
1723 | clusters_to_alloc); | 1737 | clusters_to_alloc); |
1724 | 1738 | ||
1725 | } | 1739 | } |
@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
1905 | } | 1919 | } |
1906 | 1920 | ||
1907 | if (page_has_buffers(tmppage)) { | 1921 | if (page_has_buffers(tmppage)) { |
1908 | if (ocfs2_should_order_data(inode)) | 1922 | if (ocfs2_should_order_data(inode)) { |
1923 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
1924 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
1909 | walk_page_buffers(wc->w_handle, | 1925 | walk_page_buffers(wc->w_handle, |
1910 | page_buffers(tmppage), | 1926 | page_buffers(tmppage), |
1911 | from, to, NULL, | 1927 | from, to, NULL, |
1912 | ocfs2_journal_dirty_data); | 1928 | ocfs2_journal_dirty_data); |
1929 | #endif | ||
1930 | } | ||
1913 | block_commit_write(tmppage, from, to); | 1931 | block_commit_write(tmppage, from, to); |
1914 | } | 1932 | } |
1915 | } | 1933 | } |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index f136639f5b41..7e947c672469 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
66 | /* remove from dirty list before I/O. */ | 66 | /* remove from dirty list before I/O. */ |
67 | clear_buffer_dirty(bh); | 67 | clear_buffer_dirty(bh); |
68 | 68 | ||
69 | get_bh(bh); /* for end_buffer_write_sync() */ | 69 | get_bh(bh); /* for end_buffer_write_sync() */ |
70 | bh->b_end_io = end_buffer_write_sync; | 70 | bh->b_end_io = end_buffer_write_sync; |
71 | submit_bh(WRITE, bh); | 71 | submit_bh(WRITE, bh); |
72 | 72 | ||
@@ -88,22 +88,103 @@ out: | |||
88 | return ret; | 88 | return ret; |
89 | } | 89 | } |
90 | 90 | ||
91 | int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | 91 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
92 | struct buffer_head *bhs[], int flags, | 92 | unsigned int nr, struct buffer_head *bhs[]) |
93 | struct inode *inode) | 93 | { |
94 | int status = 0; | ||
95 | unsigned int i; | ||
96 | struct buffer_head *bh; | ||
97 | |||
98 | if (!nr) { | ||
99 | mlog(ML_BH_IO, "No buffers will be read!\n"); | ||
100 | goto bail; | ||
101 | } | ||
102 | |||
103 | for (i = 0 ; i < nr ; i++) { | ||
104 | if (bhs[i] == NULL) { | ||
105 | bhs[i] = sb_getblk(osb->sb, block++); | ||
106 | if (bhs[i] == NULL) { | ||
107 | status = -EIO; | ||
108 | mlog_errno(status); | ||
109 | goto bail; | ||
110 | } | ||
111 | } | ||
112 | bh = bhs[i]; | ||
113 | |||
114 | if (buffer_jbd(bh)) { | ||
115 | mlog(ML_ERROR, | ||
116 | "trying to sync read a jbd " | ||
117 | "managed bh (blocknr = %llu), skipping\n", | ||
118 | (unsigned long long)bh->b_blocknr); | ||
119 | continue; | ||
120 | } | ||
121 | |||
122 | if (buffer_dirty(bh)) { | ||
123 | /* This should probably be a BUG, or | ||
124 | * at least return an error. */ | ||
125 | mlog(ML_ERROR, | ||
126 | "trying to sync read a dirty " | ||
127 | "buffer! (blocknr = %llu), skipping\n", | ||
128 | (unsigned long long)bh->b_blocknr); | ||
129 | continue; | ||
130 | } | ||
131 | |||
132 | lock_buffer(bh); | ||
133 | if (buffer_jbd(bh)) { | ||
134 | mlog(ML_ERROR, | ||
135 | "block %llu had the JBD bit set " | ||
136 | "while I was in lock_buffer!", | ||
137 | (unsigned long long)bh->b_blocknr); | ||
138 | BUG(); | ||
139 | } | ||
140 | |||
141 | clear_buffer_uptodate(bh); | ||
142 | get_bh(bh); /* for end_buffer_read_sync() */ | ||
143 | bh->b_end_io = end_buffer_read_sync; | ||
144 | submit_bh(READ, bh); | ||
145 | } | ||
146 | |||
147 | for (i = nr; i > 0; i--) { | ||
148 | bh = bhs[i - 1]; | ||
149 | |||
150 | if (buffer_jbd(bh)) { | ||
151 | mlog(ML_ERROR, | ||
152 | "the journal got the buffer while it was " | ||
153 | "locked for io! (blocknr = %llu)\n", | ||
154 | (unsigned long long)bh->b_blocknr); | ||
155 | BUG(); | ||
156 | } | ||
157 | |||
158 | wait_on_buffer(bh); | ||
159 | if (!buffer_uptodate(bh)) { | ||
160 | /* Status won't be cleared from here on out, | ||
161 | * so we can safely record this and loop back | ||
162 | * to cleanup the other buffers. */ | ||
163 | status = -EIO; | ||
164 | put_bh(bh); | ||
165 | bhs[i - 1] = NULL; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | bail: | ||
170 | return status; | ||
171 | } | ||
172 | |||
173 | int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | ||
174 | struct buffer_head *bhs[], int flags) | ||
94 | { | 175 | { |
95 | int status = 0; | 176 | int status = 0; |
96 | struct super_block *sb; | ||
97 | int i, ignore_cache = 0; | 177 | int i, ignore_cache = 0; |
98 | struct buffer_head *bh; | 178 | struct buffer_head *bh; |
99 | 179 | ||
100 | mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", | 180 | mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", |
101 | (unsigned long long)block, nr, flags, inode); | 181 | inode, (unsigned long long)block, nr, flags); |
102 | 182 | ||
183 | BUG_ON(!inode); | ||
103 | BUG_ON((flags & OCFS2_BH_READAHEAD) && | 184 | BUG_ON((flags & OCFS2_BH_READAHEAD) && |
104 | (!inode || !(flags & OCFS2_BH_CACHED))); | 185 | (flags & OCFS2_BH_IGNORE_CACHE)); |
105 | 186 | ||
106 | if (osb == NULL || osb->sb == NULL || bhs == NULL) { | 187 | if (bhs == NULL) { |
107 | status = -EINVAL; | 188 | status = -EINVAL; |
108 | mlog_errno(status); | 189 | mlog_errno(status); |
109 | goto bail; | 190 | goto bail; |
@@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
122 | goto bail; | 203 | goto bail; |
123 | } | 204 | } |
124 | 205 | ||
125 | sb = osb->sb; | 206 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); |
126 | |||
127 | if (flags & OCFS2_BH_CACHED && !inode) | ||
128 | flags &= ~OCFS2_BH_CACHED; | ||
129 | |||
130 | if (inode) | ||
131 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | ||
132 | for (i = 0 ; i < nr ; i++) { | 207 | for (i = 0 ; i < nr ; i++) { |
133 | if (bhs[i] == NULL) { | 208 | if (bhs[i] == NULL) { |
134 | bhs[i] = sb_getblk(sb, block++); | 209 | bhs[i] = sb_getblk(inode->i_sb, block++); |
135 | if (bhs[i] == NULL) { | 210 | if (bhs[i] == NULL) { |
136 | if (inode) | 211 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
137 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
138 | status = -EIO; | 212 | status = -EIO; |
139 | mlog_errno(status); | 213 | mlog_errno(status); |
140 | goto bail; | 214 | goto bail; |
141 | } | 215 | } |
142 | } | 216 | } |
143 | bh = bhs[i]; | 217 | bh = bhs[i]; |
144 | ignore_cache = 0; | 218 | ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); |
145 | 219 | ||
146 | /* There are three read-ahead cases here which we need to | 220 | /* There are three read-ahead cases here which we need to |
147 | * be concerned with. All three assume a buffer has | 221 | * be concerned with. All three assume a buffer has |
@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
167 | * before our is-it-in-flight check. | 241 | * before our is-it-in-flight check. |
168 | */ | 242 | */ |
169 | 243 | ||
170 | if (flags & OCFS2_BH_CACHED && | 244 | if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { |
171 | !ocfs2_buffer_uptodate(inode, bh)) { | ||
172 | mlog(ML_UPTODATE, | 245 | mlog(ML_UPTODATE, |
173 | "bh (%llu), inode %llu not uptodate\n", | 246 | "bh (%llu), inode %llu not uptodate\n", |
174 | (unsigned long long)bh->b_blocknr, | 247 | (unsigned long long)bh->b_blocknr, |
175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 248 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
249 | /* We're using ignore_cache here to say | ||
250 | * "go to disk" */ | ||
176 | ignore_cache = 1; | 251 | ignore_cache = 1; |
177 | } | 252 | } |
178 | 253 | ||
179 | /* XXX: Can we ever get this and *not* have the cached | 254 | /* XXX: Can we ever get this and *not* have the cached |
180 | * flag set? */ | 255 | * flag set? */ |
181 | if (buffer_jbd(bh)) { | 256 | if (buffer_jbd(bh)) { |
182 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) | 257 | if (ignore_cache) |
183 | mlog(ML_BH_IO, "trying to sync read a jbd " | 258 | mlog(ML_BH_IO, "trying to sync read a jbd " |
184 | "managed bh (blocknr = %llu)\n", | 259 | "managed bh (blocknr = %llu)\n", |
185 | (unsigned long long)bh->b_blocknr); | 260 | (unsigned long long)bh->b_blocknr); |
186 | continue; | 261 | continue; |
187 | } | 262 | } |
188 | 263 | ||
189 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { | 264 | if (ignore_cache) { |
190 | if (buffer_dirty(bh)) { | 265 | if (buffer_dirty(bh)) { |
191 | /* This should probably be a BUG, or | 266 | /* This should probably be a BUG, or |
192 | * at least return an error. */ | 267 | * at least return an error. */ |
@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
221 | * previously read-ahead buffer may have | 296 | * previously read-ahead buffer may have |
222 | * completed I/O while we were waiting for the | 297 | * completed I/O while we were waiting for the |
223 | * buffer lock. */ | 298 | * buffer lock. */ |
224 | if ((flags & OCFS2_BH_CACHED) | 299 | if (!(flags & OCFS2_BH_IGNORE_CACHE) |
225 | && !(flags & OCFS2_BH_READAHEAD) | 300 | && !(flags & OCFS2_BH_READAHEAD) |
226 | && ocfs2_buffer_uptodate(inode, bh)) { | 301 | && ocfs2_buffer_uptodate(inode, bh)) { |
227 | unlock_buffer(bh); | 302 | unlock_buffer(bh); |
@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
265 | /* Always set the buffer in the cache, even if it was | 340 | /* Always set the buffer in the cache, even if it was |
266 | * a forced read, or read-ahead which hasn't yet | 341 | * a forced read, or read-ahead which hasn't yet |
267 | * completed. */ | 342 | * completed. */ |
268 | if (inode) | 343 | ocfs2_set_buffer_uptodate(inode, bh); |
269 | ocfs2_set_buffer_uptodate(inode, bh); | ||
270 | } | 344 | } |
271 | if (inode) | 345 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
272 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
273 | 346 | ||
274 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", | 347 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
275 | (unsigned long long)block, nr, | 348 | (unsigned long long)block, nr, |
276 | (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); | 349 | ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", |
350 | flags); | ||
277 | 351 | ||
278 | bail: | 352 | bail: |
279 | 353 | ||
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index c2e78614c3e5..75e1dcb1ade7 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -31,31 +31,29 @@ | |||
31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, | 31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, |
32 | int uptodate); | 32 | int uptodate); |
33 | 33 | ||
34 | static inline int ocfs2_read_block(struct ocfs2_super *osb, | 34 | static inline int ocfs2_read_block(struct inode *inode, |
35 | u64 off, | 35 | u64 off, |
36 | struct buffer_head **bh, | 36 | struct buffer_head **bh); |
37 | int flags, | ||
38 | struct inode *inode); | ||
39 | 37 | ||
40 | int ocfs2_write_block(struct ocfs2_super *osb, | 38 | int ocfs2_write_block(struct ocfs2_super *osb, |
41 | struct buffer_head *bh, | 39 | struct buffer_head *bh, |
42 | struct inode *inode); | 40 | struct inode *inode); |
43 | int ocfs2_read_blocks(struct ocfs2_super *osb, | 41 | int ocfs2_read_blocks(struct inode *inode, |
44 | u64 block, | 42 | u64 block, |
45 | int nr, | 43 | int nr, |
46 | struct buffer_head *bhs[], | 44 | struct buffer_head *bhs[], |
47 | int flags, | 45 | int flags); |
48 | struct inode *inode); | 46 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
47 | unsigned int nr, struct buffer_head *bhs[]); | ||
49 | 48 | ||
50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | 49 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, |
51 | struct buffer_head *bh); | 50 | struct buffer_head *bh); |
52 | 51 | ||
53 | #define OCFS2_BH_CACHED 1 | 52 | #define OCFS2_BH_IGNORE_CACHE 1 |
54 | #define OCFS2_BH_READAHEAD 8 | 53 | #define OCFS2_BH_READAHEAD 8 |
55 | 54 | ||
56 | static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | 55 | static inline int ocfs2_read_block(struct inode *inode, u64 off, |
57 | struct buffer_head **bh, int flags, | 56 | struct buffer_head **bh) |
58 | struct inode *inode) | ||
59 | { | 57 | { |
60 | int status = 0; | 58 | int status = 0; |
61 | 59 | ||
@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | |||
65 | goto bail; | 63 | goto bail; |
66 | } | 64 | } |
67 | 65 | ||
68 | status = ocfs2_read_blocks(osb, off, 1, bh, | 66 | status = ocfs2_read_blocks(inode, off, 1, bh, 0); |
69 | flags, inode); | ||
70 | 67 | ||
71 | bail: | 68 | bail: |
72 | return status; | 69 | return status; |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 23c732f27529..d8a0cb92cef6 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
109 | define_mask(CONN), | 109 | define_mask(CONN), |
110 | define_mask(QUORUM), | 110 | define_mask(QUORUM), |
111 | define_mask(EXPORT), | 111 | define_mask(EXPORT), |
112 | define_mask(XATTR), | ||
112 | define_mask(ERROR), | 113 | define_mask(ERROR), |
113 | define_mask(NOTICE), | 114 | define_mask(NOTICE), |
114 | define_mask(KTHREAD), | 115 | define_mask(KTHREAD), |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 597e064bb94f..57670c680471 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -112,6 +112,7 @@ | |||
112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ | 112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ |
113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ | 113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ |
114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ | 114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ |
115 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | ||
115 | /* bits that are infrequently given and frequently matched in the high word */ | 116 | /* bits that are infrequently given and frequently matched in the high word */ |
116 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 117 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
117 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 118 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) | |||
138 | " message id: %d\n" | 138 | " message id: %d\n" |
139 | " message type: %u\n" | 139 | " message type: %u\n" |
140 | " message key: 0x%08x\n" | 140 | " message key: 0x%08x\n" |
141 | " sock acquiry: %lu.%lu\n" | 141 | " sock acquiry: %lu.%ld\n" |
142 | " send start: %lu.%lu\n" | 142 | " send start: %lu.%ld\n" |
143 | " wait start: %lu.%lu\n", | 143 | " wait start: %lu.%ld\n", |
144 | nst, (unsigned long)nst->st_task->pid, | 144 | nst, (unsigned long)nst->st_task->pid, |
145 | (unsigned long)nst->st_task->tgid, | 145 | (unsigned long)nst->st_task->tgid, |
146 | nst->st_task->comm, nst->st_node, | 146 | nst->st_task->comm, nst->st_node, |
147 | nst->st_sc, nst->st_id, nst->st_msg_type, | 147 | nst->st_sc, nst->st_id, nst->st_msg_type, |
148 | nst->st_msg_key, | 148 | nst->st_msg_key, |
149 | nst->st_sock_time.tv_sec, | 149 | nst->st_sock_time.tv_sec, |
150 | (unsigned long)nst->st_sock_time.tv_usec, | 150 | (long)nst->st_sock_time.tv_usec, |
151 | nst->st_send_time.tv_sec, | 151 | nst->st_send_time.tv_sec, |
152 | (unsigned long)nst->st_send_time.tv_usec, | 152 | (long)nst->st_send_time.tv_usec, |
153 | nst->st_status_time.tv_sec, | 153 | nst->st_status_time.tv_sec, |
154 | nst->st_status_time.tv_usec); | 154 | (long)nst->st_status_time.tv_usec); |
155 | } | 155 | } |
156 | 156 | ||
157 | spin_unlock(&o2net_debug_lock); | 157 | spin_unlock(&o2net_debug_lock); |
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
276 | return sc; /* unused, just needs to be null when done */ | 276 | return sc; /* unused, just needs to be null when done */ |
277 | } | 277 | } |
278 | 278 | ||
279 | #define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec | 279 | #define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec |
280 | 280 | ||
281 | static int sc_seq_show(struct seq_file *seq, void *v) | 281 | static int sc_seq_show(struct seq_file *seq, void *v) |
282 | { | 282 | { |
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) | |||
309 | " remote node: %s\n" | 309 | " remote node: %s\n" |
310 | " page off: %zu\n" | 310 | " page off: %zu\n" |
311 | " handshake ok: %u\n" | 311 | " handshake ok: %u\n" |
312 | " timer: %lu.%lu\n" | 312 | " timer: %lu.%ld\n" |
313 | " data ready: %lu.%lu\n" | 313 | " data ready: %lu.%ld\n" |
314 | " advance start: %lu.%lu\n" | 314 | " advance start: %lu.%ld\n" |
315 | " advance stop: %lu.%lu\n" | 315 | " advance stop: %lu.%ld\n" |
316 | " func start: %lu.%lu\n" | 316 | " func start: %lu.%ld\n" |
317 | " func stop: %lu.%lu\n" | 317 | " func stop: %lu.%ld\n" |
318 | " func key: %u\n" | 318 | " func key: %u\n" |
319 | " func type: %u\n", | 319 | " func type: %u\n", |
320 | sc, | 320 | sc, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | |||
143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
144 | 144 | ||
145 | #ifdef CONFIG_DEBUG_FS | 145 | #ifdef CONFIG_DEBUG_FS |
146 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 146 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
147 | u32 msgkey, struct task_struct *task, u8 node) | 147 | u32 msgkey, struct task_struct *task, u8 node) |
148 | { | 148 | { |
149 | INIT_LIST_HEAD(&nst->st_net_debug_item); | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
150 | nst->st_task = task; | 150 | nst->st_task = task; |
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | |||
153 | nst->st_node = node; | 153 | nst->st_node = node; |
154 | } | 154 | } |
155 | 155 | ||
156 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | 156 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) |
157 | { | 157 | { |
158 | do_gettimeofday(&nst->st_sock_time); | 158 | do_gettimeofday(&nst->st_sock_time); |
159 | } | 159 | } |
160 | 160 | ||
161 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | 161 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) |
162 | { | 162 | { |
163 | do_gettimeofday(&nst->st_send_time); | 163 | do_gettimeofday(&nst->st_send_time); |
164 | } | 164 | } |
165 | 165 | ||
166 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | 166 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) |
167 | { | 167 | { |
168 | do_gettimeofday(&nst->st_status_time); | 168 | do_gettimeofday(&nst->st_status_time); |
169 | } | 169 | } |
170 | 170 | ||
171 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 171 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, |
172 | struct o2net_sock_container *sc) | 172 | struct o2net_sock_container *sc) |
173 | { | 173 | { |
174 | nst->st_sc = sc; | 174 | nst->st_sc = sc; |
175 | } | 175 | } |
176 | 176 | ||
177 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | 177 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) |
178 | { | 178 | { |
179 | nst->st_id = msg_id; | 179 | nst->st_id = msg_id; |
180 | } | 180 | } |
181 | |||
182 | #else /* CONFIG_DEBUG_FS */ | ||
183 | |||
184 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
185 | u32 msgkey, struct task_struct *task, u8 node) | ||
186 | { | ||
187 | } | ||
188 | |||
189 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
190 | { | ||
191 | } | ||
192 | |||
193 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
194 | { | ||
195 | } | ||
196 | |||
197 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
198 | { | ||
199 | } | ||
200 | |||
201 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
202 | struct o2net_sock_container *sc) | ||
203 | { | ||
204 | } | ||
205 | |||
206 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
207 | u32 msg_id) | ||
208 | { | ||
209 | } | ||
210 | |||
181 | #endif /* CONFIG_DEBUG_FS */ | 211 | #endif /* CONFIG_DEBUG_FS */ |
182 | 212 | ||
183 | static inline int o2net_reconnect_delay(void) | 213 | static inline int o2net_reconnect_delay(void) |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -224,42 +224,10 @@ struct o2net_send_tracking { | |||
224 | struct timeval st_send_time; | 224 | struct timeval st_send_time; |
225 | struct timeval st_status_time; | 225 | struct timeval st_status_time; |
226 | }; | 226 | }; |
227 | |||
228 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
229 | u32 msgkey, struct task_struct *task, u8 node); | ||
230 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); | ||
231 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst); | ||
232 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst); | ||
233 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
234 | struct o2net_sock_container *sc); | ||
235 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); | ||
236 | |||
237 | #else | 227 | #else |
238 | struct o2net_send_tracking { | 228 | struct o2net_send_tracking { |
239 | u32 dummy; | 229 | u32 dummy; |
240 | }; | 230 | }; |
241 | |||
242 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
243 | u32 msgkey, struct task_struct *task, u8 node) | ||
244 | { | ||
245 | } | ||
246 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
247 | { | ||
248 | } | ||
249 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
250 | { | ||
251 | } | ||
252 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
253 | { | ||
254 | } | ||
255 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
256 | struct o2net_sock_container *sc) | ||
257 | { | ||
258 | } | ||
259 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
260 | u32 msg_id) | ||
261 | { | ||
262 | } | ||
263 | #endif /* CONFIG_DEBUG_FS */ | 231 | #endif /* CONFIG_DEBUG_FS */ |
264 | 232 | ||
265 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..026e6eb85187 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
82 | struct ocfs2_alloc_context *meta_ac, | 82 | struct ocfs2_alloc_context *meta_ac, |
83 | struct buffer_head **new_bh); | 83 | struct buffer_head **new_bh); |
84 | 84 | ||
85 | static struct buffer_head *ocfs2_bread(struct inode *inode, | ||
86 | int block, int *err, int reada) | ||
87 | { | ||
88 | struct buffer_head *bh = NULL; | ||
89 | int tmperr; | ||
90 | u64 p_blkno; | ||
91 | int readflags = 0; | ||
92 | |||
93 | if (reada) | ||
94 | readflags |= OCFS2_BH_READAHEAD; | ||
95 | |||
96 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
97 | i_size_read(inode)) { | ||
98 | BUG_ON(!reada); | ||
99 | return NULL; | ||
100 | } | ||
101 | |||
102 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
103 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
104 | NULL); | ||
105 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
106 | if (tmperr < 0) { | ||
107 | mlog_errno(tmperr); | ||
108 | goto fail; | ||
109 | } | ||
110 | |||
111 | tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); | ||
112 | if (tmperr < 0) | ||
113 | goto fail; | ||
114 | |||
115 | tmperr = 0; | ||
116 | |||
117 | *err = 0; | ||
118 | return bh; | ||
119 | |||
120 | fail: | ||
121 | brelse(bh); | ||
122 | bh = NULL; | ||
123 | |||
124 | *err = -EIO; | ||
125 | return NULL; | ||
126 | } | ||
127 | |||
85 | /* | 128 | /* |
86 | * bh passed here can be an inode block or a dir data block, depending | 129 | * bh passed here can be an inode block or a dir data block, depending |
87 | * on the inode inline data flag. | 130 | * on the inode inline data flag. |
@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, | |||
188 | struct ocfs2_dinode *di; | 231 | struct ocfs2_dinode *di; |
189 | struct ocfs2_inline_data *data; | 232 | struct ocfs2_inline_data *data; |
190 | 233 | ||
191 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 234 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
192 | &di_bh, OCFS2_BH_CACHED, dir); | ||
193 | if (ret) { | 235 | if (ret) { |
194 | mlog_errno(ret); | 236 | mlog_errno(ret); |
195 | goto out; | 237 | goto out; |
@@ -260,14 +302,13 @@ restart: | |||
260 | } | 302 | } |
261 | if ((bh = bh_use[ra_ptr++]) == NULL) | 303 | if ((bh = bh_use[ra_ptr++]) == NULL) |
262 | goto next; | 304 | goto next; |
263 | wait_on_buffer(bh); | 305 | if (ocfs2_read_block(dir, block, &bh)) { |
264 | if (!buffer_uptodate(bh)) { | 306 | /* read error, skip block & hope for the best. |
265 | /* read error, skip block & hope for the best */ | 307 | * ocfs2_read_block() has released the bh. */ |
266 | ocfs2_error(dir->i_sb, "reading directory %llu, " | 308 | ocfs2_error(dir->i_sb, "reading directory %llu, " |
267 | "offset %lu\n", | 309 | "offset %lu\n", |
268 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 310 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
269 | block); | 311 | block); |
270 | brelse(bh); | ||
271 | goto next; | 312 | goto next; |
272 | } | 313 | } |
273 | i = ocfs2_search_dirblock(bh, dir, name, namelen, | 314 | i = ocfs2_search_dirblock(bh, dir, name, namelen, |
@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, | |||
417 | struct ocfs2_dinode *di; | 458 | struct ocfs2_dinode *di; |
418 | struct ocfs2_inline_data *data; | 459 | struct ocfs2_inline_data *data; |
419 | 460 | ||
420 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 461 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
421 | &di_bh, OCFS2_BH_CACHED, dir); | ||
422 | if (ret) { | 462 | if (ret) { |
423 | mlog_errno(ret); | 463 | mlog_errno(ret); |
424 | goto out; | 464 | goto out; |
@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, | |||
596 | struct ocfs2_inline_data *data; | 636 | struct ocfs2_inline_data *data; |
597 | struct ocfs2_dir_entry *de; | 637 | struct ocfs2_dir_entry *de; |
598 | 638 | ||
599 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 639 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
600 | &di_bh, OCFS2_BH_CACHED, inode); | ||
601 | if (ret) { | 640 | if (ret) { |
602 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", | 641 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", |
603 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 642 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
@@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
716 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); | 755 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); |
717 | i > 0; i--) { | 756 | i > 0; i--) { |
718 | tmp = ocfs2_bread(inode, ++blk, &err, 1); | 757 | tmp = ocfs2_bread(inode, ++blk, &err, 1); |
719 | if (tmp) | 758 | brelse(tmp); |
720 | brelse(tmp); | ||
721 | } | 759 | } |
722 | last_ra_blk = blk; | 760 | last_ra_blk = blk; |
723 | ra_sectors = 8; | 761 | ra_sectors = 8; |
@@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name, | |||
899 | leave: | 937 | leave: |
900 | if (status < 0) { | 938 | if (status < 0) { |
901 | *dirent = NULL; | 939 | *dirent = NULL; |
902 | if (*dirent_bh) { | 940 | brelse(*dirent_bh); |
903 | brelse(*dirent_bh); | 941 | *dirent_bh = NULL; |
904 | *dirent_bh = NULL; | ||
905 | } | ||
906 | } | 942 | } |
907 | 943 | ||
908 | mlog_exit(status); | 944 | mlog_exit(status); |
@@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
951 | 987 | ||
952 | ret = 0; | 988 | ret = 0; |
953 | bail: | 989 | bail: |
954 | if (dirent_bh) | 990 | brelse(dirent_bh); |
955 | brelse(dirent_bh); | ||
956 | 991 | ||
957 | mlog_exit(ret); | 992 | mlog_exit(ret); |
958 | return ret; | 993 | return ret; |
@@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
1127 | 1162 | ||
1128 | status = 0; | 1163 | status = 0; |
1129 | bail: | 1164 | bail: |
1130 | if (new_bh) | 1165 | brelse(new_bh); |
1131 | brelse(new_bh); | ||
1132 | 1166 | ||
1133 | mlog_exit(status); | 1167 | mlog_exit(status); |
1134 | return status; | 1168 | return status; |
@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1192 | struct buffer_head *dirdata_bh = NULL; | 1226 | struct buffer_head *dirdata_bh = NULL; |
1193 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1227 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1194 | handle_t *handle; | 1228 | handle_t *handle; |
1229 | struct ocfs2_extent_tree et; | ||
1230 | |||
1231 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | ||
1195 | 1232 | ||
1196 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 1233 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
1197 | 1234 | ||
@@ -1300,19 +1337,24 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1300 | di->i_size = cpu_to_le64(sb->s_blocksize); | 1337 | di->i_size = cpu_to_le64(sb->s_blocksize); |
1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 1338 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 1339 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
1303 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1304 | 1340 | ||
1305 | /* | 1341 | /* |
1306 | * This should never fail as our extent list is empty and all | 1342 | * This should never fail as our extent list is empty and all |
1307 | * related blocks have been journaled already. | 1343 | * related blocks have been journaled already. |
1308 | */ | 1344 | */ |
1309 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0, | 1345 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, |
1310 | NULL); | 1346 | 0, NULL); |
1311 | if (ret) { | 1347 | if (ret) { |
1312 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
1313 | goto out; | 1349 | goto out_commit; |
1314 | } | 1350 | } |
1315 | 1351 | ||
1352 | /* | ||
1353 | * Set i_blocks after the extent insert for the most up to | ||
1354 | * date ip_clusters value. | ||
1355 | */ | ||
1356 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1357 | |||
1316 | ret = ocfs2_journal_dirty(handle, di_bh); | 1358 | ret = ocfs2_journal_dirty(handle, di_bh); |
1317 | if (ret) { | 1359 | if (ret) { |
1318 | mlog_errno(ret); | 1360 | mlog_errno(ret); |
@@ -1332,11 +1374,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1332 | } | 1374 | } |
1333 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); | 1375 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); |
1334 | 1376 | ||
1335 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno, | 1377 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, |
1336 | len, 0, NULL); | 1378 | blkno, len, 0, NULL); |
1337 | if (ret) { | 1379 | if (ret) { |
1338 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
1339 | goto out; | 1381 | goto out_commit; |
1340 | } | 1382 | } |
1341 | } | 1383 | } |
1342 | 1384 | ||
@@ -1378,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
1378 | if (extend) { | 1420 | if (extend) { |
1379 | u32 offset = OCFS2_I(dir)->ip_clusters; | 1421 | u32 offset = OCFS2_I(dir)->ip_clusters; |
1380 | 1422 | ||
1381 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, | 1423 | status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, |
1382 | 1, 0, parent_fe_bh, handle, | 1424 | 1, 0, parent_fe_bh, handle, |
1383 | data_ac, meta_ac, NULL); | 1425 | data_ac, meta_ac, NULL); |
1384 | BUG_ON(status == -EAGAIN); | 1426 | BUG_ON(status == -EAGAIN); |
1385 | if (status < 0) { | 1427 | if (status < 0) { |
1386 | mlog_errno(status); | 1428 | mlog_errno(status); |
@@ -1425,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1425 | int credits, num_free_extents, drop_alloc_sem = 0; | 1467 | int credits, num_free_extents, drop_alloc_sem = 0; |
1426 | loff_t dir_i_size; | 1468 | loff_t dir_i_size; |
1427 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 1469 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
1470 | struct ocfs2_extent_list *el = &fe->id2.i_list; | ||
1428 | struct ocfs2_alloc_context *data_ac = NULL; | 1471 | struct ocfs2_alloc_context *data_ac = NULL; |
1429 | struct ocfs2_alloc_context *meta_ac = NULL; | 1472 | struct ocfs2_alloc_context *meta_ac = NULL; |
1430 | handle_t *handle = NULL; | 1473 | handle_t *handle = NULL; |
1431 | struct buffer_head *new_bh = NULL; | 1474 | struct buffer_head *new_bh = NULL; |
1432 | struct ocfs2_dir_entry * de; | 1475 | struct ocfs2_dir_entry * de; |
1433 | struct super_block *sb = osb->sb; | 1476 | struct super_block *sb = osb->sb; |
1477 | struct ocfs2_extent_tree et; | ||
1434 | 1478 | ||
1435 | mlog_entry_void(); | 1479 | mlog_entry_void(); |
1436 | 1480 | ||
@@ -1474,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1474 | spin_lock(&OCFS2_I(dir)->ip_lock); | 1518 | spin_lock(&OCFS2_I(dir)->ip_lock); |
1475 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { | 1519 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { |
1476 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1520 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
1477 | num_free_extents = ocfs2_num_free_extents(osb, dir, fe); | 1521 | ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); |
1522 | num_free_extents = ocfs2_num_free_extents(osb, dir, &et); | ||
1478 | if (num_free_extents < 0) { | 1523 | if (num_free_extents < 0) { |
1479 | status = num_free_extents; | 1524 | status = num_free_extents; |
1480 | mlog_errno(status); | 1525 | mlog_errno(status); |
@@ -1482,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1482 | } | 1527 | } |
1483 | 1528 | ||
1484 | if (!num_free_extents) { | 1529 | if (!num_free_extents) { |
1485 | status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); | 1530 | status = ocfs2_reserve_new_metadata(osb, el, &meta_ac); |
1486 | if (status < 0) { | 1531 | if (status < 0) { |
1487 | if (status != -ENOSPC) | 1532 | if (status != -ENOSPC) |
1488 | mlog_errno(status); | 1533 | mlog_errno(status); |
@@ -1497,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
1497 | goto bail; | 1542 | goto bail; |
1498 | } | 1543 | } |
1499 | 1544 | ||
1500 | credits = ocfs2_calc_extend_credits(sb, fe, 1); | 1545 | credits = ocfs2_calc_extend_credits(sb, el, 1); |
1501 | } else { | 1546 | } else { |
1502 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1547 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
1503 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; | 1548 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; |
@@ -1563,8 +1608,7 @@ bail: | |||
1563 | if (meta_ac) | 1608 | if (meta_ac) |
1564 | ocfs2_free_alloc_context(meta_ac); | 1609 | ocfs2_free_alloc_context(meta_ac); |
1565 | 1610 | ||
1566 | if (new_bh) | 1611 | brelse(new_bh); |
1567 | brelse(new_bh); | ||
1568 | 1612 | ||
1569 | mlog_exit(status); | 1613 | mlog_exit(status); |
1570 | return status; | 1614 | return status; |
@@ -1691,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, | |||
1691 | 1735 | ||
1692 | status = 0; | 1736 | status = 0; |
1693 | bail: | 1737 | bail: |
1694 | if (bh) | 1738 | brelse(bh); |
1695 | brelse(bh); | ||
1696 | 1739 | ||
1697 | mlog_exit(status); | 1740 | mlog_exit(status); |
1698 | return status; | 1741 | return status; |
@@ -1751,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
1751 | *ret_de_bh = bh; | 1794 | *ret_de_bh = bh; |
1752 | bh = NULL; | 1795 | bh = NULL; |
1753 | out: | 1796 | out: |
1754 | if (bh) | 1797 | brelse(bh); |
1755 | brelse(bh); | ||
1756 | return ret; | 1798 | return ret; |
1757 | } | 1799 | } |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index eae3d643a5e4..ec684426034b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
2024 | } else { | 2024 | } else { |
2025 | /* Boo, we have to go to disk. */ | 2025 | /* Boo, we have to go to disk. */ |
2026 | /* read bh, cast, ocfs2_refresh_inode */ | 2026 | /* read bh, cast, ocfs2_refresh_inode */ |
2027 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, | 2027 | status = ocfs2_read_block(inode, oi->ip_blkno, bh); |
2028 | bh, OCFS2_BH_CACHED, inode); | ||
2029 | if (status < 0) { | 2028 | if (status < 0) { |
2030 | mlog_errno(status); | 2029 | mlog_errno(status); |
2031 | goto bail_refresh; | 2030 | goto bail_refresh; |
@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
2086 | return 0; | 2085 | return 0; |
2087 | } | 2086 | } |
2088 | 2087 | ||
2089 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 2088 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); |
2090 | OCFS2_I(inode)->ip_blkno, | ||
2091 | ret_bh, | ||
2092 | OCFS2_BH_CACHED, | ||
2093 | inode); | ||
2094 | if (status < 0) | 2089 | if (status < 0) |
2095 | mlog_errno(status); | 2090 | mlog_errno(status); |
2096 | 2091 | ||
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326fe..2baedac58234 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/fiemap.h> | ||
28 | 29 | ||
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 30 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
30 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
33 | 34 | ||
34 | #include "alloc.h" | 35 | #include "alloc.h" |
36 | #include "dlmglue.h" | ||
35 | #include "extent_map.h" | 37 | #include "extent_map.h" |
36 | #include "inode.h" | 38 | #include "inode.h" |
37 | #include "super.h" | 39 | #include "super.h" |
@@ -282,6 +284,50 @@ out: | |||
282 | kfree(new_emi); | 284 | kfree(new_emi); |
283 | } | 285 | } |
284 | 286 | ||
287 | static int ocfs2_last_eb_is_empty(struct inode *inode, | ||
288 | struct ocfs2_dinode *di) | ||
289 | { | ||
290 | int ret, next_free; | ||
291 | u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); | ||
292 | struct buffer_head *eb_bh = NULL; | ||
293 | struct ocfs2_extent_block *eb; | ||
294 | struct ocfs2_extent_list *el; | ||
295 | |||
296 | ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); | ||
297 | if (ret) { | ||
298 | mlog_errno(ret); | ||
299 | goto out; | ||
300 | } | ||
301 | |||
302 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
303 | el = &eb->h_list; | ||
304 | |||
305 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
306 | ret = -EROFS; | ||
307 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
308 | goto out; | ||
309 | } | ||
310 | |||
311 | if (el->l_tree_depth) { | ||
312 | ocfs2_error(inode->i_sb, | ||
313 | "Inode %lu has non zero tree depth in " | ||
314 | "leaf block %llu\n", inode->i_ino, | ||
315 | (unsigned long long)eb_bh->b_blocknr); | ||
316 | ret = -EROFS; | ||
317 | goto out; | ||
318 | } | ||
319 | |||
320 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
321 | |||
322 | if (next_free == 0 || | ||
323 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) | ||
324 | ret = 1; | ||
325 | |||
326 | out: | ||
327 | brelse(eb_bh); | ||
328 | return ret; | ||
329 | } | ||
330 | |||
285 | /* | 331 | /* |
286 | * Return the 1st index within el which contains an extent start | 332 | * Return the 1st index within el which contains an extent start |
287 | * larger than v_cluster. | 333 | * larger than v_cluster. |
@@ -335,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, | |||
335 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | 381 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) |
336 | goto no_more_extents; | 382 | goto no_more_extents; |
337 | 383 | ||
338 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 384 | ret = ocfs2_read_block(inode, |
339 | le64_to_cpu(eb->h_next_leaf_blk), | 385 | le64_to_cpu(eb->h_next_leaf_blk), |
340 | &next_eb_bh, OCFS2_BH_CACHED, inode); | 386 | &next_eb_bh); |
341 | if (ret) { | 387 | if (ret) { |
342 | mlog_errno(ret); | 388 | mlog_errno(ret); |
343 | goto out; | 389 | goto out; |
@@ -373,42 +419,28 @@ out: | |||
373 | return ret; | 419 | return ret; |
374 | } | 420 | } |
375 | 421 | ||
376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 422 | static int ocfs2_get_clusters_nocache(struct inode *inode, |
377 | u32 *p_cluster, u32 *num_clusters, | 423 | struct buffer_head *di_bh, |
378 | unsigned int *extent_flags) | 424 | u32 v_cluster, unsigned int *hole_len, |
425 | struct ocfs2_extent_rec *ret_rec, | ||
426 | unsigned int *is_last) | ||
379 | { | 427 | { |
380 | int ret, i; | 428 | int i, ret, tree_height, len; |
381 | unsigned int flags = 0; | ||
382 | struct buffer_head *di_bh = NULL; | ||
383 | struct buffer_head *eb_bh = NULL; | ||
384 | struct ocfs2_dinode *di; | 429 | struct ocfs2_dinode *di; |
385 | struct ocfs2_extent_block *eb; | 430 | struct ocfs2_extent_block *uninitialized_var(eb); |
386 | struct ocfs2_extent_list *el; | 431 | struct ocfs2_extent_list *el; |
387 | struct ocfs2_extent_rec *rec; | 432 | struct ocfs2_extent_rec *rec; |
388 | u32 coff; | 433 | struct buffer_head *eb_bh = NULL; |
389 | |||
390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
391 | ret = -ERANGE; | ||
392 | mlog_errno(ret); | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
397 | num_clusters, extent_flags); | ||
398 | if (ret == 0) | ||
399 | goto out; | ||
400 | 434 | ||
401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 435 | memset(ret_rec, 0, sizeof(*ret_rec)); |
402 | &di_bh, OCFS2_BH_CACHED, inode); | 436 | if (is_last) |
403 | if (ret) { | 437 | *is_last = 0; |
404 | mlog_errno(ret); | ||
405 | goto out; | ||
406 | } | ||
407 | 438 | ||
408 | di = (struct ocfs2_dinode *) di_bh->b_data; | 439 | di = (struct ocfs2_dinode *) di_bh->b_data; |
409 | el = &di->id2.i_list; | 440 | el = &di->id2.i_list; |
441 | tree_height = le16_to_cpu(el->l_tree_depth); | ||
410 | 442 | ||
411 | if (el->l_tree_depth) { | 443 | if (tree_height > 0) { |
412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 444 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
413 | if (ret) { | 445 | if (ret) { |
414 | mlog_errno(ret); | 446 | mlog_errno(ret); |
@@ -431,46 +463,202 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
431 | i = ocfs2_search_extent_list(el, v_cluster); | 463 | i = ocfs2_search_extent_list(el, v_cluster); |
432 | if (i == -1) { | 464 | if (i == -1) { |
433 | /* | 465 | /* |
434 | * A hole was found. Return some canned values that | 466 | * Holes can be larger than the maximum size of an |
435 | * callers can key on. If asked for, num_clusters will | 467 | * extent, so we return their lengths in a seperate |
436 | * be populated with the size of the hole. | 468 | * field. |
437 | */ | 469 | */ |
438 | *p_cluster = 0; | 470 | if (hole_len) { |
439 | if (num_clusters) { | ||
440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 471 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
441 | v_cluster, | 472 | v_cluster, &len); |
442 | num_clusters); | ||
443 | if (ret) { | 473 | if (ret) { |
444 | mlog_errno(ret); | 474 | mlog_errno(ret); |
445 | goto out; | 475 | goto out; |
446 | } | 476 | } |
477 | |||
478 | *hole_len = len; | ||
479 | } | ||
480 | goto out_hole; | ||
481 | } | ||
482 | |||
483 | rec = &el->l_recs[i]; | ||
484 | |||
485 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | ||
486 | |||
487 | if (!rec->e_blkno) { | ||
488 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
489 | "record (%u, %u, 0)", inode->i_ino, | ||
490 | le32_to_cpu(rec->e_cpos), | ||
491 | ocfs2_rec_clusters(el, rec)); | ||
492 | ret = -EROFS; | ||
493 | goto out; | ||
494 | } | ||
495 | |||
496 | *ret_rec = *rec; | ||
497 | |||
498 | /* | ||
499 | * Checking for last extent is potentially expensive - we | ||
500 | * might have to look at the next leaf over to see if it's | ||
501 | * empty. | ||
502 | * | ||
503 | * The first two checks are to see whether the caller even | ||
504 | * cares for this information, and if the extent is at least | ||
505 | * the last in it's list. | ||
506 | * | ||
507 | * If those hold true, then the extent is last if any of the | ||
508 | * additional conditions hold true: | ||
509 | * - Extent list is in-inode | ||
510 | * - Extent list is right-most | ||
511 | * - Extent list is 2nd to rightmost, with empty right-most | ||
512 | */ | ||
513 | if (is_last) { | ||
514 | if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
515 | if (tree_height == 0) | ||
516 | *is_last = 1; | ||
517 | else if (eb->h_blkno == di->i_last_eb_blk) | ||
518 | *is_last = 1; | ||
519 | else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { | ||
520 | ret = ocfs2_last_eb_is_empty(inode, di); | ||
521 | if (ret < 0) { | ||
522 | mlog_errno(ret); | ||
523 | goto out; | ||
524 | } | ||
525 | if (ret == 1) | ||
526 | *is_last = 1; | ||
527 | } | ||
528 | } | ||
529 | } | ||
530 | |||
531 | out_hole: | ||
532 | ret = 0; | ||
533 | out: | ||
534 | brelse(eb_bh); | ||
535 | return ret; | ||
536 | } | ||
537 | |||
538 | static void ocfs2_relative_extent_offsets(struct super_block *sb, | ||
539 | u32 v_cluster, | ||
540 | struct ocfs2_extent_rec *rec, | ||
541 | u32 *p_cluster, u32 *num_clusters) | ||
542 | |||
543 | { | ||
544 | u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); | ||
545 | |||
546 | *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); | ||
547 | *p_cluster = *p_cluster + coff; | ||
548 | |||
549 | if (num_clusters) | ||
550 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; | ||
551 | } | ||
552 | |||
553 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
554 | u32 *p_cluster, u32 *num_clusters, | ||
555 | struct ocfs2_extent_list *el) | ||
556 | { | ||
557 | int ret = 0, i; | ||
558 | struct buffer_head *eb_bh = NULL; | ||
559 | struct ocfs2_extent_block *eb; | ||
560 | struct ocfs2_extent_rec *rec; | ||
561 | u32 coff; | ||
562 | |||
563 | if (el->l_tree_depth) { | ||
564 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | ||
565 | if (ret) { | ||
566 | mlog_errno(ret); | ||
567 | goto out; | ||
447 | } | 568 | } |
569 | |||
570 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
571 | el = &eb->h_list; | ||
572 | |||
573 | if (el->l_tree_depth) { | ||
574 | ocfs2_error(inode->i_sb, | ||
575 | "Inode %lu has non zero tree depth in " | ||
576 | "xattr leaf block %llu\n", inode->i_ino, | ||
577 | (unsigned long long)eb_bh->b_blocknr); | ||
578 | ret = -EROFS; | ||
579 | goto out; | ||
580 | } | ||
581 | } | ||
582 | |||
583 | i = ocfs2_search_extent_list(el, v_cluster); | ||
584 | if (i == -1) { | ||
585 | ret = -EROFS; | ||
586 | mlog_errno(ret); | ||
587 | goto out; | ||
448 | } else { | 588 | } else { |
449 | rec = &el->l_recs[i]; | 589 | rec = &el->l_recs[i]; |
450 | |||
451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 590 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
452 | 591 | ||
453 | if (!rec->e_blkno) { | 592 | if (!rec->e_blkno) { |
454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 593 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
455 | "record (%u, %u, 0)", inode->i_ino, | 594 | "record (%u, %u, 0) in xattr", inode->i_ino, |
456 | le32_to_cpu(rec->e_cpos), | 595 | le32_to_cpu(rec->e_cpos), |
457 | ocfs2_rec_clusters(el, rec)); | 596 | ocfs2_rec_clusters(el, rec)); |
458 | ret = -EROFS; | 597 | ret = -EROFS; |
459 | goto out; | 598 | goto out; |
460 | } | 599 | } |
461 | |||
462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 600 | coff = v_cluster - le32_to_cpu(rec->e_cpos); |
463 | |||
464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 601 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, |
465 | le64_to_cpu(rec->e_blkno)); | 602 | le64_to_cpu(rec->e_blkno)); |
466 | *p_cluster = *p_cluster + coff; | 603 | *p_cluster = *p_cluster + coff; |
467 | |||
468 | if (num_clusters) | 604 | if (num_clusters) |
469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 605 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
606 | } | ||
607 | out: | ||
608 | if (eb_bh) | ||
609 | brelse(eb_bh); | ||
610 | return ret; | ||
611 | } | ||
612 | |||
613 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | ||
614 | u32 *p_cluster, u32 *num_clusters, | ||
615 | unsigned int *extent_flags) | ||
616 | { | ||
617 | int ret; | ||
618 | unsigned int uninitialized_var(hole_len), flags = 0; | ||
619 | struct buffer_head *di_bh = NULL; | ||
620 | struct ocfs2_extent_rec rec; | ||
621 | |||
622 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
623 | ret = -ERANGE; | ||
624 | mlog_errno(ret); | ||
625 | goto out; | ||
626 | } | ||
627 | |||
628 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
629 | num_clusters, extent_flags); | ||
630 | if (ret == 0) | ||
631 | goto out; | ||
632 | |||
633 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); | ||
634 | if (ret) { | ||
635 | mlog_errno(ret); | ||
636 | goto out; | ||
637 | } | ||
470 | 638 | ||
471 | flags = rec->e_flags; | 639 | ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, |
640 | &rec, NULL); | ||
641 | if (ret) { | ||
642 | mlog_errno(ret); | ||
643 | goto out; | ||
644 | } | ||
645 | |||
646 | if (rec.e_blkno == 0ULL) { | ||
647 | /* | ||
648 | * A hole was found. Return some canned values that | ||
649 | * callers can key on. If asked for, num_clusters will | ||
650 | * be populated with the size of the hole. | ||
651 | */ | ||
652 | *p_cluster = 0; | ||
653 | if (num_clusters) { | ||
654 | *num_clusters = hole_len; | ||
655 | } | ||
656 | } else { | ||
657 | ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, | ||
658 | p_cluster, num_clusters); | ||
659 | flags = rec.e_flags; | ||
472 | 660 | ||
473 | ocfs2_extent_map_insert_rec(inode, rec); | 661 | ocfs2_extent_map_insert_rec(inode, &rec); |
474 | } | 662 | } |
475 | 663 | ||
476 | if (extent_flags) | 664 | if (extent_flags) |
@@ -478,7 +666,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
478 | 666 | ||
479 | out: | 667 | out: |
480 | brelse(di_bh); | 668 | brelse(di_bh); |
481 | brelse(eb_bh); | ||
482 | return ret; | 669 | return ret; |
483 | } | 670 | } |
484 | 671 | ||
@@ -521,3 +708,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
521 | out: | 708 | out: |
522 | return ret; | 709 | return ret; |
523 | } | 710 | } |
711 | |||
712 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | ||
713 | struct fiemap_extent_info *fieinfo, | ||
714 | u64 map_start) | ||
715 | { | ||
716 | int ret; | ||
717 | unsigned int id_count; | ||
718 | struct ocfs2_dinode *di; | ||
719 | u64 phys; | ||
720 | u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; | ||
721 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
722 | |||
723 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
724 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
725 | |||
726 | if (map_start < id_count) { | ||
727 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | ||
728 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
729 | |||
730 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | ||
731 | flags); | ||
732 | if (ret < 0) | ||
733 | return ret; | ||
734 | } | ||
735 | |||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
740 | |||
741 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
742 | u64 map_start, u64 map_len) | ||
743 | { | ||
744 | int ret, is_last; | ||
745 | u32 mapping_end, cpos; | ||
746 | unsigned int hole_size; | ||
747 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
748 | u64 len_bytes, phys_bytes, virt_bytes; | ||
749 | struct buffer_head *di_bh = NULL; | ||
750 | struct ocfs2_extent_rec rec; | ||
751 | |||
752 | ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); | ||
753 | if (ret) | ||
754 | return ret; | ||
755 | |||
756 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
757 | if (ret) { | ||
758 | mlog_errno(ret); | ||
759 | goto out; | ||
760 | } | ||
761 | |||
762 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
763 | |||
764 | /* | ||
765 | * Handle inline-data separately. | ||
766 | */ | ||
767 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
768 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | ||
769 | goto out_unlock; | ||
770 | } | ||
771 | |||
772 | cpos = map_start >> osb->s_clustersize_bits; | ||
773 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | ||
774 | map_start + map_len); | ||
775 | mapping_end -= cpos; | ||
776 | is_last = 0; | ||
777 | while (cpos < mapping_end && !is_last) { | ||
778 | u32 fe_flags; | ||
779 | |||
780 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, | ||
781 | &hole_size, &rec, &is_last); | ||
782 | if (ret) { | ||
783 | mlog_errno(ret); | ||
784 | goto out; | ||
785 | } | ||
786 | |||
787 | if (rec.e_blkno == 0ULL) { | ||
788 | cpos += hole_size; | ||
789 | continue; | ||
790 | } | ||
791 | |||
792 | fe_flags = 0; | ||
793 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | ||
794 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
795 | if (is_last) | ||
796 | fe_flags |= FIEMAP_EXTENT_LAST; | ||
797 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | ||
798 | phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; | ||
799 | virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; | ||
800 | |||
801 | ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, | ||
802 | len_bytes, fe_flags); | ||
803 | if (ret) | ||
804 | break; | ||
805 | |||
806 | cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); | ||
807 | } | ||
808 | |||
809 | if (ret > 0) | ||
810 | ret = 0; | ||
811 | |||
812 | out_unlock: | ||
813 | brelse(di_bh); | ||
814 | |||
815 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
816 | |||
817 | ocfs2_inode_unlock(inode, 0); | ||
818 | out: | ||
819 | |||
820 | return ret; | ||
821 | } | ||
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a22..1c4aa8b06f34 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
@@ -50,4 +50,11 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | |||
50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
51 | u64 *ret_count, unsigned int *extent_flags); | 51 | u64 *ret_count, unsigned int *extent_flags); |
52 | 52 | ||
53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
54 | u64 map_start, u64 map_len); | ||
55 | |||
56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
57 | u32 *p_cluster, u32 *num_clusters, | ||
58 | struct ocfs2_extent_list *el); | ||
59 | |||
53 | #endif /* _EXTENT_MAP_H */ | 60 | #endif /* _EXTENT_MAP_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3daa..8d3225a78073 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include "mmap.h" | 55 | #include "mmap.h" |
56 | #include "suballoc.h" | 56 | #include "suballoc.h" |
57 | #include "super.h" | 57 | #include "super.h" |
58 | #include "xattr.h" | ||
58 | 59 | ||
59 | #include "buffer_head_io.h" | 60 | #include "buffer_head_io.h" |
60 | 61 | ||
@@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file, | |||
184 | goto bail; | 185 | goto bail; |
185 | 186 | ||
186 | journal = osb->journal->j_journal; | 187 | journal = osb->journal->j_journal; |
187 | err = journal_force_commit(journal); | 188 | err = jbd2_journal_force_commit(journal); |
188 | 189 | ||
189 | bail: | 190 | bail: |
190 | mlog_exit(err); | 191 | mlog_exit(err); |
@@ -488,7 +489,7 @@ bail: | |||
488 | } | 489 | } |
489 | 490 | ||
490 | /* | 491 | /* |
491 | * extend allocation only here. | 492 | * extend file allocation only here. |
492 | * we'll update all the disk stuff, and oip->alloc_size | 493 | * we'll update all the disk stuff, and oip->alloc_size |
493 | * | 494 | * |
494 | * expect stuff to be locked, a transaction started and enough data / | 495 | * expect stuff to be locked, a transaction started and enough data / |
@@ -497,189 +498,25 @@ bail: | |||
497 | * Will return -EAGAIN, and a reason if a restart is needed. | 498 | * Will return -EAGAIN, and a reason if a restart is needed. |
498 | * If passed in, *reason will always be set, even in error. | 499 | * If passed in, *reason will always be set, even in error. |
499 | */ | 500 | */ |
500 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 501 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
501 | struct inode *inode, | 502 | struct inode *inode, |
502 | u32 *logical_offset, | 503 | u32 *logical_offset, |
503 | u32 clusters_to_add, | 504 | u32 clusters_to_add, |
504 | int mark_unwritten, | 505 | int mark_unwritten, |
505 | struct buffer_head *fe_bh, | 506 | struct buffer_head *fe_bh, |
506 | handle_t *handle, | 507 | handle_t *handle, |
507 | struct ocfs2_alloc_context *data_ac, | 508 | struct ocfs2_alloc_context *data_ac, |
508 | struct ocfs2_alloc_context *meta_ac, | 509 | struct ocfs2_alloc_context *meta_ac, |
509 | enum ocfs2_alloc_restarted *reason_ret) | 510 | enum ocfs2_alloc_restarted *reason_ret) |
510 | { | 511 | { |
511 | int status = 0; | 512 | int ret; |
512 | int free_extents; | 513 | struct ocfs2_extent_tree et; |
513 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
514 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
515 | u32 bit_off, num_bits; | ||
516 | u64 block; | ||
517 | u8 flags = 0; | ||
518 | |||
519 | BUG_ON(!clusters_to_add); | ||
520 | |||
521 | if (mark_unwritten) | ||
522 | flags = OCFS2_EXT_UNWRITTEN; | ||
523 | |||
524 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | ||
525 | if (free_extents < 0) { | ||
526 | status = free_extents; | ||
527 | mlog_errno(status); | ||
528 | goto leave; | ||
529 | } | ||
530 | |||
531 | /* there are two cases which could cause us to EAGAIN in the | ||
532 | * we-need-more-metadata case: | ||
533 | * 1) we haven't reserved *any* | ||
534 | * 2) we are so fragmented, we've needed to add metadata too | ||
535 | * many times. */ | ||
536 | if (!free_extents && !meta_ac) { | ||
537 | mlog(0, "we haven't reserved any metadata!\n"); | ||
538 | status = -EAGAIN; | ||
539 | reason = RESTART_META; | ||
540 | goto leave; | ||
541 | } else if ((!free_extents) | ||
542 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
543 | < ocfs2_extend_meta_needed(fe))) { | ||
544 | mlog(0, "filesystem is really fragmented...\n"); | ||
545 | status = -EAGAIN; | ||
546 | reason = RESTART_META; | ||
547 | goto leave; | ||
548 | } | ||
549 | |||
550 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
551 | clusters_to_add, &bit_off, &num_bits); | ||
552 | if (status < 0) { | ||
553 | if (status != -ENOSPC) | ||
554 | mlog_errno(status); | ||
555 | goto leave; | ||
556 | } | ||
557 | |||
558 | BUG_ON(num_bits > clusters_to_add); | ||
559 | |||
560 | /* reserve our write early -- insert_extent may update the inode */ | ||
561 | status = ocfs2_journal_access(handle, inode, fe_bh, | ||
562 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
563 | if (status < 0) { | ||
564 | mlog_errno(status); | ||
565 | goto leave; | ||
566 | } | ||
567 | |||
568 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
569 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
570 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
571 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | ||
572 | *logical_offset, block, num_bits, | ||
573 | flags, meta_ac); | ||
574 | if (status < 0) { | ||
575 | mlog_errno(status); | ||
576 | goto leave; | ||
577 | } | ||
578 | |||
579 | status = ocfs2_journal_dirty(handle, fe_bh); | ||
580 | if (status < 0) { | ||
581 | mlog_errno(status); | ||
582 | goto leave; | ||
583 | } | ||
584 | |||
585 | clusters_to_add -= num_bits; | ||
586 | *logical_offset += num_bits; | ||
587 | |||
588 | if (clusters_to_add) { | ||
589 | mlog(0, "need to alloc once more, clusters = %u, wanted = " | ||
590 | "%u\n", fe->i_clusters, clusters_to_add); | ||
591 | status = -EAGAIN; | ||
592 | reason = RESTART_TRANS; | ||
593 | } | ||
594 | |||
595 | leave: | ||
596 | mlog_exit(status); | ||
597 | if (reason_ret) | ||
598 | *reason_ret = reason; | ||
599 | return status; | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * For a given allocation, determine which allocators will need to be | ||
604 | * accessed, and lock them, reserving the appropriate number of bits. | ||
605 | * | ||
606 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
607 | * and ocfs2_allocate_unwritten_extents(). | ||
608 | * | ||
609 | * File systems which don't support holes call this from | ||
610 | * ocfs2_extend_allocation(). | ||
611 | */ | ||
612 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
613 | u32 clusters_to_add, u32 extents_to_split, | ||
614 | struct ocfs2_alloc_context **data_ac, | ||
615 | struct ocfs2_alloc_context **meta_ac) | ||
616 | { | ||
617 | int ret = 0, num_free_extents; | ||
618 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
619 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
620 | |||
621 | *meta_ac = NULL; | ||
622 | if (data_ac) | ||
623 | *data_ac = NULL; | ||
624 | |||
625 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
626 | |||
627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | ||
628 | "clusters_to_add = %u, extents_to_split = %u\n", | ||
629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), | ||
630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | ||
631 | |||
632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | ||
633 | if (num_free_extents < 0) { | ||
634 | ret = num_free_extents; | ||
635 | mlog_errno(ret); | ||
636 | goto out; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * Sparse allocation file systems need to be more conservative | ||
641 | * with reserving room for expansion - the actual allocation | ||
642 | * happens while we've got a journal handle open so re-taking | ||
643 | * a cluster lock (because we ran out of room for another | ||
644 | * extent) will violate ordering rules. | ||
645 | * | ||
646 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
647 | * anyway. | ||
648 | * | ||
649 | * Always lock for any unwritten extents - we might want to | ||
650 | * add blocks during a split. | ||
651 | */ | ||
652 | if (!num_free_extents || | ||
653 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
654 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | ||
655 | if (ret < 0) { | ||
656 | if (ret != -ENOSPC) | ||
657 | mlog_errno(ret); | ||
658 | goto out; | ||
659 | } | ||
660 | } | ||
661 | |||
662 | if (clusters_to_add == 0) | ||
663 | goto out; | ||
664 | |||
665 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
666 | if (ret < 0) { | ||
667 | if (ret != -ENOSPC) | ||
668 | mlog_errno(ret); | ||
669 | goto out; | ||
670 | } | ||
671 | |||
672 | out: | ||
673 | if (ret) { | ||
674 | if (*meta_ac) { | ||
675 | ocfs2_free_alloc_context(*meta_ac); | ||
676 | *meta_ac = NULL; | ||
677 | } | ||
678 | 514 | ||
679 | /* | 515 | ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); |
680 | * We cannot have an error and a non null *data_ac. | 516 | ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, |
681 | */ | 517 | clusters_to_add, mark_unwritten, |
682 | } | 518 | &et, handle, |
519 | data_ac, meta_ac, reason_ret); | ||
683 | 520 | ||
684 | return ret; | 521 | return ret; |
685 | } | 522 | } |
@@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
698 | struct ocfs2_alloc_context *meta_ac = NULL; | 535 | struct ocfs2_alloc_context *meta_ac = NULL; |
699 | enum ocfs2_alloc_restarted why; | 536 | enum ocfs2_alloc_restarted why; |
700 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 537 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
538 | struct ocfs2_extent_tree et; | ||
701 | 539 | ||
702 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); | 540 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); |
703 | 541 | ||
@@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
707 | */ | 545 | */ |
708 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); | 546 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
709 | 547 | ||
710 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 548 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
711 | OCFS2_BH_CACHED, inode); | ||
712 | if (status < 0) { | 549 | if (status < 0) { |
713 | mlog_errno(status); | 550 | mlog_errno(status); |
714 | goto leave; | 551 | goto leave; |
@@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
724 | restart_all: | 561 | restart_all: |
725 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 562 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
726 | 563 | ||
727 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, | 564 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
728 | &meta_ac); | 565 | "clusters_to_add = %u\n", |
566 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
567 | (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), | ||
568 | clusters_to_add); | ||
569 | ocfs2_init_dinode_extent_tree(&et, inode, bh); | ||
570 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
571 | &data_ac, &meta_ac); | ||
729 | if (status) { | 572 | if (status) { |
730 | mlog_errno(status); | 573 | mlog_errno(status); |
731 | goto leave; | 574 | goto leave; |
732 | } | 575 | } |
733 | 576 | ||
734 | credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); | 577 | credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, |
578 | clusters_to_add); | ||
735 | handle = ocfs2_start_trans(osb, credits); | 579 | handle = ocfs2_start_trans(osb, credits); |
736 | if (IS_ERR(handle)) { | 580 | if (IS_ERR(handle)) { |
737 | status = PTR_ERR(handle); | 581 | status = PTR_ERR(handle); |
@@ -753,16 +597,16 @@ restarted_transaction: | |||
753 | 597 | ||
754 | prev_clusters = OCFS2_I(inode)->ip_clusters; | 598 | prev_clusters = OCFS2_I(inode)->ip_clusters; |
755 | 599 | ||
756 | status = ocfs2_do_extend_allocation(osb, | 600 | status = ocfs2_add_inode_data(osb, |
757 | inode, | 601 | inode, |
758 | &logical_start, | 602 | &logical_start, |
759 | clusters_to_add, | 603 | clusters_to_add, |
760 | mark_unwritten, | 604 | mark_unwritten, |
761 | bh, | 605 | bh, |
762 | handle, | 606 | handle, |
763 | data_ac, | 607 | data_ac, |
764 | meta_ac, | 608 | meta_ac, |
765 | &why); | 609 | &why); |
766 | if ((status < 0) && (status != -EAGAIN)) { | 610 | if ((status < 0) && (status != -EAGAIN)) { |
767 | if (status != -ENOSPC) | 611 | if (status != -ENOSPC) |
768 | mlog_errno(status); | 612 | mlog_errno(status); |
@@ -789,7 +633,7 @@ restarted_transaction: | |||
789 | mlog(0, "restarting transaction.\n"); | 633 | mlog(0, "restarting transaction.\n"); |
790 | /* TODO: This can be more intelligent. */ | 634 | /* TODO: This can be more intelligent. */ |
791 | credits = ocfs2_calc_extend_credits(osb->sb, | 635 | credits = ocfs2_calc_extend_credits(osb->sb, |
792 | fe, | 636 | &fe->id2.i_list, |
793 | clusters_to_add); | 637 | clusters_to_add); |
794 | status = ocfs2_extend_trans(handle, credits); | 638 | status = ocfs2_extend_trans(handle, credits); |
795 | if (status < 0) { | 639 | if (status < 0) { |
@@ -826,10 +670,8 @@ leave: | |||
826 | restart_func = 0; | 670 | restart_func = 0; |
827 | goto restart_all; | 671 | goto restart_all; |
828 | } | 672 | } |
829 | if (bh) { | 673 | brelse(bh); |
830 | brelse(bh); | 674 | bh = NULL; |
831 | bh = NULL; | ||
832 | } | ||
833 | 675 | ||
834 | mlog_exit(status); | 676 | mlog_exit(status); |
835 | return status; | 677 | return status; |
@@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1096 | goto bail_unlock; | 938 | goto bail_unlock; |
1097 | } | 939 | } |
1098 | 940 | ||
1099 | if (i_size_read(inode) > attr->ia_size) | 941 | if (i_size_read(inode) > attr->ia_size) { |
942 | if (ocfs2_should_order_data(inode)) { | ||
943 | status = ocfs2_begin_ordered_truncate(inode, | ||
944 | attr->ia_size); | ||
945 | if (status) | ||
946 | goto bail_unlock; | ||
947 | } | ||
1100 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); | 948 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); |
1101 | else | 949 | } else |
1102 | status = ocfs2_extend_file(inode, bh, attr->ia_size); | 950 | status = ocfs2_extend_file(inode, bh, attr->ia_size); |
1103 | if (status < 0) { | 951 | if (status < 0) { |
1104 | if (status != -ENOSPC) | 952 | if (status != -ENOSPC) |
@@ -1140,8 +988,7 @@ bail_unlock_rw: | |||
1140 | if (size_change) | 988 | if (size_change) |
1141 | ocfs2_rw_unlock(inode, 1); | 989 | ocfs2_rw_unlock(inode, 1); |
1142 | bail: | 990 | bail: |
1143 | if (bh) | 991 | brelse(bh); |
1144 | brelse(bh); | ||
1145 | 992 | ||
1146 | mlog_exit(status); | 993 | mlog_exit(status); |
1147 | return status; | 994 | return status; |
@@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1284 | struct buffer_head *bh = NULL; | 1131 | struct buffer_head *bh = NULL; |
1285 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1132 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1286 | 1133 | ||
1287 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1134 | ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); |
1288 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1289 | if (ret < 0) { | 1135 | if (ret < 0) { |
1290 | mlog_errno(ret); | 1136 | mlog_errno(ret); |
1291 | goto out; | 1137 | goto out; |
@@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, | |||
1311 | struct buffer_head *di_bh = NULL; | 1157 | struct buffer_head *di_bh = NULL; |
1312 | 1158 | ||
1313 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1159 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
1314 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1160 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, |
1315 | OCFS2_I(inode)->ip_blkno, &di_bh, | 1161 | &di_bh); |
1316 | OCFS2_BH_CACHED, inode); | ||
1317 | if (ret) { | 1162 | if (ret) { |
1318 | mlog_errno(ret); | 1163 | mlog_errno(ret); |
1319 | goto out; | 1164 | goto out; |
@@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
1394 | handle_t *handle; | 1239 | handle_t *handle; |
1395 | struct ocfs2_alloc_context *meta_ac = NULL; | 1240 | struct ocfs2_alloc_context *meta_ac = NULL; |
1396 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1241 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
1242 | struct ocfs2_extent_tree et; | ||
1397 | 1243 | ||
1398 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | 1244 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
1245 | |||
1246 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
1399 | if (ret) { | 1247 | if (ret) { |
1400 | mlog_errno(ret); | 1248 | mlog_errno(ret); |
1401 | return ret; | 1249 | return ret; |
@@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
1425 | goto out; | 1273 | goto out; |
1426 | } | 1274 | } |
1427 | 1275 | ||
1428 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | 1276 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, |
1429 | dealloc); | 1277 | dealloc); |
1430 | if (ret) { | 1278 | if (ret) { |
1431 | mlog_errno(ret); | 1279 | mlog_errno(ret); |
@@ -2040,7 +1888,7 @@ out_dio: | |||
2040 | */ | 1888 | */ |
2041 | if (old_size != i_size_read(inode) || | 1889 | if (old_size != i_size_read(inode) || |
2042 | old_clusters != OCFS2_I(inode)->ip_clusters) { | 1890 | old_clusters != OCFS2_I(inode)->ip_clusters) { |
2043 | ret = journal_force_commit(osb->journal->j_journal); | 1891 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
2044 | if (ret < 0) | 1892 | if (ret < 0) |
2045 | written = ret; | 1893 | written = ret; |
2046 | } | 1894 | } |
@@ -2227,7 +2075,12 @@ const struct inode_operations ocfs2_file_iops = { | |||
2227 | .setattr = ocfs2_setattr, | 2075 | .setattr = ocfs2_setattr, |
2228 | .getattr = ocfs2_getattr, | 2076 | .getattr = ocfs2_getattr, |
2229 | .permission = ocfs2_permission, | 2077 | .permission = ocfs2_permission, |
2078 | .setxattr = generic_setxattr, | ||
2079 | .getxattr = generic_getxattr, | ||
2080 | .listxattr = ocfs2_listxattr, | ||
2081 | .removexattr = generic_removexattr, | ||
2230 | .fallocate = ocfs2_fallocate, | 2082 | .fallocate = ocfs2_fallocate, |
2083 | .fiemap = ocfs2_fiemap, | ||
2231 | }; | 2084 | }; |
2232 | 2085 | ||
2233 | const struct inode_operations ocfs2_special_file_iops = { | 2086 | const struct inode_operations ocfs2_special_file_iops = { |
@@ -2236,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
2236 | .permission = ocfs2_permission, | 2089 | .permission = ocfs2_permission, |
2237 | }; | 2090 | }; |
2238 | 2091 | ||
2092 | /* | ||
2093 | * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with | ||
2094 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! | ||
2095 | */ | ||
2239 | const struct file_operations ocfs2_fops = { | 2096 | const struct file_operations ocfs2_fops = { |
2240 | .llseek = generic_file_llseek, | 2097 | .llseek = generic_file_llseek, |
2241 | .read = do_sync_read, | 2098 | .read = do_sync_read, |
@@ -2250,6 +2107,7 @@ const struct file_operations ocfs2_fops = { | |||
2250 | #ifdef CONFIG_COMPAT | 2107 | #ifdef CONFIG_COMPAT |
2251 | .compat_ioctl = ocfs2_compat_ioctl, | 2108 | .compat_ioctl = ocfs2_compat_ioctl, |
2252 | #endif | 2109 | #endif |
2110 | .lock = ocfs2_lock, | ||
2253 | .flock = ocfs2_flock, | 2111 | .flock = ocfs2_flock, |
2254 | .splice_read = ocfs2_file_splice_read, | 2112 | .splice_read = ocfs2_file_splice_read, |
2255 | .splice_write = ocfs2_file_splice_write, | 2113 | .splice_write = ocfs2_file_splice_write, |
@@ -2266,5 +2124,51 @@ const struct file_operations ocfs2_dops = { | |||
2266 | #ifdef CONFIG_COMPAT | 2124 | #ifdef CONFIG_COMPAT |
2267 | .compat_ioctl = ocfs2_compat_ioctl, | 2125 | .compat_ioctl = ocfs2_compat_ioctl, |
2268 | #endif | 2126 | #endif |
2127 | .lock = ocfs2_lock, | ||
2128 | .flock = ocfs2_flock, | ||
2129 | }; | ||
2130 | |||
2131 | /* | ||
2132 | * POSIX-lockless variants of our file_operations. | ||
2133 | * | ||
2134 | * These will be used if the underlying cluster stack does not support | ||
2135 | * posix file locking, if the user passes the "localflocks" mount | ||
2136 | * option, or if we have a local-only fs. | ||
2137 | * | ||
2138 | * ocfs2_flock is in here because all stacks handle UNIX file locks, | ||
2139 | * so we still want it in the case of no stack support for | ||
2140 | * plocks. Internally, it will do the right thing when asked to ignore | ||
2141 | * the cluster. | ||
2142 | */ | ||
2143 | const struct file_operations ocfs2_fops_no_plocks = { | ||
2144 | .llseek = generic_file_llseek, | ||
2145 | .read = do_sync_read, | ||
2146 | .write = do_sync_write, | ||
2147 | .mmap = ocfs2_mmap, | ||
2148 | .fsync = ocfs2_sync_file, | ||
2149 | .release = ocfs2_file_release, | ||
2150 | .open = ocfs2_file_open, | ||
2151 | .aio_read = ocfs2_file_aio_read, | ||
2152 | .aio_write = ocfs2_file_aio_write, | ||
2153 | .unlocked_ioctl = ocfs2_ioctl, | ||
2154 | #ifdef CONFIG_COMPAT | ||
2155 | .compat_ioctl = ocfs2_compat_ioctl, | ||
2156 | #endif | ||
2157 | .flock = ocfs2_flock, | ||
2158 | .splice_read = ocfs2_file_splice_read, | ||
2159 | .splice_write = ocfs2_file_splice_write, | ||
2160 | }; | ||
2161 | |||
2162 | const struct file_operations ocfs2_dops_no_plocks = { | ||
2163 | .llseek = generic_file_llseek, | ||
2164 | .read = generic_read_dir, | ||
2165 | .readdir = ocfs2_readdir, | ||
2166 | .fsync = ocfs2_sync_file, | ||
2167 | .release = ocfs2_dir_release, | ||
2168 | .open = ocfs2_dir_open, | ||
2169 | .unlocked_ioctl = ocfs2_ioctl, | ||
2170 | #ifdef CONFIG_COMPAT | ||
2171 | .compat_ioctl = ocfs2_compat_ioctl, | ||
2172 | #endif | ||
2269 | .flock = ocfs2_flock, | 2173 | .flock = ocfs2_flock, |
2270 | }; | 2174 | }; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1e27b4d017ea..e92382cbca5f 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -28,9 +28,12 @@ | |||
28 | 28 | ||
29 | extern const struct file_operations ocfs2_fops; | 29 | extern const struct file_operations ocfs2_fops; |
30 | extern const struct file_operations ocfs2_dops; | 30 | extern const struct file_operations ocfs2_dops; |
31 | extern const struct file_operations ocfs2_fops_no_plocks; | ||
32 | extern const struct file_operations ocfs2_dops_no_plocks; | ||
31 | extern const struct inode_operations ocfs2_file_iops; | 33 | extern const struct inode_operations ocfs2_file_iops; |
32 | extern const struct inode_operations ocfs2_special_file_iops; | 34 | extern const struct inode_operations ocfs2_special_file_iops; |
33 | struct ocfs2_alloc_context; | 35 | struct ocfs2_alloc_context; |
36 | enum ocfs2_alloc_restarted; | ||
34 | 37 | ||
35 | struct ocfs2_file_private { | 38 | struct ocfs2_file_private { |
36 | struct file *fp_file; | 39 | struct file *fp_file; |
@@ -38,27 +41,18 @@ struct ocfs2_file_private { | |||
38 | struct ocfs2_lock_res fp_flock; | 41 | struct ocfs2_lock_res fp_flock; |
39 | }; | 42 | }; |
40 | 43 | ||
41 | enum ocfs2_alloc_restarted { | 44 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
42 | RESTART_NONE = 0, | 45 | struct inode *inode, |
43 | RESTART_TRANS, | 46 | u32 *logical_offset, |
44 | RESTART_META | 47 | u32 clusters_to_add, |
45 | }; | 48 | int mark_unwritten, |
46 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 49 | struct buffer_head *fe_bh, |
47 | struct inode *inode, | 50 | handle_t *handle, |
48 | u32 *logical_offset, | 51 | struct ocfs2_alloc_context *data_ac, |
49 | u32 clusters_to_add, | 52 | struct ocfs2_alloc_context *meta_ac, |
50 | int mark_unwritten, | 53 | enum ocfs2_alloc_restarted *reason_ret); |
51 | struct buffer_head *fe_bh, | ||
52 | handle_t *handle, | ||
53 | struct ocfs2_alloc_context *data_ac, | ||
54 | struct ocfs2_alloc_context *meta_ac, | ||
55 | enum ocfs2_alloc_restarted *reason_ret); | ||
56 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, | 54 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, |
57 | u64 zero_to); | 55 | u64 zero_to); |
58 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
59 | u32 clusters_to_add, u32 extents_to_split, | ||
60 | struct ocfs2_alloc_context **data_ac, | ||
61 | struct ocfs2_alloc_context **meta_ac); | ||
62 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 56 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
63 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | 57 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, |
64 | struct kstat *stat); | 58 | struct kstat *stat); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7e9e4c79aec7..4903688f72a9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include "symlink.h" | 49 | #include "symlink.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "xattr.h" | ||
52 | 53 | ||
53 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
54 | 55 | ||
@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
219 | struct super_block *sb; | 220 | struct super_block *sb; |
220 | struct ocfs2_super *osb; | 221 | struct ocfs2_super *osb; |
221 | int status = -EINVAL; | 222 | int status = -EINVAL; |
223 | int use_plocks = 1; | ||
222 | 224 | ||
223 | mlog_entry("(0x%p, size:%llu)\n", inode, | 225 | mlog_entry("(0x%p, size:%llu)\n", inode, |
224 | (unsigned long long)le64_to_cpu(fe->i_size)); | 226 | (unsigned long long)le64_to_cpu(fe->i_size)); |
@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
226 | sb = inode->i_sb; | 228 | sb = inode->i_sb; |
227 | osb = OCFS2_SB(sb); | 229 | osb = OCFS2_SB(sb); |
228 | 230 | ||
231 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
232 | ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) | ||
233 | use_plocks = 0; | ||
234 | |||
229 | /* this means that read_inode cannot create a superblock inode | 235 | /* this means that read_inode cannot create a superblock inode |
230 | * today. change if needed. */ | 236 | * today. change if needed. */ |
231 | if (!OCFS2_IS_VALID_DINODE(fe) || | 237 | if (!OCFS2_IS_VALID_DINODE(fe) || |
@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
295 | 301 | ||
296 | switch (inode->i_mode & S_IFMT) { | 302 | switch (inode->i_mode & S_IFMT) { |
297 | case S_IFREG: | 303 | case S_IFREG: |
298 | inode->i_fop = &ocfs2_fops; | 304 | if (use_plocks) |
305 | inode->i_fop = &ocfs2_fops; | ||
306 | else | ||
307 | inode->i_fop = &ocfs2_fops_no_plocks; | ||
299 | inode->i_op = &ocfs2_file_iops; | 308 | inode->i_op = &ocfs2_file_iops; |
300 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 309 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
301 | break; | 310 | break; |
302 | case S_IFDIR: | 311 | case S_IFDIR: |
303 | inode->i_op = &ocfs2_dir_iops; | 312 | inode->i_op = &ocfs2_dir_iops; |
304 | inode->i_fop = &ocfs2_dops; | 313 | if (use_plocks) |
314 | inode->i_fop = &ocfs2_dops; | ||
315 | else | ||
316 | inode->i_fop = &ocfs2_dops_no_plocks; | ||
305 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 317 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
306 | break; | 318 | break; |
307 | case S_IFLNK: | 319 | case S_IFLNK: |
@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
448 | } | 460 | } |
449 | } | 461 | } |
450 | 462 | ||
451 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, | 463 | if (can_lock) |
452 | can_lock ? inode : NULL); | 464 | status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, |
465 | OCFS2_BH_IGNORE_CACHE); | ||
466 | else | ||
467 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | ||
453 | if (status < 0) { | 468 | if (status < 0) { |
454 | mlog_errno(status); | 469 | mlog_errno(status); |
455 | goto bail; | 470 | goto bail; |
@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
522 | * data and fast symlinks. | 537 | * data and fast symlinks. |
523 | */ | 538 | */ |
524 | if (fe->i_clusters) { | 539 | if (fe->i_clusters) { |
540 | if (ocfs2_should_order_data(inode)) | ||
541 | ocfs2_begin_ordered_truncate(inode, 0); | ||
542 | |||
525 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 543 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
526 | if (IS_ERR(handle)) { | 544 | if (IS_ERR(handle)) { |
527 | status = PTR_ERR(handle); | 545 | status = PTR_ERR(handle); |
@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
730 | goto bail_unlock_dir; | 748 | goto bail_unlock_dir; |
731 | } | 749 | } |
732 | 750 | ||
751 | /*Free extended attribute resources associated with this inode.*/ | ||
752 | status = ocfs2_xattr_remove(inode, di_bh); | ||
753 | if (status < 0) { | ||
754 | mlog_errno(status); | ||
755 | goto bail_unlock_dir; | ||
756 | } | ||
757 | |||
733 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, | 758 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, |
734 | orphan_dir_bh); | 759 | orphan_dir_bh); |
735 | if (status < 0) | 760 | if (status < 0) |
@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1081 | oi->ip_last_trans = 0; | 1106 | oi->ip_last_trans = 0; |
1082 | oi->ip_dir_start_lookup = 0; | 1107 | oi->ip_dir_start_lookup = 0; |
1083 | oi->ip_blkno = 0ULL; | 1108 | oi->ip_blkno = 0ULL; |
1109 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, | ||
1110 | &oi->ip_jinode); | ||
1084 | 1111 | ||
1085 | bail: | 1112 | bail: |
1086 | mlog_exit_void(); | 1113 | mlog_exit_void(); |
@@ -1107,58 +1134,6 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1107 | } | 1134 | } |
1108 | 1135 | ||
1109 | /* | 1136 | /* |
1110 | * TODO: this should probably be merged into ocfs2_get_block | ||
1111 | * | ||
1112 | * However, you now need to pay attention to the cont_prepare_write() | ||
1113 | * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much | ||
1114 | * expects never to extend). | ||
1115 | */ | ||
1116 | struct buffer_head *ocfs2_bread(struct inode *inode, | ||
1117 | int block, int *err, int reada) | ||
1118 | { | ||
1119 | struct buffer_head *bh = NULL; | ||
1120 | int tmperr; | ||
1121 | u64 p_blkno; | ||
1122 | int readflags = OCFS2_BH_CACHED; | ||
1123 | |||
1124 | if (reada) | ||
1125 | readflags |= OCFS2_BH_READAHEAD; | ||
1126 | |||
1127 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
1128 | i_size_read(inode)) { | ||
1129 | BUG_ON(!reada); | ||
1130 | return NULL; | ||
1131 | } | ||
1132 | |||
1133 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
1134 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
1135 | NULL); | ||
1136 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
1137 | if (tmperr < 0) { | ||
1138 | mlog_errno(tmperr); | ||
1139 | goto fail; | ||
1140 | } | ||
1141 | |||
1142 | tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh, | ||
1143 | readflags, inode); | ||
1144 | if (tmperr < 0) | ||
1145 | goto fail; | ||
1146 | |||
1147 | tmperr = 0; | ||
1148 | |||
1149 | *err = 0; | ||
1150 | return bh; | ||
1151 | |||
1152 | fail: | ||
1153 | if (bh) { | ||
1154 | brelse(bh); | ||
1155 | bh = NULL; | ||
1156 | } | ||
1157 | *err = -EIO; | ||
1158 | return NULL; | ||
1159 | } | ||
1160 | |||
1161 | /* | ||
1162 | * This is called from our getattr. | 1137 | * This is called from our getattr. |
1163 | */ | 1138 | */ |
1164 | int ocfs2_inode_revalidate(struct dentry *dentry) | 1139 | int ocfs2_inode_revalidate(struct dentry *dentry) |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 390a85596aa0..2f37af9bcc4a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -40,6 +40,9 @@ struct ocfs2_inode_info | |||
40 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ |
41 | struct rw_semaphore ip_alloc_sem; | 41 | struct rw_semaphore ip_alloc_sem; |
42 | 42 | ||
43 | /* protects extended attribute changes on this inode */ | ||
44 | struct rw_semaphore ip_xattr_sem; | ||
45 | |||
43 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
44 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
45 | u32 ip_open_count; | 48 | u32 ip_open_count; |
@@ -68,6 +71,7 @@ struct ocfs2_inode_info | |||
68 | struct ocfs2_extent_map ip_extent_map; | 71 | struct ocfs2_extent_map ip_extent_map; |
69 | 72 | ||
70 | struct inode vfs_inode; | 73 | struct inode vfs_inode; |
74 | struct jbd2_inode ip_jinode; | ||
71 | }; | 75 | }; |
72 | 76 | ||
73 | /* | 77 | /* |
@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache; | |||
113 | 117 | ||
114 | extern const struct address_space_operations ocfs2_aops; | 118 | extern const struct address_space_operations ocfs2_aops; |
115 | 119 | ||
116 | struct buffer_head *ocfs2_bread(struct inode *inode, int block, | ||
117 | int *err, int reada); | ||
118 | void ocfs2_clear_inode(struct inode *inode); | 120 | void ocfs2_clear_inode(struct inode *inode); |
119 | void ocfs2_delete_inode(struct inode *inode); | 121 | void ocfs2_delete_inode(struct inode *inode); |
120 | void ocfs2_drop_inode(struct inode *inode); | 122 | void ocfs2_drop_inode(struct inode *inode); |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7b142f0ce995..9fcd36dcc9a0 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -102,8 +102,7 @@ bail_unlock: | |||
102 | bail: | 102 | bail: |
103 | mutex_unlock(&inode->i_mutex); | 103 | mutex_unlock(&inode->i_mutex); |
104 | 104 | ||
105 | if (bh) | 105 | brelse(bh); |
106 | brelse(bh); | ||
107 | 106 | ||
108 | mlog_exit(status); | 107 | mlog_exit(status); |
109 | return status; | 108 | return status; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..81e40677eecb 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
215 | goto finally; | 215 | goto finally; |
216 | } | 216 | } |
217 | 217 | ||
218 | journal_lock_updates(journal->j_journal); | 218 | jbd2_journal_lock_updates(journal->j_journal); |
219 | status = journal_flush(journal->j_journal); | 219 | status = jbd2_journal_flush(journal->j_journal); |
220 | journal_unlock_updates(journal->j_journal); | 220 | jbd2_journal_unlock_updates(journal->j_journal); |
221 | if (status < 0) { | 221 | if (status < 0) { |
222 | up_write(&journal->j_trans_barrier); | 222 | up_write(&journal->j_trans_barrier); |
223 | mlog_errno(status); | 223 | mlog_errno(status); |
@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
264 | 264 | ||
265 | down_read(&osb->journal->j_trans_barrier); | 265 | down_read(&osb->journal->j_trans_barrier); |
266 | 266 | ||
267 | handle = journal_start(journal, max_buffs); | 267 | handle = jbd2_journal_start(journal, max_buffs); |
268 | if (IS_ERR(handle)) { | 268 | if (IS_ERR(handle)) { |
269 | up_read(&osb->journal->j_trans_barrier); | 269 | up_read(&osb->journal->j_trans_barrier); |
270 | 270 | ||
@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
290 | 290 | ||
291 | BUG_ON(!handle); | 291 | BUG_ON(!handle); |
292 | 292 | ||
293 | ret = journal_stop(handle); | 293 | ret = jbd2_journal_stop(handle); |
294 | if (ret < 0) | 294 | if (ret < 0) |
295 | mlog_errno(ret); | 295 | mlog_errno(ret); |
296 | 296 | ||
@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
304 | * transaction. extend_trans will either extend the current handle by | 304 | * transaction. extend_trans will either extend the current handle by |
305 | * nblocks, or commit it and start a new one with nblocks credits. | 305 | * nblocks, or commit it and start a new one with nblocks credits. |
306 | * | 306 | * |
307 | * This might call journal_restart() which will commit dirty buffers | 307 | * This might call jbd2_journal_restart() which will commit dirty buffers |
308 | * and then restart the transaction. Before calling | 308 | * and then restart the transaction. Before calling |
309 | * ocfs2_extend_trans(), any changed blocks should have been | 309 | * ocfs2_extend_trans(), any changed blocks should have been |
310 | * dirtied. After calling it, all blocks which need to be changed must | 310 | * dirtied. After calling it, all blocks which need to be changed must |
@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
332 | #ifdef CONFIG_OCFS2_DEBUG_FS | 332 | #ifdef CONFIG_OCFS2_DEBUG_FS |
333 | status = 1; | 333 | status = 1; |
334 | #else | 334 | #else |
335 | status = journal_extend(handle, nblocks); | 335 | status = jbd2_journal_extend(handle, nblocks); |
336 | if (status < 0) { | 336 | if (status < 0) { |
337 | mlog_errno(status); | 337 | mlog_errno(status); |
338 | goto bail; | 338 | goto bail; |
@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
340 | #endif | 340 | #endif |
341 | 341 | ||
342 | if (status > 0) { | 342 | if (status > 0) { |
343 | mlog(0, "journal_extend failed, trying journal_restart\n"); | 343 | mlog(0, |
344 | status = journal_restart(handle, nblocks); | 344 | "jbd2_journal_extend failed, trying " |
345 | "jbd2_journal_restart\n"); | ||
346 | status = jbd2_journal_restart(handle, nblocks); | ||
345 | if (status < 0) { | 347 | if (status < 0) { |
346 | mlog_errno(status); | 348 | mlog_errno(status); |
347 | goto bail; | 349 | goto bail; |
@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle, | |||
393 | switch (type) { | 395 | switch (type) { |
394 | case OCFS2_JOURNAL_ACCESS_CREATE: | 396 | case OCFS2_JOURNAL_ACCESS_CREATE: |
395 | case OCFS2_JOURNAL_ACCESS_WRITE: | 397 | case OCFS2_JOURNAL_ACCESS_WRITE: |
396 | status = journal_get_write_access(handle, bh); | 398 | status = jbd2_journal_get_write_access(handle, bh); |
397 | break; | 399 | break; |
398 | 400 | ||
399 | case OCFS2_JOURNAL_ACCESS_UNDO: | 401 | case OCFS2_JOURNAL_ACCESS_UNDO: |
400 | status = journal_get_undo_access(handle, bh); | 402 | status = jbd2_journal_get_undo_access(handle, bh); |
401 | break; | 403 | break; |
402 | 404 | ||
403 | default: | 405 | default: |
@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
422 | mlog_entry("(bh->b_blocknr=%llu)\n", | 424 | mlog_entry("(bh->b_blocknr=%llu)\n", |
423 | (unsigned long long)bh->b_blocknr); | 425 | (unsigned long long)bh->b_blocknr); |
424 | 426 | ||
425 | status = journal_dirty_metadata(handle, bh); | 427 | status = jbd2_journal_dirty_metadata(handle, bh); |
426 | if (status < 0) | 428 | if (status < 0) |
427 | mlog(ML_ERROR, "Could not dirty metadata buffer. " | 429 | mlog(ML_ERROR, "Could not dirty metadata buffer. " |
428 | "(bh->b_blocknr=%llu)\n", | 430 | "(bh->b_blocknr=%llu)\n", |
@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
432 | return status; | 434 | return status; |
433 | } | 435 | } |
434 | 436 | ||
437 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
435 | int ocfs2_journal_dirty_data(handle_t *handle, | 438 | int ocfs2_journal_dirty_data(handle_t *handle, |
436 | struct buffer_head *bh) | 439 | struct buffer_head *bh) |
437 | { | 440 | { |
@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
443 | 446 | ||
444 | return err; | 447 | return err; |
445 | } | 448 | } |
449 | #endif | ||
446 | 450 | ||
447 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) | 451 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) |
448 | 452 | ||
449 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 453 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
450 | { | 454 | { |
@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) | |||
457 | spin_lock(&journal->j_state_lock); | 461 | spin_lock(&journal->j_state_lock); |
458 | journal->j_commit_interval = commit_interval; | 462 | journal->j_commit_interval = commit_interval; |
459 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 463 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
460 | journal->j_flags |= JFS_BARRIER; | 464 | journal->j_flags |= JBD2_BARRIER; |
461 | else | 465 | else |
462 | journal->j_flags &= ~JFS_BARRIER; | 466 | journal->j_flags &= ~JBD2_BARRIER; |
463 | spin_unlock(&journal->j_state_lock); | 467 | spin_unlock(&journal->j_state_lock); |
464 | } | 468 | } |
465 | 469 | ||
@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
524 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); | 528 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); |
525 | 529 | ||
526 | /* call the kernels journal init function now */ | 530 | /* call the kernels journal init function now */ |
527 | j_journal = journal_init_inode(inode); | 531 | j_journal = jbd2_journal_init_inode(inode); |
528 | if (j_journal == NULL) { | 532 | if (j_journal == NULL) { |
529 | mlog(ML_ERROR, "Linux journal layer error\n"); | 533 | mlog(ML_ERROR, "Linux journal layer error\n"); |
530 | status = -EINVAL; | 534 | status = -EINVAL; |
531 | goto done; | 535 | goto done; |
532 | } | 536 | } |
533 | 537 | ||
534 | mlog(0, "Returned from journal_init_inode\n"); | 538 | mlog(0, "Returned from jbd2_journal_init_inode\n"); |
535 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); | 539 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); |
536 | 540 | ||
537 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & | 541 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & |
@@ -550,8 +554,7 @@ done: | |||
550 | if (status < 0) { | 554 | if (status < 0) { |
551 | if (inode_lock) | 555 | if (inode_lock) |
552 | ocfs2_inode_unlock(inode, 1); | 556 | ocfs2_inode_unlock(inode, 1); |
553 | if (bh != NULL) | 557 | brelse(bh); |
554 | brelse(bh); | ||
555 | if (inode) { | 558 | if (inode) { |
556 | OCFS2_I(inode)->ip_open_count--; | 559 | OCFS2_I(inode)->ip_open_count--; |
557 | iput(inode); | 560 | iput(inode); |
@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
639 | if (journal->j_state != OCFS2_JOURNAL_LOADED) | 642 | if (journal->j_state != OCFS2_JOURNAL_LOADED) |
640 | goto done; | 643 | goto done; |
641 | 644 | ||
642 | /* need to inc inode use count as journal_destroy will iput. */ | 645 | /* need to inc inode use count - jbd2_journal_destroy will iput. */ |
643 | if (!igrab(inode)) | 646 | if (!igrab(inode)) |
644 | BUG(); | 647 | BUG(); |
645 | 648 | ||
@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
668 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); | 671 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); |
669 | 672 | ||
670 | if (ocfs2_mount_local(osb)) { | 673 | if (ocfs2_mount_local(osb)) { |
671 | journal_lock_updates(journal->j_journal); | 674 | jbd2_journal_lock_updates(journal->j_journal); |
672 | status = journal_flush(journal->j_journal); | 675 | status = jbd2_journal_flush(journal->j_journal); |
673 | journal_unlock_updates(journal->j_journal); | 676 | jbd2_journal_unlock_updates(journal->j_journal); |
674 | if (status < 0) | 677 | if (status < 0) |
675 | mlog_errno(status); | 678 | mlog_errno(status); |
676 | } | 679 | } |
@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
686 | } | 689 | } |
687 | 690 | ||
688 | /* Shutdown the kernel journal system */ | 691 | /* Shutdown the kernel journal system */ |
689 | journal_destroy(journal->j_journal); | 692 | jbd2_journal_destroy(journal->j_journal); |
690 | 693 | ||
691 | OCFS2_I(inode)->ip_open_count--; | 694 | OCFS2_I(inode)->ip_open_count--; |
692 | 695 | ||
@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb, | |||
711 | { | 714 | { |
712 | int olderr; | 715 | int olderr; |
713 | 716 | ||
714 | olderr = journal_errno(journal); | 717 | olderr = jbd2_journal_errno(journal); |
715 | if (olderr) { | 718 | if (olderr) { |
716 | mlog(ML_ERROR, "File system error %d recorded in " | 719 | mlog(ML_ERROR, "File system error %d recorded in " |
717 | "journal %u.\n", olderr, slot); | 720 | "journal %u.\n", olderr, slot); |
718 | mlog(ML_ERROR, "File system on device %s needs checking.\n", | 721 | mlog(ML_ERROR, "File system on device %s needs checking.\n", |
719 | sb->s_id); | 722 | sb->s_id); |
720 | 723 | ||
721 | journal_ack_err(journal); | 724 | jbd2_journal_ack_err(journal); |
722 | journal_clear_err(journal); | 725 | jbd2_journal_clear_err(journal); |
723 | } | 726 | } |
724 | } | 727 | } |
725 | 728 | ||
@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) | |||
734 | 737 | ||
735 | osb = journal->j_osb; | 738 | osb = journal->j_osb; |
736 | 739 | ||
737 | status = journal_load(journal->j_journal); | 740 | status = jbd2_journal_load(journal->j_journal); |
738 | if (status < 0) { | 741 | if (status < 0) { |
739 | mlog(ML_ERROR, "Failed to load journal!\n"); | 742 | mlog(ML_ERROR, "Failed to load journal!\n"); |
740 | goto done; | 743 | goto done; |
@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | |||
778 | 781 | ||
779 | BUG_ON(!journal); | 782 | BUG_ON(!journal); |
780 | 783 | ||
781 | status = journal_wipe(journal->j_journal, full); | 784 | status = jbd2_journal_wipe(journal->j_journal, full); |
782 | if (status < 0) { | 785 | if (status < 0) { |
783 | mlog_errno(status); | 786 | mlog_errno(status); |
784 | goto bail; | 787 | goto bail; |
@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
847 | 850 | ||
848 | /* We are reading journal data which should not | 851 | /* We are reading journal data which should not |
849 | * be put in the uptodate cache */ | 852 | * be put in the uptodate cache */ |
850 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | 853 | status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb), |
851 | p_blkno, p_blocks, bhs, 0, | 854 | p_blkno, p_blocks, bhs); |
852 | NULL); | ||
853 | if (status < 0) { | 855 | if (status < 0) { |
854 | mlog_errno(status); | 856 | mlog_errno(status); |
855 | goto bail; | 857 | goto bail; |
@@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
865 | 867 | ||
866 | bail: | 868 | bail: |
867 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) | 869 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) |
868 | if (bhs[i]) | 870 | brelse(bhs[i]); |
869 | brelse(bhs[i]); | ||
870 | mlog_exit(status); | 871 | mlog_exit(status); |
871 | return status; | 872 | return status; |
872 | } | 873 | } |
@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, | |||
1133 | } | 1134 | } |
1134 | SET_INODE_JOURNAL(inode); | 1135 | SET_INODE_JOURNAL(inode); |
1135 | 1136 | ||
1136 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); | 1137 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, |
1138 | OCFS2_BH_IGNORE_CACHE); | ||
1137 | if (status < 0) { | 1139 | if (status < 0) { |
1138 | mlog_errno(status); | 1140 | mlog_errno(status); |
1139 | goto bail; | 1141 | goto bail; |
@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1229 | } | 1231 | } |
1230 | 1232 | ||
1231 | mlog(0, "calling journal_init_inode\n"); | 1233 | mlog(0, "calling journal_init_inode\n"); |
1232 | journal = journal_init_inode(inode); | 1234 | journal = jbd2_journal_init_inode(inode); |
1233 | if (journal == NULL) { | 1235 | if (journal == NULL) { |
1234 | mlog(ML_ERROR, "Linux journal layer error\n"); | 1236 | mlog(ML_ERROR, "Linux journal layer error\n"); |
1235 | status = -EIO; | 1237 | status = -EIO; |
1236 | goto done; | 1238 | goto done; |
1237 | } | 1239 | } |
1238 | 1240 | ||
1239 | status = journal_load(journal); | 1241 | status = jbd2_journal_load(journal); |
1240 | if (status < 0) { | 1242 | if (status < 0) { |
1241 | mlog_errno(status); | 1243 | mlog_errno(status); |
1242 | if (!igrab(inode)) | 1244 | if (!igrab(inode)) |
1243 | BUG(); | 1245 | BUG(); |
1244 | journal_destroy(journal); | 1246 | jbd2_journal_destroy(journal); |
1245 | goto done; | 1247 | goto done; |
1246 | } | 1248 | } |
1247 | 1249 | ||
@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1249 | 1251 | ||
1250 | /* wipe the journal */ | 1252 | /* wipe the journal */ |
1251 | mlog(0, "flushing the journal.\n"); | 1253 | mlog(0, "flushing the journal.\n"); |
1252 | journal_lock_updates(journal); | 1254 | jbd2_journal_lock_updates(journal); |
1253 | status = journal_flush(journal); | 1255 | status = jbd2_journal_flush(journal); |
1254 | journal_unlock_updates(journal); | 1256 | jbd2_journal_unlock_updates(journal); |
1255 | if (status < 0) | 1257 | if (status < 0) |
1256 | mlog_errno(status); | 1258 | mlog_errno(status); |
1257 | 1259 | ||
@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1272 | if (!igrab(inode)) | 1274 | if (!igrab(inode)) |
1273 | BUG(); | 1275 | BUG(); |
1274 | 1276 | ||
1275 | journal_destroy(journal); | 1277 | jbd2_journal_destroy(journal); |
1276 | 1278 | ||
1277 | done: | 1279 | done: |
1278 | /* drop the lock on this nodes journal */ | 1280 | /* drop the lock on this nodes journal */ |
@@ -1282,8 +1284,7 @@ done: | |||
1282 | if (inode) | 1284 | if (inode) |
1283 | iput(inode); | 1285 | iput(inode); |
1284 | 1286 | ||
1285 | if (bh) | 1287 | brelse(bh); |
1286 | brelse(bh); | ||
1287 | 1288 | ||
1288 | mlog_exit(status); | 1289 | mlog_exit(status); |
1289 | return status; | 1290 | return status; |
@@ -1418,13 +1419,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1418 | { | 1419 | { |
1419 | unsigned int node_num; | 1420 | unsigned int node_num; |
1420 | int status, i; | 1421 | int status, i; |
1422 | u32 gen; | ||
1421 | struct buffer_head *bh = NULL; | 1423 | struct buffer_head *bh = NULL; |
1422 | struct ocfs2_dinode *di; | 1424 | struct ocfs2_dinode *di; |
1423 | 1425 | ||
1424 | /* This is called with the super block cluster lock, so we | 1426 | /* This is called with the super block cluster lock, so we |
1425 | * know that the slot map can't change underneath us. */ | 1427 | * know that the slot map can't change underneath us. */ |
1426 | 1428 | ||
1427 | spin_lock(&osb->osb_lock); | ||
1428 | for (i = 0; i < osb->max_slots; i++) { | 1429 | for (i = 0; i < osb->max_slots; i++) { |
1429 | /* Read journal inode to get the recovery generation */ | 1430 | /* Read journal inode to get the recovery generation */ |
1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); | 1431 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); |
@@ -1433,23 +1434,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1433 | goto bail; | 1434 | goto bail; |
1434 | } | 1435 | } |
1435 | di = (struct ocfs2_dinode *)bh->b_data; | 1436 | di = (struct ocfs2_dinode *)bh->b_data; |
1436 | osb->slot_recovery_generations[i] = | 1437 | gen = ocfs2_get_recovery_generation(di); |
1437 | ocfs2_get_recovery_generation(di); | ||
1438 | brelse(bh); | 1438 | brelse(bh); |
1439 | bh = NULL; | 1439 | bh = NULL; |
1440 | 1440 | ||
1441 | spin_lock(&osb->osb_lock); | ||
1442 | osb->slot_recovery_generations[i] = gen; | ||
1443 | |||
1441 | mlog(0, "Slot %u recovery generation is %u\n", i, | 1444 | mlog(0, "Slot %u recovery generation is %u\n", i, |
1442 | osb->slot_recovery_generations[i]); | 1445 | osb->slot_recovery_generations[i]); |
1443 | 1446 | ||
1444 | if (i == osb->slot_num) | 1447 | if (i == osb->slot_num) { |
1448 | spin_unlock(&osb->osb_lock); | ||
1445 | continue; | 1449 | continue; |
1450 | } | ||
1446 | 1451 | ||
1447 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | 1452 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); |
1448 | if (status == -ENOENT) | 1453 | if (status == -ENOENT) { |
1454 | spin_unlock(&osb->osb_lock); | ||
1449 | continue; | 1455 | continue; |
1456 | } | ||
1450 | 1457 | ||
1451 | if (__ocfs2_recovery_map_test(osb, node_num)) | 1458 | if (__ocfs2_recovery_map_test(osb, node_num)) { |
1459 | spin_unlock(&osb->osb_lock); | ||
1452 | continue; | 1460 | continue; |
1461 | } | ||
1453 | spin_unlock(&osb->osb_lock); | 1462 | spin_unlock(&osb->osb_lock); |
1454 | 1463 | ||
1455 | /* Ok, we have a slot occupied by another node which | 1464 | /* Ok, we have a slot occupied by another node which |
@@ -1465,10 +1474,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1465 | mlog_errno(status); | 1474 | mlog_errno(status); |
1466 | goto bail; | 1475 | goto bail; |
1467 | } | 1476 | } |
1468 | |||
1469 | spin_lock(&osb->osb_lock); | ||
1470 | } | 1477 | } |
1471 | spin_unlock(&osb->osb_lock); | ||
1472 | 1478 | ||
1473 | status = 0; | 1479 | status = 0; |
1474 | bail: | 1480 | bail: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 2178ebffa05f..d4d14e9a3cea 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -27,7 +27,12 @@ | |||
27 | #define OCFS2_JOURNAL_H | 27 | #define OCFS2_JOURNAL_H |
28 | 28 | ||
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/jbd.h> | 30 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
31 | # include <linux/jbd2.h> | ||
32 | #else | ||
33 | # include <linux/jbd.h> | ||
34 | # include "ocfs2_jbd_compat.h" | ||
35 | #endif | ||
31 | 36 | ||
32 | enum ocfs2_journal_state { | 37 | enum ocfs2_journal_state { |
33 | OCFS2_JOURNAL_FREE = 0, | 38 | OCFS2_JOURNAL_FREE = 0, |
@@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) | |||
215 | * buffer. Will have to call ocfs2_journal_dirty once | 220 | * buffer. Will have to call ocfs2_journal_dirty once |
216 | * we've actually dirtied it. Type is one of . or . | 221 | * we've actually dirtied it. Type is one of . or . |
217 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. | 222 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. |
218 | * ocfs2_journal_dirty_data - Indicate that a data buffer should go out before | 223 | * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before |
219 | * the current handle commits. | 224 | * the current handle commits. |
220 | */ | 225 | */ |
221 | 226 | ||
222 | /* You must always start_trans with a number of buffs > 0, but it's | 227 | /* You must always start_trans with a number of buffs > 0, but it's |
@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle, | |||
268 | */ | 273 | */ |
269 | int ocfs2_journal_dirty(handle_t *handle, | 274 | int ocfs2_journal_dirty(handle_t *handle, |
270 | struct buffer_head *bh); | 275 | struct buffer_head *bh); |
276 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
271 | int ocfs2_journal_dirty_data(handle_t *handle, | 277 | int ocfs2_journal_dirty_data(handle_t *handle, |
272 | struct buffer_head *bh); | 278 | struct buffer_head *bh); |
279 | #endif | ||
273 | 280 | ||
274 | /* | 281 | /* |
275 | * Credit Macros: | 282 | * Credit Macros: |
@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
283 | /* simple file updates like chmod, etc. */ | 290 | /* simple file updates like chmod, etc. */ |
284 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 291 | #define OCFS2_INODE_UPDATE_CREDITS 1 |
285 | 292 | ||
293 | /* extended attribute block update */ | ||
294 | #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 | ||
295 | |||
286 | /* group extend. inode update and last group update. */ | 296 | /* group extend. inode update and last group update. */ |
287 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 297 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) |
288 | 298 | ||
@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
340 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ | 350 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ |
341 | + OCFS2_UNLINK_CREDITS) | 351 | + OCFS2_UNLINK_CREDITS) |
342 | 352 | ||
353 | /* global bitmap dinode, group desc., relinked group, | ||
354 | * suballocator dinode, group desc., relinked group, | ||
355 | * dinode, xattr block */ | ||
356 | #define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \ | ||
357 | + OCFS2_INODE_UPDATE_CREDITS \ | ||
358 | + OCFS2_XATTR_BLOCK_UPDATE_CREDITS) | ||
359 | |||
360 | /* | ||
361 | * Please note that the caller must make sure that root_el is the root | ||
362 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
363 | * the result may be wrong. | ||
364 | */ | ||
343 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, | 365 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, |
344 | struct ocfs2_dinode *fe, | 366 | struct ocfs2_extent_list *root_el, |
345 | u32 bits_wanted) | 367 | u32 bits_wanted) |
346 | { | 368 | { |
347 | int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks; | 369 | int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks; |
348 | 370 | ||
349 | /* bitmap dinode, group desc. + relinked group. */ | 371 | /* bitmap dinode, group desc. + relinked group. */ |
350 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; | 372 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; |
@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
355 | * however many metadata chunks needed * a remaining suballoc | 377 | * however many metadata chunks needed * a remaining suballoc |
356 | * alloc. */ | 378 | * alloc. */ |
357 | sysfile_bitmap_blocks = 1 + | 379 | sysfile_bitmap_blocks = 1 + |
358 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe); | 380 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el); |
359 | 381 | ||
360 | /* this does not include *new* metadata blocks, which are | 382 | /* this does not include *new* metadata blocks, which are |
361 | * accounted for in sysfile_bitmap_blocks. fe + | 383 | * accounted for in sysfile_bitmap_blocks. root_el + |
362 | * prev. last_eb_blk + blocks along edge of tree. | 384 | * prev. last_eb_blk + blocks along edge of tree. |
363 | * calc_symlink_credits passes because we just need 1 | 385 | * calc_symlink_credits passes because we just need 1 |
364 | * credit for the dinode there. */ | 386 | * credit for the dinode there. */ |
365 | dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth); | 387 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
366 | 388 | ||
367 | return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks; | 389 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; |
368 | } | 390 | } |
369 | 391 | ||
370 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 392 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, | |||
415 | return credits; | 437 | return credits; |
416 | } | 438 | } |
417 | 439 | ||
440 | static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) | ||
441 | { | ||
442 | return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode); | ||
443 | } | ||
444 | |||
445 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | ||
446 | loff_t new_size) | ||
447 | { | ||
448 | return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, | ||
449 | new_size); | ||
450 | } | ||
451 | |||
418 | #endif /* OCFS2_JOURNAL_H */ | 452 | #endif /* OCFS2_JOURNAL_H */ |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 28e492e4ec88..687b28713c32 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
31 | #include <linux/debugfs.h> | ||
31 | 32 | ||
32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 33 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
@@ -47,8 +48,6 @@ | |||
47 | 48 | ||
48 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) | 49 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) |
49 | 50 | ||
50 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb); | ||
51 | |||
52 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); | 51 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); |
53 | 52 | ||
54 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
@@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 74 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 75 | struct inode *local_alloc_inode); |
77 | 76 | ||
78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 77 | #ifdef CONFIG_OCFS2_FS_STATS |
78 | |||
79 | static int ocfs2_la_debug_open(struct inode *inode, struct file *file) | ||
80 | { | ||
81 | file->private_data = inode->i_private; | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE | ||
86 | #define LA_DEBUG_VER 1 | ||
87 | static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, | ||
88 | size_t count, loff_t *ppos) | ||
89 | { | ||
90 | static DEFINE_MUTEX(la_debug_mutex); | ||
91 | struct ocfs2_super *osb = file->private_data; | ||
92 | int written, ret; | ||
93 | char *buf = osb->local_alloc_debug_buf; | ||
94 | |||
95 | mutex_lock(&la_debug_mutex); | ||
96 | memset(buf, 0, LA_DEBUG_BUF_SZ); | ||
97 | |||
98 | written = snprintf(buf, LA_DEBUG_BUF_SZ, | ||
99 | "0x%x\t0x%llx\t%u\t%u\t0x%x\n", | ||
100 | LA_DEBUG_VER, | ||
101 | (unsigned long long)osb->la_last_gd, | ||
102 | osb->local_alloc_default_bits, | ||
103 | osb->local_alloc_bits, osb->local_alloc_state); | ||
104 | |||
105 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); | ||
106 | |||
107 | mutex_unlock(&la_debug_mutex); | ||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | static const struct file_operations ocfs2_la_debug_fops = { | ||
112 | .open = ocfs2_la_debug_open, | ||
113 | .read = ocfs2_la_debug_read, | ||
114 | }; | ||
115 | |||
116 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
117 | { | ||
118 | osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); | ||
119 | if (!osb->local_alloc_debug_buf) | ||
120 | return; | ||
121 | |||
122 | osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", | ||
123 | S_IFREG|S_IRUSR, | ||
124 | osb->osb_debug_root, | ||
125 | osb, | ||
126 | &ocfs2_la_debug_fops); | ||
127 | if (!osb->local_alloc_debug) { | ||
128 | kfree(osb->local_alloc_debug_buf); | ||
129 | osb->local_alloc_debug_buf = NULL; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
134 | { | ||
135 | if (osb->local_alloc_debug) | ||
136 | debugfs_remove(osb->local_alloc_debug); | ||
137 | |||
138 | if (osb->local_alloc_debug_buf) | ||
139 | kfree(osb->local_alloc_debug_buf); | ||
140 | |||
141 | osb->local_alloc_debug_buf = NULL; | ||
142 | osb->local_alloc_debug = NULL; | ||
143 | } | ||
144 | #else /* CONFIG_OCFS2_FS_STATS */ | ||
145 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
146 | { | ||
147 | return; | ||
148 | } | ||
149 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
150 | { | ||
151 | return; | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | ||
79 | { | 156 | { |
80 | BUG_ON(osb->s_clustersize_bits > 20); | 157 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || |
158 | osb->local_alloc_state == OCFS2_LA_ENABLED); | ||
159 | } | ||
81 | 160 | ||
82 | /* Size local alloc windows by the megabyte */ | 161 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, |
83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | 162 | unsigned int num_clusters) |
163 | { | ||
164 | spin_lock(&osb->osb_lock); | ||
165 | if (osb->local_alloc_state == OCFS2_LA_DISABLED || | ||
166 | osb->local_alloc_state == OCFS2_LA_THROTTLED) | ||
167 | if (num_clusters >= osb->local_alloc_default_bits) { | ||
168 | cancel_delayed_work(&osb->la_enable_wq); | ||
169 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
170 | } | ||
171 | spin_unlock(&osb->osb_lock); | ||
172 | } | ||
173 | |||
174 | void ocfs2_la_enable_worker(struct work_struct *work) | ||
175 | { | ||
176 | struct ocfs2_super *osb = | ||
177 | container_of(work, struct ocfs2_super, | ||
178 | la_enable_wq.work); | ||
179 | spin_lock(&osb->osb_lock); | ||
180 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
181 | spin_unlock(&osb->osb_lock); | ||
84 | } | 182 | } |
85 | 183 | ||
86 | /* | 184 | /* |
87 | * Tell us whether a given allocation should use the local alloc | 185 | * Tell us whether a given allocation should use the local alloc |
88 | * file. Otherwise, it has to go to the main bitmap. | 186 | * file. Otherwise, it has to go to the main bitmap. |
187 | * | ||
188 | * This function does semi-dirty reads of local alloc size and state! | ||
189 | * This is ok however, as the values are re-checked once under mutex. | ||
89 | */ | 190 | */ |
90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 191 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
91 | { | 192 | { |
92 | int la_bits = ocfs2_local_alloc_window_bits(osb); | ||
93 | int ret = 0; | 193 | int ret = 0; |
194 | int la_bits; | ||
195 | |||
196 | spin_lock(&osb->osb_lock); | ||
197 | la_bits = osb->local_alloc_bits; | ||
94 | 198 | ||
95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 199 | if (!ocfs2_la_state_enabled(osb)) |
96 | goto bail; | 200 | goto bail; |
97 | 201 | ||
98 | /* la_bits should be at least twice the size (in clusters) of | 202 | /* la_bits should be at least twice the size (in clusters) of |
@@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | |||
106 | bail: | 210 | bail: |
107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | 211 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", |
108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | 212 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); |
213 | spin_unlock(&osb->osb_lock); | ||
109 | return ret; | 214 | return ret; |
110 | } | 215 | } |
111 | 216 | ||
@@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
120 | 225 | ||
121 | mlog_entry_void(); | 226 | mlog_entry_void(); |
122 | 227 | ||
123 | if (osb->local_alloc_size == 0) | 228 | ocfs2_init_la_debug(osb); |
229 | |||
230 | if (osb->local_alloc_bits == 0) | ||
124 | goto bail; | 231 | goto bail; |
125 | 232 | ||
126 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | 233 | if (osb->local_alloc_bits >= osb->bitmap_cpg) { |
127 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | 234 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " |
128 | "than max possible %u. Using defaults.\n", | 235 | "than max possible %u. Using defaults.\n", |
129 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | 236 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); |
130 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 237 | osb->local_alloc_bits = |
238 | ocfs2_megabytes_to_clusters(osb->sb, | ||
239 | OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); | ||
131 | } | 240 | } |
132 | 241 | ||
133 | /* read the alloc off disk */ | 242 | /* read the alloc off disk */ |
@@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
139 | goto bail; | 248 | goto bail; |
140 | } | 249 | } |
141 | 250 | ||
142 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 251 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
143 | &alloc_bh, 0, inode); | 252 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
144 | if (status < 0) { | 253 | if (status < 0) { |
145 | mlog_errno(status); | 254 | mlog_errno(status); |
146 | goto bail; | 255 | goto bail; |
@@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
185 | 294 | ||
186 | bail: | 295 | bail: |
187 | if (status < 0) | 296 | if (status < 0) |
188 | if (alloc_bh) | 297 | brelse(alloc_bh); |
189 | brelse(alloc_bh); | ||
190 | if (inode) | 298 | if (inode) |
191 | iput(inode); | 299 | iput(inode); |
192 | 300 | ||
193 | mlog(0, "Local alloc window bits = %d\n", | 301 | if (status < 0) |
194 | ocfs2_local_alloc_window_bits(osb)); | 302 | ocfs2_shutdown_la_debug(osb); |
303 | |||
304 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); | ||
195 | 305 | ||
196 | mlog_exit(status); | 306 | mlog_exit(status); |
197 | return status; | 307 | return status; |
@@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
217 | 327 | ||
218 | mlog_entry_void(); | 328 | mlog_entry_void(); |
219 | 329 | ||
330 | cancel_delayed_work(&osb->la_enable_wq); | ||
331 | flush_workqueue(ocfs2_wq); | ||
332 | |||
333 | ocfs2_shutdown_la_debug(osb); | ||
334 | |||
220 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | 335 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) |
221 | goto out; | 336 | goto out; |
222 | 337 | ||
@@ -295,8 +410,7 @@ out_commit: | |||
295 | ocfs2_commit_trans(osb, handle); | 410 | ocfs2_commit_trans(osb, handle); |
296 | 411 | ||
297 | out_unlock: | 412 | out_unlock: |
298 | if (main_bm_bh) | 413 | brelse(main_bm_bh); |
299 | brelse(main_bm_bh); | ||
300 | 414 | ||
301 | ocfs2_inode_unlock(main_bm_inode, 1); | 415 | ocfs2_inode_unlock(main_bm_inode, 1); |
302 | 416 | ||
@@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | |||
345 | 459 | ||
346 | mutex_lock(&inode->i_mutex); | 460 | mutex_lock(&inode->i_mutex); |
347 | 461 | ||
348 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 462 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
349 | &alloc_bh, 0, inode); | 463 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
350 | if (status < 0) { | 464 | if (status < 0) { |
351 | mlog_errno(status); | 465 | mlog_errno(status); |
352 | goto bail; | 466 | goto bail; |
@@ -372,8 +486,7 @@ bail: | |||
372 | *alloc_copy = NULL; | 486 | *alloc_copy = NULL; |
373 | } | 487 | } |
374 | 488 | ||
375 | if (alloc_bh) | 489 | brelse(alloc_bh); |
376 | brelse(alloc_bh); | ||
377 | 490 | ||
378 | if (inode) { | 491 | if (inode) { |
379 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
@@ -441,8 +554,7 @@ out_unlock: | |||
441 | out_mutex: | 554 | out_mutex: |
442 | mutex_unlock(&main_bm_inode->i_mutex); | 555 | mutex_unlock(&main_bm_inode->i_mutex); |
443 | 556 | ||
444 | if (main_bm_bh) | 557 | brelse(main_bm_bh); |
445 | brelse(main_bm_bh); | ||
446 | 558 | ||
447 | iput(main_bm_inode); | 559 | iput(main_bm_inode); |
448 | 560 | ||
@@ -453,8 +565,48 @@ out: | |||
453 | return status; | 565 | return status; |
454 | } | 566 | } |
455 | 567 | ||
568 | /* Check to see if the local alloc window is within ac->ac_max_block */ | ||
569 | static int ocfs2_local_alloc_in_range(struct inode *inode, | ||
570 | struct ocfs2_alloc_context *ac, | ||
571 | u32 bits_wanted) | ||
572 | { | ||
573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
574 | struct ocfs2_dinode *alloc; | ||
575 | struct ocfs2_local_alloc *la; | ||
576 | int start; | ||
577 | u64 block_off; | ||
578 | |||
579 | if (!ac->ac_max_block) | ||
580 | return 1; | ||
581 | |||
582 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | ||
583 | la = OCFS2_LOCAL_ALLOC(alloc); | ||
584 | |||
585 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | ||
586 | if (start == -1) { | ||
587 | mlog_errno(-ENOSPC); | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | /* | ||
592 | * Converting (bm_off + start + bits_wanted) to blocks gives us | ||
593 | * the blkno just past our actual allocation. This is perfect | ||
594 | * to compare with ac_max_block. | ||
595 | */ | ||
596 | block_off = ocfs2_clusters_to_blocks(inode->i_sb, | ||
597 | le32_to_cpu(la->la_bm_off) + | ||
598 | start + bits_wanted); | ||
599 | mlog(0, "Checking %llu against %llu\n", | ||
600 | (unsigned long long)block_off, | ||
601 | (unsigned long long)ac->ac_max_block); | ||
602 | if (block_off > ac->ac_max_block) | ||
603 | return 0; | ||
604 | |||
605 | return 1; | ||
606 | } | ||
607 | |||
456 | /* | 608 | /* |
457 | * make sure we've got at least bitswanted contiguous bits in the | 609 | * make sure we've got at least bits_wanted contiguous bits in the |
458 | * local alloc. You lose them when you drop i_mutex. | 610 | * local alloc. You lose them when you drop i_mutex. |
459 | * | 611 | * |
460 | * We will add ourselves to the transaction passed in, but may start | 612 | * We will add ourselves to the transaction passed in, but may start |
@@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
485 | 637 | ||
486 | mutex_lock(&local_alloc_inode->i_mutex); | 638 | mutex_lock(&local_alloc_inode->i_mutex); |
487 | 639 | ||
488 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) { | 640 | /* |
489 | status = -ENOSPC; | 641 | * We must double check state and allocator bits because |
490 | goto bail; | 642 | * another process may have changed them while holding i_mutex. |
491 | } | 643 | */ |
492 | 644 | spin_lock(&osb->osb_lock); | |
493 | if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { | 645 | if (!ocfs2_la_state_enabled(osb) || |
494 | mlog(0, "Asking for more than my max window size!\n"); | 646 | (bits_wanted > osb->local_alloc_bits)) { |
647 | spin_unlock(&osb->osb_lock); | ||
495 | status = -ENOSPC; | 648 | status = -ENOSPC; |
496 | goto bail; | 649 | goto bail; |
497 | } | 650 | } |
651 | spin_unlock(&osb->osb_lock); | ||
498 | 652 | ||
499 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 653 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
500 | 654 | ||
@@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
522 | mlog_errno(status); | 676 | mlog_errno(status); |
523 | goto bail; | 677 | goto bail; |
524 | } | 678 | } |
679 | |||
680 | /* | ||
681 | * Under certain conditions, the window slide code | ||
682 | * might have reduced the number of bits available or | ||
683 | * disabled the the local alloc entirely. Re-check | ||
684 | * here and return -ENOSPC if necessary. | ||
685 | */ | ||
686 | status = -ENOSPC; | ||
687 | if (!ocfs2_la_state_enabled(osb)) | ||
688 | goto bail; | ||
689 | |||
690 | free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - | ||
691 | le32_to_cpu(alloc->id1.bitmap1.i_used); | ||
692 | if (bits_wanted > free_bits) | ||
693 | goto bail; | ||
694 | } | ||
695 | |||
696 | if (ac->ac_max_block) | ||
697 | mlog(0, "Calling in_range for max block %llu\n", | ||
698 | (unsigned long long)ac->ac_max_block); | ||
699 | |||
700 | if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, | ||
701 | bits_wanted)) { | ||
702 | /* | ||
703 | * The window is outside ac->ac_max_block. | ||
704 | * This errno tells the caller to keep localalloc enabled | ||
705 | * but to get the allocation from the main bitmap. | ||
706 | */ | ||
707 | status = -EFBIG; | ||
708 | goto bail; | ||
525 | } | 709 | } |
526 | 710 | ||
527 | ac->ac_inode = local_alloc_inode; | 711 | ac->ac_inode = local_alloc_inode; |
@@ -789,6 +973,85 @@ bail: | |||
789 | return status; | 973 | return status; |
790 | } | 974 | } |
791 | 975 | ||
976 | enum ocfs2_la_event { | ||
977 | OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ | ||
978 | OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has | ||
979 | * enough bits theoretically | ||
980 | * free, but a contiguous | ||
981 | * allocation could not be | ||
982 | * found. */ | ||
983 | OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have | ||
984 | * enough bits free to satisfy | ||
985 | * our request. */ | ||
986 | }; | ||
987 | #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) | ||
988 | /* | ||
989 | * Given an event, calculate the size of our next local alloc window. | ||
990 | * | ||
991 | * This should always be called under i_mutex of the local alloc inode | ||
992 | * so that local alloc disabling doesn't race with processes trying to | ||
993 | * use the allocator. | ||
994 | * | ||
995 | * Returns the state which the local alloc was left in. This value can | ||
996 | * be ignored by some paths. | ||
997 | */ | ||
998 | static int ocfs2_recalc_la_window(struct ocfs2_super *osb, | ||
999 | enum ocfs2_la_event event) | ||
1000 | { | ||
1001 | unsigned int bits; | ||
1002 | int state; | ||
1003 | |||
1004 | spin_lock(&osb->osb_lock); | ||
1005 | if (osb->local_alloc_state == OCFS2_LA_DISABLED) { | ||
1006 | WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); | ||
1007 | goto out_unlock; | ||
1008 | } | ||
1009 | |||
1010 | /* | ||
1011 | * ENOSPC and fragmentation are treated similarly for now. | ||
1012 | */ | ||
1013 | if (event == OCFS2_LA_EVENT_ENOSPC || | ||
1014 | event == OCFS2_LA_EVENT_FRAGMENTED) { | ||
1015 | /* | ||
1016 | * We ran out of contiguous space in the primary | ||
1017 | * bitmap. Drastically reduce the number of bits used | ||
1018 | * by local alloc until we have to disable it. | ||
1019 | */ | ||
1020 | bits = osb->local_alloc_bits >> 1; | ||
1021 | if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { | ||
1022 | /* | ||
1023 | * By setting state to THROTTLED, we'll keep | ||
1024 | * the number of local alloc bits used down | ||
1025 | * until an event occurs which would give us | ||
1026 | * reason to assume the bitmap situation might | ||
1027 | * have changed. | ||
1028 | */ | ||
1029 | osb->local_alloc_state = OCFS2_LA_THROTTLED; | ||
1030 | osb->local_alloc_bits = bits; | ||
1031 | } else { | ||
1032 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
1033 | } | ||
1034 | queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, | ||
1035 | OCFS2_LA_ENABLE_INTERVAL); | ||
1036 | goto out_unlock; | ||
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * Don't increase the size of the local alloc window until we | ||
1041 | * know we might be able to fulfill the request. Otherwise, we | ||
1042 | * risk bouncing around the global bitmap during periods of | ||
1043 | * low space. | ||
1044 | */ | ||
1045 | if (osb->local_alloc_state != OCFS2_LA_THROTTLED) | ||
1046 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
1047 | |||
1048 | out_unlock: | ||
1049 | state = osb->local_alloc_state; | ||
1050 | spin_unlock(&osb->osb_lock); | ||
1051 | |||
1052 | return state; | ||
1053 | } | ||
1054 | |||
792 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | 1055 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, |
793 | struct ocfs2_alloc_context **ac, | 1056 | struct ocfs2_alloc_context **ac, |
794 | struct inode **bitmap_inode, | 1057 | struct inode **bitmap_inode, |
@@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
803 | goto bail; | 1066 | goto bail; |
804 | } | 1067 | } |
805 | 1068 | ||
806 | (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); | 1069 | retry_enospc: |
1070 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | ||
807 | 1071 | ||
808 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 1072 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
1073 | if (status == -ENOSPC) { | ||
1074 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | ||
1075 | OCFS2_LA_DISABLED) | ||
1076 | goto bail; | ||
1077 | |||
1078 | ocfs2_free_ac_resource(*ac); | ||
1079 | memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); | ||
1080 | goto retry_enospc; | ||
1081 | } | ||
809 | if (status < 0) { | 1082 | if (status < 0) { |
810 | if (status != -ENOSPC) | 1083 | mlog_errno(status); |
811 | mlog_errno(status); | ||
812 | goto bail; | 1084 | goto bail; |
813 | } | 1085 | } |
814 | 1086 | ||
@@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
849 | "one\n"); | 1121 | "one\n"); |
850 | 1122 | ||
851 | mlog(0, "Allocating %u clusters for a new window.\n", | 1123 | mlog(0, "Allocating %u clusters for a new window.\n", |
852 | ocfs2_local_alloc_window_bits(osb)); | 1124 | osb->local_alloc_bits); |
853 | 1125 | ||
854 | /* Instruct the allocation code to try the most recently used | 1126 | /* Instruct the allocation code to try the most recently used |
855 | * cluster group. We'll re-record the group used this pass | 1127 | * cluster group. We'll re-record the group used this pass |
@@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
859 | /* we used the generic suballoc reserve function, but we set | 1131 | /* we used the generic suballoc reserve function, but we set |
860 | * everything up nicely, so there's no reason why we can't use | 1132 | * everything up nicely, so there's no reason why we can't use |
861 | * the more specific cluster api to claim bits. */ | 1133 | * the more specific cluster api to claim bits. */ |
862 | status = ocfs2_claim_clusters(osb, handle, ac, | 1134 | status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, |
863 | ocfs2_local_alloc_window_bits(osb), | ||
864 | &cluster_off, &cluster_count); | 1135 | &cluster_off, &cluster_count); |
1136 | if (status == -ENOSPC) { | ||
1137 | retry_enospc: | ||
1138 | /* | ||
1139 | * Note: We could also try syncing the journal here to | ||
1140 | * allow use of any free bits which the current | ||
1141 | * transaction can't give us access to. --Mark | ||
1142 | */ | ||
1143 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == | ||
1144 | OCFS2_LA_DISABLED) | ||
1145 | goto bail; | ||
1146 | |||
1147 | status = ocfs2_claim_clusters(osb, handle, ac, | ||
1148 | osb->local_alloc_bits, | ||
1149 | &cluster_off, | ||
1150 | &cluster_count); | ||
1151 | if (status == -ENOSPC) | ||
1152 | goto retry_enospc; | ||
1153 | /* | ||
1154 | * We only shrunk the *minimum* number of in our | ||
1155 | * request - it's entirely possible that the allocator | ||
1156 | * might give us more than we asked for. | ||
1157 | */ | ||
1158 | if (status == 0) { | ||
1159 | spin_lock(&osb->osb_lock); | ||
1160 | osb->local_alloc_bits = cluster_count; | ||
1161 | spin_unlock(&osb->osb_lock); | ||
1162 | } | ||
1163 | } | ||
865 | if (status < 0) { | 1164 | if (status < 0) { |
866 | if (status != -ENOSPC) | 1165 | if (status != -ENOSPC) |
867 | mlog_errno(status); | 1166 | mlog_errno(status); |
@@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |||
905 | 1204 | ||
906 | mlog_entry_void(); | 1205 | mlog_entry_void(); |
907 | 1206 | ||
1207 | ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); | ||
1208 | |||
908 | /* This will lock the main bitmap for us. */ | 1209 | /* This will lock the main bitmap for us. */ |
909 | status = ocfs2_local_alloc_reserve_for_window(osb, | 1210 | status = ocfs2_local_alloc_reserve_for_window(osb, |
910 | &ac, | 1211 | &ac, |
@@ -976,8 +1277,7 @@ bail: | |||
976 | if (handle) | 1277 | if (handle) |
977 | ocfs2_commit_trans(osb, handle); | 1278 | ocfs2_commit_trans(osb, handle); |
978 | 1279 | ||
979 | if (main_bm_bh) | 1280 | brelse(main_bm_bh); |
980 | brelse(main_bm_bh); | ||
981 | 1281 | ||
982 | if (main_bm_inode) | 1282 | if (main_bm_inode) |
983 | iput(main_bm_inode); | 1283 | iput(main_bm_inode); |
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 3f76631e110c..ac5ea9f86653 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h | |||
@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
52 | u32 *bit_off, | 52 | u32 *bit_off, |
53 | u32 *num_bits); | 53 | u32 *num_bits); |
54 | 54 | ||
55 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | ||
56 | unsigned int num_clusters); | ||
57 | void ocfs2_la_enable_worker(struct work_struct *work); | ||
58 | |||
55 | #endif /* OCFS2_LOCALALLOC_H */ | 59 | #endif /* OCFS2_LOCALALLOC_H */ |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 203f87143877..544ac6245175 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
@@ -24,6 +24,7 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/fcntl.h> | ||
27 | 28 | ||
28 | #define MLOG_MASK_PREFIX ML_INODE | 29 | #define MLOG_MASK_PREFIX ML_INODE |
29 | #include <cluster/masklog.h> | 30 | #include <cluster/masklog.h> |
@@ -32,6 +33,7 @@ | |||
32 | 33 | ||
33 | #include "dlmglue.h" | 34 | #include "dlmglue.h" |
34 | #include "file.h" | 35 | #include "file.h" |
36 | #include "inode.h" | ||
35 | #include "locks.h" | 37 | #include "locks.h" |
36 | 38 | ||
37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | 39 | static int ocfs2_do_flock(struct file *file, struct inode *inode, |
@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
123 | else | 125 | else |
124 | return ocfs2_do_flock(file, inode, cmd, fl); | 126 | return ocfs2_do_flock(file, inode, cmd, fl); |
125 | } | 127 | } |
128 | |||
129 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) | ||
130 | { | ||
131 | struct inode *inode = file->f_mapping->host; | ||
132 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
133 | |||
134 | if (!(fl->fl_flags & FL_POSIX)) | ||
135 | return -ENOLCK; | ||
136 | if (__mandatory_lock(inode)) | ||
137 | return -ENOLCK; | ||
138 | |||
139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); | ||
140 | } | ||
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h index 9743ef2324ec..496d488b271f 100644 --- a/fs/ocfs2/locks.h +++ b/fs/ocfs2/locks.h | |||
@@ -27,5 +27,6 @@ | |||
27 | #define OCFS2_LOCKS_H | 27 | #define OCFS2_LOCKS_H |
28 | 28 | ||
29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); |
30 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl); | ||
30 | 31 | ||
31 | #endif /* OCFS2_LOCKS_H */ | 32 | #endif /* OCFS2_LOCKS_H */ |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d5d808fe0140..485a6aa0ad39 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "xattr.h" | ||
63 | 64 | ||
64 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
65 | 66 | ||
@@ -327,14 +328,9 @@ leave: | |||
327 | if (status == -ENOSPC) | 328 | if (status == -ENOSPC) |
328 | mlog(0, "Disk is full\n"); | 329 | mlog(0, "Disk is full\n"); |
329 | 330 | ||
330 | if (new_fe_bh) | 331 | brelse(new_fe_bh); |
331 | brelse(new_fe_bh); | 332 | brelse(de_bh); |
332 | 333 | brelse(parent_fe_bh); | |
333 | if (de_bh) | ||
334 | brelse(de_bh); | ||
335 | |||
336 | if (parent_fe_bh) | ||
337 | brelse(parent_fe_bh); | ||
338 | 334 | ||
339 | if ((status < 0) && inode) | 335 | if ((status < 0) && inode) |
340 | iput(inode); | 336 | iput(inode); |
@@ -647,12 +643,9 @@ out_unlock_inode: | |||
647 | out: | 643 | out: |
648 | ocfs2_inode_unlock(dir, 1); | 644 | ocfs2_inode_unlock(dir, 1); |
649 | 645 | ||
650 | if (de_bh) | 646 | brelse(de_bh); |
651 | brelse(de_bh); | 647 | brelse(fe_bh); |
652 | if (fe_bh) | 648 | brelse(parent_fe_bh); |
653 | brelse(fe_bh); | ||
654 | if (parent_fe_bh) | ||
655 | brelse(parent_fe_bh); | ||
656 | 649 | ||
657 | mlog_exit(err); | 650 | mlog_exit(err); |
658 | 651 | ||
@@ -851,17 +844,10 @@ leave: | |||
851 | iput(orphan_dir); | 844 | iput(orphan_dir); |
852 | } | 845 | } |
853 | 846 | ||
854 | if (fe_bh) | 847 | brelse(fe_bh); |
855 | brelse(fe_bh); | 848 | brelse(dirent_bh); |
856 | 849 | brelse(parent_node_bh); | |
857 | if (dirent_bh) | 850 | brelse(orphan_entry_bh); |
858 | brelse(dirent_bh); | ||
859 | |||
860 | if (parent_node_bh) | ||
861 | brelse(parent_node_bh); | ||
862 | |||
863 | if (orphan_entry_bh) | ||
864 | brelse(orphan_entry_bh); | ||
865 | 851 | ||
866 | mlog_exit(status); | 852 | mlog_exit(status); |
867 | 853 | ||
@@ -1372,24 +1358,15 @@ bail: | |||
1372 | 1358 | ||
1373 | if (new_inode) | 1359 | if (new_inode) |
1374 | iput(new_inode); | 1360 | iput(new_inode); |
1375 | if (newfe_bh) | 1361 | brelse(newfe_bh); |
1376 | brelse(newfe_bh); | 1362 | brelse(old_inode_bh); |
1377 | if (old_inode_bh) | 1363 | brelse(old_dir_bh); |
1378 | brelse(old_inode_bh); | 1364 | brelse(new_dir_bh); |
1379 | if (old_dir_bh) | 1365 | brelse(new_de_bh); |
1380 | brelse(old_dir_bh); | 1366 | brelse(old_de_bh); |
1381 | if (new_dir_bh) | 1367 | brelse(old_inode_de_bh); |
1382 | brelse(new_dir_bh); | 1368 | brelse(orphan_entry_bh); |
1383 | if (new_de_bh) | 1369 | brelse(insert_entry_bh); |
1384 | brelse(new_de_bh); | ||
1385 | if (old_de_bh) | ||
1386 | brelse(old_de_bh); | ||
1387 | if (old_inode_de_bh) | ||
1388 | brelse(old_inode_de_bh); | ||
1389 | if (orphan_entry_bh) | ||
1390 | brelse(orphan_entry_bh); | ||
1391 | if (insert_entry_bh) | ||
1392 | brelse(insert_entry_bh); | ||
1393 | 1370 | ||
1394 | mlog_exit(status); | 1371 | mlog_exit(status); |
1395 | 1372 | ||
@@ -1492,8 +1469,7 @@ bail: | |||
1492 | 1469 | ||
1493 | if (bhs) { | 1470 | if (bhs) { |
1494 | for(i = 0; i < blocks; i++) | 1471 | for(i = 0; i < blocks; i++) |
1495 | if (bhs[i]) | 1472 | brelse(bhs[i]); |
1496 | brelse(bhs[i]); | ||
1497 | kfree(bhs); | 1473 | kfree(bhs); |
1498 | } | 1474 | } |
1499 | 1475 | ||
@@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir, | |||
1598 | u32 offset = 0; | 1574 | u32 offset = 0; |
1599 | 1575 | ||
1600 | inode->i_op = &ocfs2_symlink_inode_operations; | 1576 | inode->i_op = &ocfs2_symlink_inode_operations; |
1601 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, | 1577 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, |
1602 | new_fe_bh, | 1578 | new_fe_bh, |
1603 | handle, data_ac, NULL, | 1579 | handle, data_ac, NULL, |
1604 | NULL); | 1580 | NULL); |
1605 | if (status < 0) { | 1581 | if (status < 0) { |
1606 | if (status != -ENOSPC && status != -EINTR) { | 1582 | if (status != -ENOSPC && status != -EINTR) { |
1607 | mlog(ML_ERROR, | 1583 | mlog(ML_ERROR, |
@@ -1659,12 +1635,9 @@ bail: | |||
1659 | 1635 | ||
1660 | ocfs2_inode_unlock(dir, 1); | 1636 | ocfs2_inode_unlock(dir, 1); |
1661 | 1637 | ||
1662 | if (new_fe_bh) | 1638 | brelse(new_fe_bh); |
1663 | brelse(new_fe_bh); | 1639 | brelse(parent_fe_bh); |
1664 | if (parent_fe_bh) | 1640 | brelse(de_bh); |
1665 | brelse(parent_fe_bh); | ||
1666 | if (de_bh) | ||
1667 | brelse(de_bh); | ||
1668 | if (inode_ac) | 1641 | if (inode_ac) |
1669 | ocfs2_free_alloc_context(inode_ac); | 1642 | ocfs2_free_alloc_context(inode_ac); |
1670 | if (data_ac) | 1643 | if (data_ac) |
@@ -1759,8 +1732,7 @@ leave: | |||
1759 | iput(orphan_dir_inode); | 1732 | iput(orphan_dir_inode); |
1760 | } | 1733 | } |
1761 | 1734 | ||
1762 | if (orphan_dir_bh) | 1735 | brelse(orphan_dir_bh); |
1763 | brelse(orphan_dir_bh); | ||
1764 | 1736 | ||
1765 | mlog_exit(status); | 1737 | mlog_exit(status); |
1766 | return status; | 1738 | return status; |
@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1780 | 1752 | ||
1781 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 1753 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
1782 | 1754 | ||
1783 | status = ocfs2_read_block(osb, | 1755 | status = ocfs2_read_block(orphan_dir_inode, |
1784 | OCFS2_I(orphan_dir_inode)->ip_blkno, | 1756 | OCFS2_I(orphan_dir_inode)->ip_blkno, |
1785 | &orphan_dir_bh, OCFS2_BH_CACHED, | 1757 | &orphan_dir_bh); |
1786 | orphan_dir_inode); | ||
1787 | if (status < 0) { | 1758 | if (status < 0) { |
1788 | mlog_errno(status); | 1759 | mlog_errno(status); |
1789 | goto leave; | 1760 | goto leave; |
@@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1829 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 1800 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
1830 | 1801 | ||
1831 | leave: | 1802 | leave: |
1832 | if (orphan_dir_bh) | 1803 | brelse(orphan_dir_bh); |
1833 | brelse(orphan_dir_bh); | ||
1834 | 1804 | ||
1835 | mlog_exit(status); | 1805 | mlog_exit(status); |
1836 | return status; | 1806 | return status; |
@@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
1898 | } | 1868 | } |
1899 | 1869 | ||
1900 | leave: | 1870 | leave: |
1901 | if (target_de_bh) | 1871 | brelse(target_de_bh); |
1902 | brelse(target_de_bh); | ||
1903 | 1872 | ||
1904 | mlog_exit(status); | 1873 | mlog_exit(status); |
1905 | return status; | 1874 | return status; |
@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = { | |||
1918 | .setattr = ocfs2_setattr, | 1887 | .setattr = ocfs2_setattr, |
1919 | .getattr = ocfs2_getattr, | 1888 | .getattr = ocfs2_getattr, |
1920 | .permission = ocfs2_permission, | 1889 | .permission = ocfs2_permission, |
1890 | .setxattr = generic_setxattr, | ||
1891 | .getxattr = generic_getxattr, | ||
1892 | .listxattr = ocfs2_listxattr, | ||
1893 | .removexattr = generic_removexattr, | ||
1921 | }; | 1894 | }; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7f625f2b1117..a21a465490c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -34,7 +34,12 @@ | |||
34 | #include <linux/workqueue.h> | 34 | #include <linux/workqueue.h> |
35 | #include <linux/kref.h> | 35 | #include <linux/kref.h> |
36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
37 | #include <linux/jbd.h> | 37 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
38 | # include <linux/jbd2.h> | ||
39 | #else | ||
40 | # include <linux/jbd.h> | ||
41 | # include "ocfs2_jbd_compat.h" | ||
42 | #endif | ||
38 | 43 | ||
39 | /* For union ocfs2_dlm_lksb */ | 44 | /* For union ocfs2_dlm_lksb */ |
40 | #include "stackglue.h" | 45 | #include "stackglue.h" |
@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats | |||
171 | 176 | ||
172 | enum ocfs2_local_alloc_state | 177 | enum ocfs2_local_alloc_state |
173 | { | 178 | { |
174 | OCFS2_LA_UNUSED = 0, | 179 | OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for |
175 | OCFS2_LA_ENABLED, | 180 | * this mountpoint. */ |
176 | OCFS2_LA_DISABLED | 181 | OCFS2_LA_ENABLED, /* Local alloc is in use. */ |
182 | OCFS2_LA_THROTTLED, /* Local alloc is in use, but number | ||
183 | * of bits has been reduced. */ | ||
184 | OCFS2_LA_DISABLED /* Local alloc has temporarily been | ||
185 | * disabled. */ | ||
177 | }; | 186 | }; |
178 | 187 | ||
179 | enum ocfs2_mount_options | 188 | enum ocfs2_mount_options |
@@ -184,6 +193,8 @@ enum ocfs2_mount_options | |||
184 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 193 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
185 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 194 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
186 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | 195 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ |
196 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ | ||
197 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ | ||
187 | }; | 198 | }; |
188 | 199 | ||
189 | #define OCFS2_OSB_SOFT_RO 0x0001 | 200 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -214,6 +225,7 @@ struct ocfs2_super | |||
214 | u32 bitmap_cpg; | 225 | u32 bitmap_cpg; |
215 | u8 *uuid; | 226 | u8 *uuid; |
216 | char *uuid_str; | 227 | char *uuid_str; |
228 | u32 uuid_hash; | ||
217 | u8 *vol_label; | 229 | u8 *vol_label; |
218 | u64 first_cluster_group_blkno; | 230 | u64 first_cluster_group_blkno; |
219 | u32 fs_generation; | 231 | u32 fs_generation; |
@@ -241,6 +253,7 @@ struct ocfs2_super | |||
241 | int s_sectsize_bits; | 253 | int s_sectsize_bits; |
242 | int s_clustersize; | 254 | int s_clustersize; |
243 | int s_clustersize_bits; | 255 | int s_clustersize_bits; |
256 | unsigned int s_xattr_inline_size; | ||
244 | 257 | ||
245 | atomic_t vol_state; | 258 | atomic_t vol_state; |
246 | struct mutex recovery_lock; | 259 | struct mutex recovery_lock; |
@@ -252,11 +265,27 @@ struct ocfs2_super | |||
252 | struct ocfs2_journal *journal; | 265 | struct ocfs2_journal *journal; |
253 | unsigned long osb_commit_interval; | 266 | unsigned long osb_commit_interval; |
254 | 267 | ||
255 | int local_alloc_size; | 268 | struct delayed_work la_enable_wq; |
256 | enum ocfs2_local_alloc_state local_alloc_state; | 269 | |
270 | /* | ||
271 | * Must hold local alloc i_mutex and osb->osb_lock to change | ||
272 | * local_alloc_bits. Reads can be done under either lock. | ||
273 | */ | ||
274 | unsigned int local_alloc_bits; | ||
275 | unsigned int local_alloc_default_bits; | ||
276 | |||
277 | enum ocfs2_local_alloc_state local_alloc_state; /* protected | ||
278 | * by osb_lock */ | ||
279 | |||
257 | struct buffer_head *local_alloc_bh; | 280 | struct buffer_head *local_alloc_bh; |
281 | |||
258 | u64 la_last_gd; | 282 | u64 la_last_gd; |
259 | 283 | ||
284 | #ifdef CONFIG_OCFS2_FS_STATS | ||
285 | struct dentry *local_alloc_debug; | ||
286 | char *local_alloc_debug_buf; | ||
287 | #endif | ||
288 | |||
260 | /* Next two fields are for local node slot recovery during | 289 | /* Next two fields are for local node slot recovery during |
261 | * mount. */ | 290 | * mount. */ |
262 | int dirty; | 291 | int dirty; |
@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) | |||
340 | return 0; | 369 | return 0; |
341 | } | 370 | } |
342 | 371 | ||
372 | static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) | ||
373 | { | ||
374 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) | ||
375 | return 1; | ||
376 | return 0; | ||
377 | } | ||
378 | |||
343 | /* set / clear functions because cluster events can make these happen | 379 | /* set / clear functions because cluster events can make these happen |
344 | * in parallel so we want the transitions to be atomic. this also | 380 | * in parallel so we want the transitions to be atomic. this also |
345 | * means that any future flags osb_flags must be protected by spinlock | 381 | * means that any future flags osb_flags must be protected by spinlock |
@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
554 | return pages_per_cluster; | 590 | return pages_per_cluster; |
555 | } | 591 | } |
556 | 592 | ||
593 | static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | ||
594 | unsigned int megs) | ||
595 | { | ||
596 | BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); | ||
597 | |||
598 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | ||
599 | } | ||
600 | |||
557 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | 601 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) |
558 | { | 602 | { |
559 | spin_lock(&osb->osb_lock); | 603 | spin_lock(&osb->osb_lock); |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 4f619850ccf7..f24ce3d3f956 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -64,6 +64,7 @@ | |||
64 | #define OCFS2_INODE_SIGNATURE "INODE01" | 64 | #define OCFS2_INODE_SIGNATURE "INODE01" |
65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" | 65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" |
66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" | 66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" |
67 | #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" | ||
67 | 68 | ||
68 | /* Compatibility flags */ | 69 | /* Compatibility flags */ |
69 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ | 70 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ |
@@ -90,7 +91,8 @@ | |||
90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 91 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ | 92 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | 93 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ |
93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | 94 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ |
95 | | OCFS2_FEATURE_INCOMPAT_XATTR) | ||
94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 96 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
95 | 97 | ||
96 | /* | 98 | /* |
@@ -127,10 +129,6 @@ | |||
127 | /* Support for data packed into inode blocks */ | 129 | /* Support for data packed into inode blocks */ |
128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 130 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
129 | 131 | ||
130 | /* Support for the extended slot map */ | ||
131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
132 | |||
133 | |||
134 | /* | 132 | /* |
135 | * Support for alternate, userspace cluster stacks. If set, the superblock | 133 | * Support for alternate, userspace cluster stacks. If set, the superblock |
136 | * field s_cluster_info contains a tag for the alternate stack in use as | 134 | * field s_cluster_info contains a tag for the alternate stack in use as |
@@ -142,6 +140,12 @@ | |||
142 | */ | 140 | */ |
143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | 141 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 |
144 | 142 | ||
143 | /* Support for the extended slot map */ | ||
144 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
145 | |||
146 | /* Support for extended attributes */ | ||
147 | #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 | ||
148 | |||
145 | /* | 149 | /* |
146 | * backup superblock flag is used to indicate that this volume | 150 | * backup superblock flag is used to indicate that this volume |
147 | * has backup superblocks. | 151 | * has backup superblocks. |
@@ -299,6 +303,12 @@ struct ocfs2_new_group_input { | |||
299 | */ | 303 | */ |
300 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | 304 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 |
301 | 305 | ||
306 | /* | ||
307 | * Inline extended attribute size (in bytes) | ||
308 | * The value chosen should be aligned to 16 byte boundaries. | ||
309 | */ | ||
310 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | ||
311 | |||
302 | struct ocfs2_system_inode_info { | 312 | struct ocfs2_system_inode_info { |
303 | char *si_name; | 313 | char *si_name; |
304 | int si_iflags; | 314 | int si_iflags; |
@@ -563,7 +573,7 @@ struct ocfs2_super_block { | |||
563 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts | 573 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts |
564 | before tunefs required */ | 574 | before tunefs required */ |
565 | __le16 s_tunefs_flag; | 575 | __le16 s_tunefs_flag; |
566 | __le32 s_reserved1; | 576 | __le32 s_uuid_hash; /* hash value of uuid */ |
567 | __le64 s_first_cluster_group; /* Block offset of 1st cluster | 577 | __le64 s_first_cluster_group; /* Block offset of 1st cluster |
568 | * group header */ | 578 | * group header */ |
569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 579 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
@@ -571,7 +581,11 @@ struct ocfs2_super_block { | |||
571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 581 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
572 | stack. Only valid | 582 | stack. Only valid |
573 | with INCOMPAT flag. */ | 583 | with INCOMPAT flag. */ |
574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | 584 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
585 | for this fs*/ | ||
586 | __le16 s_reserved0; | ||
587 | __le32 s_reserved1; | ||
588 | /*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ | ||
575 | /*140*/ | 589 | /*140*/ |
576 | 590 | ||
577 | /* | 591 | /* |
@@ -621,7 +635,8 @@ struct ocfs2_dinode { | |||
621 | belongs to */ | 635 | belongs to */ |
622 | __le16 i_suballoc_bit; /* Bit offset in suballocator | 636 | __le16 i_suballoc_bit; /* Bit offset in suballocator |
623 | block group */ | 637 | block group */ |
624 | /*10*/ __le32 i_reserved0; | 638 | /*10*/ __le16 i_reserved0; |
639 | __le16 i_xattr_inline_size; | ||
625 | __le32 i_clusters; /* Cluster count */ | 640 | __le32 i_clusters; /* Cluster count */ |
626 | __le32 i_uid; /* Owner UID */ | 641 | __le32 i_uid; /* Owner UID */ |
627 | __le32 i_gid; /* Owning GID */ | 642 | __le32 i_gid; /* Owning GID */ |
@@ -640,11 +655,12 @@ struct ocfs2_dinode { | |||
640 | __le32 i_atime_nsec; | 655 | __le32 i_atime_nsec; |
641 | __le32 i_ctime_nsec; | 656 | __le32 i_ctime_nsec; |
642 | __le32 i_mtime_nsec; | 657 | __le32 i_mtime_nsec; |
643 | __le32 i_attr; | 658 | /*70*/ __le32 i_attr; |
644 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL | 659 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL |
645 | was set in i_flags */ | 660 | was set in i_flags */ |
646 | __le16 i_dyn_features; | 661 | __le16 i_dyn_features; |
647 | /*70*/ __le64 i_reserved2[8]; | 662 | __le64 i_xattr_loc; |
663 | /*80*/ __le64 i_reserved2[7]; | ||
648 | /*B8*/ union { | 664 | /*B8*/ union { |
649 | __le64 i_pad1; /* Generic way to refer to this | 665 | __le64 i_pad1; /* Generic way to refer to this |
650 | 64bit union */ | 666 | 64bit union */ |
@@ -715,6 +731,136 @@ struct ocfs2_group_desc | |||
715 | /*40*/ __u8 bg_bitmap[0]; | 731 | /*40*/ __u8 bg_bitmap[0]; |
716 | }; | 732 | }; |
717 | 733 | ||
734 | /* | ||
735 | * On disk extended attribute structure for OCFS2. | ||
736 | */ | ||
737 | |||
738 | /* | ||
739 | * ocfs2_xattr_entry indicates one extend attribute. | ||
740 | * | ||
741 | * Note that it can be stored in inode, one block or one xattr bucket. | ||
742 | */ | ||
743 | struct ocfs2_xattr_entry { | ||
744 | __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ | ||
745 | __le16 xe_name_offset; /* byte offset from the 1st etnry in the local | ||
746 | local xattr storage(inode, xattr block or | ||
747 | xattr bucket). */ | ||
748 | __u8 xe_name_len; /* xattr name len, does't include prefix. */ | ||
749 | __u8 xe_type; /* the low 7 bits indicates the name prefix's | ||
750 | * type and the highest 1 bits indicate whether | ||
751 | * the EA is stored in the local storage. */ | ||
752 | __le64 xe_value_size; /* real xattr value length. */ | ||
753 | }; | ||
754 | |||
755 | /* | ||
756 | * On disk structure for xattr header. | ||
757 | * | ||
758 | * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records in | ||
759 | * the local xattr storage. | ||
760 | */ | ||
761 | struct ocfs2_xattr_header { | ||
762 | __le16 xh_count; /* contains the count of how | ||
763 | many records are in the | ||
764 | local xattr storage. */ | ||
765 | __le16 xh_free_start; /* current offset for storing | ||
766 | xattr. */ | ||
767 | __le16 xh_name_value_len; /* total length of name/value | ||
768 | length in this bucket. */ | ||
769 | __le16 xh_num_buckets; /* bucket nums in one extent | ||
770 | record, only valid in the | ||
771 | first bucket. */ | ||
772 | __le64 xh_csum; | ||
773 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ | ||
774 | }; | ||
775 | |||
776 | /* | ||
777 | * On disk structure for xattr value root. | ||
778 | * | ||
779 | * It is used when one extended attribute's size is larger, and we will save it | ||
780 | * in an outside cluster. It will stored in a b-tree like file content. | ||
781 | */ | ||
782 | struct ocfs2_xattr_value_root { | ||
783 | /*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ | ||
784 | __le32 xr_reserved0; | ||
785 | __le64 xr_last_eb_blk; /* Pointer to last extent block */ | ||
786 | /*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */ | ||
787 | }; | ||
788 | |||
789 | /* | ||
790 | * On disk structure for xattr tree root. | ||
791 | * | ||
792 | * It is used when there are too many extended attributes for one file. These | ||
793 | * attributes will be organized and stored in an indexed-btree. | ||
794 | */ | ||
795 | struct ocfs2_xattr_tree_root { | ||
796 | /*00*/ __le32 xt_clusters; /* clusters covered by xattr. */ | ||
797 | __le32 xt_reserved0; | ||
798 | __le64 xt_last_eb_blk; /* Pointer to last extent block */ | ||
799 | /*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */ | ||
800 | }; | ||
801 | |||
802 | #define OCFS2_XATTR_INDEXED 0x1 | ||
803 | #define OCFS2_HASH_SHIFT 5 | ||
804 | #define OCFS2_XATTR_ROUND 3 | ||
805 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ | ||
806 | ~(OCFS2_XATTR_ROUND)) | ||
807 | |||
808 | #define OCFS2_XATTR_BUCKET_SIZE 4096 | ||
809 | #define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \ | ||
810 | / OCFS2_MIN_BLOCKSIZE) | ||
811 | |||
812 | /* | ||
813 | * On disk structure for xattr block. | ||
814 | */ | ||
815 | struct ocfs2_xattr_block { | ||
816 | /*00*/ __u8 xb_signature[8]; /* Signature for verification */ | ||
817 | __le16 xb_suballoc_slot; /* Slot suballocator this | ||
818 | block belongs to. */ | ||
819 | __le16 xb_suballoc_bit; /* Bit offset in suballocator | ||
820 | block group */ | ||
821 | __le32 xb_fs_generation; /* Must match super block */ | ||
822 | /*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ | ||
823 | __le64 xb_csum; | ||
824 | /*20*/ __le16 xb_flags; /* Indicates whether this block contains | ||
825 | real xattr or a xattr tree. */ | ||
826 | __le16 xb_reserved0; | ||
827 | __le32 xb_reserved1; | ||
828 | __le64 xb_reserved2; | ||
829 | /*30*/ union { | ||
830 | struct ocfs2_xattr_header xb_header; /* xattr header if this | ||
831 | block contains xattr */ | ||
832 | struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this | ||
833 | block cotains xattr | ||
834 | tree. */ | ||
835 | } xb_attrs; | ||
836 | }; | ||
837 | |||
838 | #define OCFS2_XATTR_ENTRY_LOCAL 0x80 | ||
839 | #define OCFS2_XATTR_TYPE_MASK 0x7F | ||
840 | static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe, | ||
841 | int local) | ||
842 | { | ||
843 | if (local) | ||
844 | xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL; | ||
845 | else | ||
846 | xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL; | ||
847 | } | ||
848 | |||
849 | static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe) | ||
850 | { | ||
851 | return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL; | ||
852 | } | ||
853 | |||
854 | static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type) | ||
855 | { | ||
856 | xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK; | ||
857 | } | ||
858 | |||
859 | static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) | ||
860 | { | ||
861 | return xe->xe_type & OCFS2_XATTR_TYPE_MASK; | ||
862 | } | ||
863 | |||
718 | #ifdef __KERNEL__ | 864 | #ifdef __KERNEL__ |
719 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) | 865 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) |
720 | { | 866 | { |
@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb) | |||
728 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | 874 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); |
729 | } | 875 | } |
730 | 876 | ||
877 | static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb, | ||
878 | struct ocfs2_dinode *di) | ||
879 | { | ||
880 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
881 | |||
882 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
883 | return sb->s_blocksize - | ||
884 | offsetof(struct ocfs2_dinode, id2.i_data.id_data) - | ||
885 | xattrsize; | ||
886 | else | ||
887 | return sb->s_blocksize - | ||
888 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
889 | } | ||
890 | |||
731 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | 891 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) |
732 | { | 892 | { |
733 | int size; | 893 | int size; |
@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | |||
738 | return size / sizeof(struct ocfs2_extent_rec); | 898 | return size / sizeof(struct ocfs2_extent_rec); |
739 | } | 899 | } |
740 | 900 | ||
901 | static inline int ocfs2_extent_recs_per_inode_with_xattr( | ||
902 | struct super_block *sb, | ||
903 | struct ocfs2_dinode *di) | ||
904 | { | ||
905 | int size; | ||
906 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
907 | |||
908 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
909 | size = sb->s_blocksize - | ||
910 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs) - | ||
911 | xattrsize; | ||
912 | else | ||
913 | size = sb->s_blocksize - | ||
914 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs); | ||
915 | |||
916 | return size / sizeof(struct ocfs2_extent_rec); | ||
917 | } | ||
918 | |||
741 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) | 919 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) |
742 | { | 920 | { |
743 | int size; | 921 | int size; |
@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) | |||
801 | return 0; | 979 | return 0; |
802 | 980 | ||
803 | } | 981 | } |
982 | |||
983 | static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | ||
984 | { | ||
985 | int size; | ||
986 | |||
987 | size = sb->s_blocksize - | ||
988 | offsetof(struct ocfs2_xattr_block, | ||
989 | xb_attrs.xb_root.xt_list.l_recs); | ||
990 | |||
991 | return size / sizeof(struct ocfs2_extent_rec); | ||
992 | } | ||
804 | #else | 993 | #else |
805 | static inline int ocfs2_fast_symlink_chars(int blocksize) | 994 | static inline int ocfs2_fast_symlink_chars(int blocksize) |
806 | { | 995 | { |
@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) | |||
884 | 1073 | ||
885 | return 0; | 1074 | return 0; |
886 | } | 1075 | } |
1076 | |||
1077 | static inline int ocfs2_xattr_recs_per_xb(int blocksize) | ||
1078 | { | ||
1079 | int size; | ||
1080 | |||
1081 | size = blocksize - | ||
1082 | offsetof(struct ocfs2_xattr_block, | ||
1083 | xb_attrs.xb_root.xt_list.l_recs); | ||
1084 | |||
1085 | return size / sizeof(struct ocfs2_extent_rec); | ||
1086 | } | ||
887 | #endif /* __KERNEL__ */ | 1087 | #endif /* __KERNEL__ */ |
888 | 1088 | ||
889 | 1089 | ||
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h new file mode 100644 index 000000000000..b91c78f8f558 --- /dev/null +++ b/fs/ocfs2/ocfs2_jbd_compat.h | |||
@@ -0,0 +1,82 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ocfs2_jbd_compat.h | ||
5 | * | ||
6 | * Compatibility defines for JBD. | ||
7 | * | ||
8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License version 2 as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #ifndef OCFS2_JBD_COMPAT_H | ||
21 | #define OCFS2_JBD_COMPAT_H | ||
22 | |||
23 | #ifndef CONFIG_OCFS2_COMPAT_JBD | ||
24 | # error Should not have been included | ||
25 | #endif | ||
26 | |||
27 | struct jbd2_inode { | ||
28 | unsigned int dummy; | ||
29 | }; | ||
30 | |||
31 | #define JBD2_BARRIER JFS_BARRIER | ||
32 | #define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE | ||
33 | |||
34 | #define jbd2_journal_ack_err journal_ack_err | ||
35 | #define jbd2_journal_clear_err journal_clear_err | ||
36 | #define jbd2_journal_destroy journal_destroy | ||
37 | #define jbd2_journal_dirty_metadata journal_dirty_metadata | ||
38 | #define jbd2_journal_errno journal_errno | ||
39 | #define jbd2_journal_extend journal_extend | ||
40 | #define jbd2_journal_flush journal_flush | ||
41 | #define jbd2_journal_force_commit journal_force_commit | ||
42 | #define jbd2_journal_get_write_access journal_get_write_access | ||
43 | #define jbd2_journal_get_undo_access journal_get_undo_access | ||
44 | #define jbd2_journal_init_inode journal_init_inode | ||
45 | #define jbd2_journal_invalidatepage journal_invalidatepage | ||
46 | #define jbd2_journal_load journal_load | ||
47 | #define jbd2_journal_lock_updates journal_lock_updates | ||
48 | #define jbd2_journal_restart journal_restart | ||
49 | #define jbd2_journal_start journal_start | ||
50 | #define jbd2_journal_start_commit journal_start_commit | ||
51 | #define jbd2_journal_stop journal_stop | ||
52 | #define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers | ||
53 | #define jbd2_journal_unlock_updates journal_unlock_updates | ||
54 | #define jbd2_journal_wipe journal_wipe | ||
55 | #define jbd2_log_wait_commit log_wait_commit | ||
56 | |||
57 | static inline int jbd2_journal_file_inode(handle_t *handle, | ||
58 | struct jbd2_inode *inode) | ||
59 | { | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
64 | loff_t new_size) | ||
65 | { | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, | ||
70 | struct inode *inode) | ||
71 | { | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | static inline void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
76 | struct jbd2_inode *jinode) | ||
77 | { | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | |||
82 | #endif /* OCFS2_JBD_COMPAT_H */ | ||
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 8166968e9015..ffd48db229a7 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c | |||
@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data) | |||
200 | if (cluster > clusters) | 200 | if (cluster > clusters) |
201 | break; | 201 | break; |
202 | 202 | ||
203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | 203 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup); |
204 | if (ret < 0) { | 204 | if (ret < 0) { |
205 | mlog_errno(ret); | 205 | mlog_errno(ret); |
206 | break; | 206 | break; |
@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode, | |||
236 | * update the superblock last. | 236 | * update the superblock last. |
237 | * It doesn't matter if the write failed. | 237 | * It doesn't matter if the write failed. |
238 | */ | 238 | */ |
239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | 239 | ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1, |
240 | &super_bh, 0, NULL); | 240 | &super_bh); |
241 | if (ret < 0) { | 241 | if (ret < 0) { |
242 | mlog_errno(ret); | 242 | mlog_errno(ret); |
243 | goto out; | 243 | goto out; |
@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) | |||
332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | 332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, |
333 | first_new_cluster - 1); | 333 | first_new_cluster - 1); |
334 | 334 | ||
335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | 335 | ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); |
336 | main_bm_inode); | ||
337 | if (ret < 0) { | 336 | if (ret < 0) { |
338 | mlog_errno(ret); | 337 | mlog_errno(ret); |
339 | goto out_unlock; | 338 | goto out_unlock; |
@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
540 | goto out_unlock; | 539 | goto out_unlock; |
541 | } | 540 | } |
542 | 541 | ||
543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | 542 | ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh); |
544 | if (ret < 0) { | 543 | if (ret < 0) { |
545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | 544 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " |
546 | "from the device.", (unsigned long long)input->group); | 545 | "from the device.", (unsigned long long)input->group); |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bb5ff8939bf1..bdda2d8f8508 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | |||
150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If |
151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. |
152 | */ | 152 | */ |
153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | 153 | ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, |
154 | si->si_inode); | 154 | OCFS2_BH_IGNORE_CACHE); |
155 | if (ret == 0) { | 155 | if (ret == 0) { |
156 | spin_lock(&osb->osb_lock); | 156 | spin_lock(&osb->osb_lock); |
157 | ocfs2_update_slot_info(si); | 157 | ocfs2_update_slot_info(si); |
@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
404 | (unsigned long long)blkno); | 404 | (unsigned long long)blkno); |
405 | 405 | ||
406 | bh = NULL; /* Acquire a fresh bh */ | 406 | bh = NULL; /* Acquire a fresh bh */ |
407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | 407 | status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, |
408 | OCFS2_BH_IGNORE_CACHE); | ||
408 | if (status < 0) { | 409 | if (status < 0) { |
409 | mlog_errno(status); | 410 | mlog_errno(status); |
410 | goto bail; | 411 | goto bail; |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 353fc35c6748..faec2d879357 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | 28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ |
29 | #include "stackglue.h" | 29 | #include "stackglue.h" |
30 | 30 | ||
31 | #include <linux/dlm_plock.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * The control protocol starts with a handshake. Until the handshake | 34 | * The control protocol starts with a handshake. Until the handshake |
@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
746 | { | 747 | { |
747 | } | 748 | } |
748 | 749 | ||
750 | static int user_plock(struct ocfs2_cluster_connection *conn, | ||
751 | u64 ino, | ||
752 | struct file *file, | ||
753 | int cmd, | ||
754 | struct file_lock *fl) | ||
755 | { | ||
756 | /* | ||
757 | * This more or less just demuxes the plock request into any | ||
758 | * one of three dlm calls. | ||
759 | * | ||
760 | * Internally, fs/dlm will pass these to a misc device, which | ||
761 | * a userspace daemon will read and write to. | ||
762 | * | ||
763 | * For now, cancel requests (which happen internally only), | ||
764 | * are turned into unlocks. Most of this function taken from | ||
765 | * gfs2_lock. | ||
766 | */ | ||
767 | |||
768 | if (cmd == F_CANCELLK) { | ||
769 | cmd = F_SETLK; | ||
770 | fl->fl_type = F_UNLCK; | ||
771 | } | ||
772 | |||
773 | if (IS_GETLK(cmd)) | ||
774 | return dlm_posix_get(conn->cc_lockspace, ino, file, fl); | ||
775 | else if (fl->fl_type == F_UNLCK) | ||
776 | return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); | ||
777 | else | ||
778 | return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); | ||
779 | } | ||
780 | |||
749 | /* | 781 | /* |
750 | * Compare a requested locking protocol version against the current one. | 782 | * Compare a requested locking protocol version against the current one. |
751 | * | 783 | * |
@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { | |||
839 | .dlm_unlock = user_dlm_unlock, | 871 | .dlm_unlock = user_dlm_unlock, |
840 | .lock_status = user_dlm_lock_status, | 872 | .lock_status = user_dlm_lock_status, |
841 | .lock_lvb = user_dlm_lvb, | 873 | .lock_lvb = user_dlm_lvb, |
874 | .plock = user_plock, | ||
842 | .dump_lksb = user_dlm_dump_lksb, | 875 | .dump_lksb = user_dlm_dump_lksb, |
843 | }; | 876 | }; |
844 | 877 | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..68b668b0e60a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, | |||
97 | goto out; | 97 | goto out; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* Ok, the stack is pinned */ | ||
101 | p->sp_count++; | ||
102 | active_stack = p; | 100 | active_stack = p; |
103 | |||
104 | rc = 0; | 101 | rc = 0; |
105 | 102 | ||
106 | out: | 103 | out: |
104 | /* If we found it, pin it */ | ||
105 | if (!rc) | ||
106 | active_stack->sp_count++; | ||
107 | |||
107 | spin_unlock(&ocfs2_stack_lock); | 108 | spin_unlock(&ocfs2_stack_lock); |
108 | return rc; | 109 | return rc; |
109 | } | 110 | } |
@@ -287,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
287 | } | 288 | } |
288 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); | 289 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); |
289 | 290 | ||
291 | int ocfs2_stack_supports_plocks(void) | ||
292 | { | ||
293 | return active_stack && active_stack->sp_ops->plock; | ||
294 | } | ||
295 | EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); | ||
296 | |||
297 | /* | ||
298 | * ocfs2_plock() can only be safely called if | ||
299 | * ocfs2_stack_supports_plocks() returned true | ||
300 | */ | ||
301 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
302 | struct file *file, int cmd, struct file_lock *fl) | ||
303 | { | ||
304 | WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); | ||
305 | if (active_stack->sp_ops->plock) | ||
306 | return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); | ||
307 | return -EOPNOTSUPP; | ||
308 | } | ||
309 | EXPORT_SYMBOL_GPL(ocfs2_plock); | ||
310 | |||
290 | int ocfs2_cluster_connect(const char *stack_name, | 311 | int ocfs2_cluster_connect(const char *stack_name, |
291 | const char *group, | 312 | const char *group, |
292 | int grouplen, | 313 | int grouplen, |
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index db56281dd1be..c571af375ef8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
@@ -28,6 +28,10 @@ | |||
28 | #include "dlm/dlmapi.h" | 28 | #include "dlm/dlmapi.h" |
29 | #include <linux/dlm.h> | 29 | #include <linux/dlm.h> |
30 | 30 | ||
31 | /* Needed for plock-related prototypes */ | ||
32 | struct file; | ||
33 | struct file_lock; | ||
34 | |||
31 | /* | 35 | /* |
32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | 36 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it |
33 | * some day, but right now we need it. Let's fake it. This value is larger | 37 | * some day, but right now we need it. Let's fake it. This value is larger |
@@ -187,6 +191,17 @@ struct ocfs2_stack_operations { | |||
187 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | 191 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); |
188 | 192 | ||
189 | /* | 193 | /* |
194 | * Cluster-aware posix locks | ||
195 | * | ||
196 | * This is NULL for stacks which do not support posix locks. | ||
197 | */ | ||
198 | int (*plock)(struct ocfs2_cluster_connection *conn, | ||
199 | u64 ino, | ||
200 | struct file *file, | ||
201 | int cmd, | ||
202 | struct file_lock *fl); | ||
203 | |||
204 | /* | ||
190 | * This is an optoinal debugging hook. If provided, the | 205 | * This is an optoinal debugging hook. If provided, the |
191 | * stack can dump debugging information about this lock. | 206 | * stack can dump debugging information about this lock. |
192 | */ | 207 | */ |
@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | |||
240 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | 255 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); |
241 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | 256 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); |
242 | 257 | ||
258 | int ocfs2_stack_supports_plocks(void); | ||
259 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
260 | struct file *file, int cmd, struct file_lock *fl); | ||
261 | |||
243 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | 262 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); |
244 | 263 | ||
245 | 264 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d2d278fb9819..c5ff18b46b57 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
62 | struct ocfs2_chain_list *cl); | 62 | struct ocfs2_chain_list *cl); |
63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
64 | struct inode *alloc_inode, | 64 | struct inode *alloc_inode, |
65 | struct buffer_head *bh); | 65 | struct buffer_head *bh, |
66 | u64 max_block); | ||
66 | 67 | ||
67 | static int ocfs2_cluster_group_search(struct inode *inode, | 68 | static int ocfs2_cluster_group_search(struct inode *inode, |
68 | struct buffer_head *group_bh, | 69 | struct buffer_head *group_bh, |
69 | u32 bits_wanted, u32 min_bits, | 70 | u32 bits_wanted, u32 min_bits, |
71 | u64 max_block, | ||
70 | u16 *bit_off, u16 *bits_found); | 72 | u16 *bit_off, u16 *bits_found); |
71 | static int ocfs2_block_group_search(struct inode *inode, | 73 | static int ocfs2_block_group_search(struct inode *inode, |
72 | struct buffer_head *group_bh, | 74 | struct buffer_head *group_bh, |
73 | u32 bits_wanted, u32 min_bits, | 75 | u32 bits_wanted, u32 min_bits, |
76 | u64 max_block, | ||
74 | u16 *bit_off, u16 *bits_found); | 77 | u16 *bit_off, u16 *bits_found); |
75 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | 78 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, |
76 | struct ocfs2_alloc_context *ac, | 79 | struct ocfs2_alloc_context *ac, |
@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
110 | u64 data_blkno, | 113 | u64 data_blkno, |
111 | u64 *bg_blkno, | 114 | u64 *bg_blkno, |
112 | u16 *bg_bit_off); | 115 | u16 *bg_bit_off); |
116 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | ||
117 | u32 bits_wanted, u64 max_block, | ||
118 | struct ocfs2_alloc_context **ac); | ||
113 | 119 | ||
114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | 120 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
115 | { | 121 | { |
116 | struct inode *inode = ac->ac_inode; | 122 | struct inode *inode = ac->ac_inode; |
117 | 123 | ||
@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
124 | iput(inode); | 130 | iput(inode); |
125 | ac->ac_inode = NULL; | 131 | ac->ac_inode = NULL; |
126 | } | 132 | } |
127 | if (ac->ac_bh) { | 133 | brelse(ac->ac_bh); |
128 | brelse(ac->ac_bh); | 134 | ac->ac_bh = NULL; |
129 | ac->ac_bh = NULL; | ||
130 | } | ||
131 | } | 135 | } |
132 | 136 | ||
133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 137 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) | |||
276 | */ | 280 | */ |
277 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 281 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
278 | struct inode *alloc_inode, | 282 | struct inode *alloc_inode, |
279 | struct buffer_head *bh) | 283 | struct buffer_head *bh, |
284 | u64 max_block) | ||
280 | { | 285 | { |
281 | int status, credits; | 286 | int status, credits; |
282 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | 287 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; |
@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
294 | mlog_entry_void(); | 299 | mlog_entry_void(); |
295 | 300 | ||
296 | cl = &fe->id2.i_chain; | 301 | cl = &fe->id2.i_chain; |
297 | status = ocfs2_reserve_clusters(osb, | 302 | status = ocfs2_reserve_clusters_with_limit(osb, |
298 | le16_to_cpu(cl->cl_cpg), | 303 | le16_to_cpu(cl->cl_cpg), |
299 | &ac); | 304 | max_block, &ac); |
300 | if (status < 0) { | 305 | if (status < 0) { |
301 | if (status != -ENOSPC) | 306 | if (status != -ENOSPC) |
302 | mlog_errno(status); | 307 | mlog_errno(status); |
@@ -394,8 +399,7 @@ bail: | |||
394 | if (ac) | 399 | if (ac) |
395 | ocfs2_free_alloc_context(ac); | 400 | ocfs2_free_alloc_context(ac); |
396 | 401 | ||
397 | if (bg_bh) | 402 | brelse(bg_bh); |
398 | brelse(bg_bh); | ||
399 | 403 | ||
400 | mlog_exit(status); | 404 | mlog_exit(status); |
401 | return status; | 405 | return status; |
@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
469 | goto bail; | 473 | goto bail; |
470 | } | 474 | } |
471 | 475 | ||
472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 476 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
477 | ac->ac_max_block); | ||
473 | if (status < 0) { | 478 | if (status < 0) { |
474 | if (status != -ENOSPC) | 479 | if (status != -ENOSPC) |
475 | mlog_errno(status); | 480 | mlog_errno(status); |
@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
486 | get_bh(bh); | 491 | get_bh(bh); |
487 | ac->ac_bh = bh; | 492 | ac->ac_bh = bh; |
488 | bail: | 493 | bail: |
489 | if (bh) | 494 | brelse(bh); |
490 | brelse(bh); | ||
491 | 495 | ||
492 | mlog_exit(status); | 496 | mlog_exit(status); |
493 | return status; | 497 | return status; |
494 | } | 498 | } |
495 | 499 | ||
496 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 500 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, |
497 | struct ocfs2_dinode *fe, | 501 | int blocks, |
498 | struct ocfs2_alloc_context **ac) | 502 | struct ocfs2_alloc_context **ac) |
499 | { | 503 | { |
500 | int status; | 504 | int status; |
501 | u32 slot; | 505 | u32 slot; |
@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
507 | goto bail; | 511 | goto bail; |
508 | } | 512 | } |
509 | 513 | ||
510 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | 514 | (*ac)->ac_bits_wanted = blocks; |
511 | (*ac)->ac_which = OCFS2_AC_USE_META; | 515 | (*ac)->ac_which = OCFS2_AC_USE_META; |
512 | slot = osb->slot_num; | 516 | slot = osb->slot_num; |
513 | (*ac)->ac_group_search = ocfs2_block_group_search; | 517 | (*ac)->ac_group_search = ocfs2_block_group_search; |
@@ -532,6 +536,15 @@ bail: | |||
532 | return status; | 536 | return status; |
533 | } | 537 | } |
534 | 538 | ||
539 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | ||
540 | struct ocfs2_extent_list *root_el, | ||
541 | struct ocfs2_alloc_context **ac) | ||
542 | { | ||
543 | return ocfs2_reserve_new_metadata_blocks(osb, | ||
544 | ocfs2_extend_meta_needed(root_el), | ||
545 | ac); | ||
546 | } | ||
547 | |||
535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | 548 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, |
536 | struct ocfs2_alloc_context *ac) | 549 | struct ocfs2_alloc_context *ac) |
537 | { | 550 | { |
@@ -582,6 +595,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
582 | (*ac)->ac_group_search = ocfs2_block_group_search; | 595 | (*ac)->ac_group_search = ocfs2_block_group_search; |
583 | 596 | ||
584 | /* | 597 | /* |
598 | * stat(2) can't handle i_ino > 32bits, so we tell the | ||
599 | * lower levels not to allocate us a block group past that | ||
600 | * limit. The 'inode64' mount option avoids this behavior. | ||
601 | */ | ||
602 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64)) | ||
603 | (*ac)->ac_max_block = (u32)~0U; | ||
604 | |||
605 | /* | ||
585 | * slot is set when we successfully steal inode from other nodes. | 606 | * slot is set when we successfully steal inode from other nodes. |
586 | * It is reset in 3 places: | 607 | * It is reset in 3 places: |
587 | * 1. when we flush the truncate log | 608 | * 1. when we flush the truncate log |
@@ -661,9 +682,9 @@ bail: | |||
661 | /* Callers don't need to care which bitmap (local alloc or main) to | 682 | /* Callers don't need to care which bitmap (local alloc or main) to |
662 | * use so we figure it out for them, but unfortunately this clutters | 683 | * use so we figure it out for them, but unfortunately this clutters |
663 | * things a bit. */ | 684 | * things a bit. */ |
664 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 685 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
665 | u32 bits_wanted, | 686 | u32 bits_wanted, u64 max_block, |
666 | struct ocfs2_alloc_context **ac) | 687 | struct ocfs2_alloc_context **ac) |
667 | { | 688 | { |
668 | int status; | 689 | int status; |
669 | 690 | ||
@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |||
677 | } | 698 | } |
678 | 699 | ||
679 | (*ac)->ac_bits_wanted = bits_wanted; | 700 | (*ac)->ac_bits_wanted = bits_wanted; |
701 | (*ac)->ac_max_block = max_block; | ||
680 | 702 | ||
681 | status = -ENOSPC; | 703 | status = -ENOSPC; |
682 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | 704 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { |
683 | status = ocfs2_reserve_local_alloc_bits(osb, | 705 | status = ocfs2_reserve_local_alloc_bits(osb, |
684 | bits_wanted, | 706 | bits_wanted, |
685 | *ac); | 707 | *ac); |
686 | if ((status < 0) && (status != -ENOSPC)) { | 708 | if (status == -EFBIG) { |
709 | /* The local alloc window is outside ac_max_block. | ||
710 | * use the main bitmap. */ | ||
711 | status = -ENOSPC; | ||
712 | } else if ((status < 0) && (status != -ENOSPC)) { | ||
687 | mlog_errno(status); | 713 | mlog_errno(status); |
688 | goto bail; | 714 | goto bail; |
689 | } else if (status == -ENOSPC) { | ||
690 | /* reserve_local_bits will return enospc with | ||
691 | * the local alloc inode still locked, so we | ||
692 | * can change this safely here. */ | ||
693 | mlog(0, "Disabling local alloc\n"); | ||
694 | /* We set to OCFS2_LA_DISABLED so that umount | ||
695 | * can clean up what's left of the local | ||
696 | * allocation */ | ||
697 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
698 | } | 715 | } |
699 | } | 716 | } |
700 | 717 | ||
@@ -718,6 +735,13 @@ bail: | |||
718 | return status; | 735 | return status; |
719 | } | 736 | } |
720 | 737 | ||
738 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | ||
739 | u32 bits_wanted, | ||
740 | struct ocfs2_alloc_context **ac) | ||
741 | { | ||
742 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); | ||
743 | } | ||
744 | |||
721 | /* | 745 | /* |
722 | * More or less lifted from ext3. I'll leave their description below: | 746 | * More or less lifted from ext3. I'll leave their description below: |
723 | * | 747 | * |
@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
1000 | static int ocfs2_cluster_group_search(struct inode *inode, | 1024 | static int ocfs2_cluster_group_search(struct inode *inode, |
1001 | struct buffer_head *group_bh, | 1025 | struct buffer_head *group_bh, |
1002 | u32 bits_wanted, u32 min_bits, | 1026 | u32 bits_wanted, u32 min_bits, |
1027 | u64 max_block, | ||
1003 | u16 *bit_off, u16 *bits_found) | 1028 | u16 *bit_off, u16 *bits_found) |
1004 | { | 1029 | { |
1005 | int search = -ENOSPC; | 1030 | int search = -ENOSPC; |
1006 | int ret; | 1031 | int ret; |
1032 | u64 blkoff; | ||
1007 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; | 1033 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
1034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1008 | u16 tmp_off, tmp_found; | 1035 | u16 tmp_off, tmp_found; |
1009 | unsigned int max_bits, gd_cluster_off; | 1036 | unsigned int max_bits, gd_cluster_off; |
1010 | 1037 | ||
@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1037 | if (ret) | 1064 | if (ret) |
1038 | return ret; | 1065 | return ret; |
1039 | 1066 | ||
1067 | if (max_block) { | ||
1068 | blkoff = ocfs2_clusters_to_blocks(inode->i_sb, | ||
1069 | gd_cluster_off + | ||
1070 | tmp_off + tmp_found); | ||
1071 | mlog(0, "Checking %llu against %llu\n", | ||
1072 | (unsigned long long)blkoff, | ||
1073 | (unsigned long long)max_block); | ||
1074 | if (blkoff > max_block) | ||
1075 | return -ENOSPC; | ||
1076 | } | ||
1077 | |||
1040 | /* ocfs2_block_group_find_clear_bits() might | 1078 | /* ocfs2_block_group_find_clear_bits() might |
1041 | * return success, but we still want to return | 1079 | * return success, but we still want to return |
1042 | * -ENOSPC unless it found the minimum number | 1080 | * -ENOSPC unless it found the minimum number |
@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1045 | *bit_off = tmp_off; | 1083 | *bit_off = tmp_off; |
1046 | *bits_found = tmp_found; | 1084 | *bits_found = tmp_found; |
1047 | search = 0; /* success */ | 1085 | search = 0; /* success */ |
1086 | } else if (tmp_found) { | ||
1087 | /* | ||
1088 | * Don't show bits which we'll be returning | ||
1089 | * for allocation to the local alloc bitmap. | ||
1090 | */ | ||
1091 | ocfs2_local_alloc_seen_free_bits(osb, tmp_found); | ||
1048 | } | 1092 | } |
1049 | } | 1093 | } |
1050 | 1094 | ||
@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1054 | static int ocfs2_block_group_search(struct inode *inode, | 1098 | static int ocfs2_block_group_search(struct inode *inode, |
1055 | struct buffer_head *group_bh, | 1099 | struct buffer_head *group_bh, |
1056 | u32 bits_wanted, u32 min_bits, | 1100 | u32 bits_wanted, u32 min_bits, |
1101 | u64 max_block, | ||
1057 | u16 *bit_off, u16 *bits_found) | 1102 | u16 *bit_off, u16 *bits_found) |
1058 | { | 1103 | { |
1059 | int ret = -ENOSPC; | 1104 | int ret = -ENOSPC; |
1105 | u64 blkoff; | ||
1060 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | 1106 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1061 | 1107 | ||
1062 | BUG_ON(min_bits != 1); | 1108 | BUG_ON(min_bits != 1); |
1063 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); | 1109 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); |
1064 | 1110 | ||
1065 | if (bg->bg_free_bits_count) | 1111 | if (bg->bg_free_bits_count) { |
1066 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 1112 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
1067 | group_bh, bits_wanted, | 1113 | group_bh, bits_wanted, |
1068 | le16_to_cpu(bg->bg_bits), | 1114 | le16_to_cpu(bg->bg_bits), |
1069 | bit_off, bits_found); | 1115 | bit_off, bits_found); |
1116 | if (!ret && max_block) { | ||
1117 | blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + | ||
1118 | *bits_found; | ||
1119 | mlog(0, "Checking %llu against %llu\n", | ||
1120 | (unsigned long long)blkoff, | ||
1121 | (unsigned long long)max_block); | ||
1122 | if (blkoff > max_block) | ||
1123 | ret = -ENOSPC; | ||
1124 | } | ||
1125 | } | ||
1070 | 1126 | ||
1071 | return ret; | 1127 | return ret; |
1072 | } | 1128 | } |
@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1116 | struct ocfs2_group_desc *gd; | 1172 | struct ocfs2_group_desc *gd; |
1117 | struct inode *alloc_inode = ac->ac_inode; | 1173 | struct inode *alloc_inode = ac->ac_inode; |
1118 | 1174 | ||
1119 | ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, | 1175 | ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); |
1120 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | ||
1121 | if (ret < 0) { | 1176 | if (ret < 0) { |
1122 | mlog_errno(ret); | 1177 | mlog_errno(ret); |
1123 | return ret; | 1178 | return ret; |
@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1131 | } | 1186 | } |
1132 | 1187 | ||
1133 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | 1188 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, |
1134 | bit_off, &found); | 1189 | ac->ac_max_block, bit_off, &found); |
1135 | if (ret < 0) { | 1190 | if (ret < 0) { |
1136 | if (ret != -ENOSPC) | 1191 | if (ret != -ENOSPC) |
1137 | mlog_errno(ret); | 1192 | mlog_errno(ret); |
@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1186 | bits_wanted, chain, | 1241 | bits_wanted, chain, |
1187 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); | 1242 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); |
1188 | 1243 | ||
1189 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1244 | status = ocfs2_read_block(alloc_inode, |
1190 | le64_to_cpu(cl->cl_recs[chain].c_blkno), | 1245 | le64_to_cpu(cl->cl_recs[chain].c_blkno), |
1191 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | 1246 | &group_bh); |
1192 | if (status < 0) { | 1247 | if (status < 0) { |
1193 | mlog_errno(status); | 1248 | mlog_errno(status); |
1194 | goto bail; | 1249 | goto bail; |
@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1204 | /* for now, the chain search is a bit simplistic. We just use | 1259 | /* for now, the chain search is a bit simplistic. We just use |
1205 | * the 1st group with any empty bits. */ | 1260 | * the 1st group with any empty bits. */ |
1206 | while ((status = ac->ac_group_search(alloc_inode, group_bh, | 1261 | while ((status = ac->ac_group_search(alloc_inode, group_bh, |
1207 | bits_wanted, min_bits, bit_off, | 1262 | bits_wanted, min_bits, |
1263 | ac->ac_max_block, bit_off, | ||
1208 | &tmp_bits)) == -ENOSPC) { | 1264 | &tmp_bits)) == -ENOSPC) { |
1209 | if (!bg->bg_next_group) | 1265 | if (!bg->bg_next_group) |
1210 | break; | 1266 | break; |
1211 | 1267 | ||
1212 | if (prev_group_bh) { | 1268 | brelse(prev_group_bh); |
1213 | brelse(prev_group_bh); | 1269 | prev_group_bh = NULL; |
1214 | prev_group_bh = NULL; | 1270 | |
1215 | } | ||
1216 | next_group = le64_to_cpu(bg->bg_next_group); | 1271 | next_group = le64_to_cpu(bg->bg_next_group); |
1217 | prev_group_bh = group_bh; | 1272 | prev_group_bh = group_bh; |
1218 | group_bh = NULL; | 1273 | group_bh = NULL; |
1219 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1274 | status = ocfs2_read_block(alloc_inode, |
1220 | next_group, &group_bh, | 1275 | next_group, &group_bh); |
1221 | OCFS2_BH_CACHED, alloc_inode); | ||
1222 | if (status < 0) { | 1276 | if (status < 0) { |
1223 | mlog_errno(status); | 1277 | mlog_errno(status); |
1224 | goto bail; | 1278 | goto bail; |
@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1307 | *bg_blkno = le64_to_cpu(bg->bg_blkno); | 1361 | *bg_blkno = le64_to_cpu(bg->bg_blkno); |
1308 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1362 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1309 | bail: | 1363 | bail: |
1310 | if (group_bh) | 1364 | brelse(group_bh); |
1311 | brelse(group_bh); | 1365 | brelse(prev_group_bh); |
1312 | if (prev_group_bh) | ||
1313 | brelse(prev_group_bh); | ||
1314 | 1366 | ||
1315 | mlog_exit(status); | 1367 | mlog_exit(status); |
1316 | return status; | 1368 | return status; |
@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1723 | { | 1775 | { |
1724 | int status = 0; | 1776 | int status = 0; |
1725 | u32 tmp_used; | 1777 | u32 tmp_used; |
1726 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); | ||
1727 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; | 1778 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; |
1728 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | 1779 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; |
1729 | struct buffer_head *group_bh = NULL; | 1780 | struct buffer_head *group_bh = NULL; |
@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1742 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, | 1793 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, |
1743 | (unsigned long long)bg_blkno, start_bit); | 1794 | (unsigned long long)bg_blkno, start_bit); |
1744 | 1795 | ||
1745 | status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, | 1796 | status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); |
1746 | alloc_inode); | ||
1747 | if (status < 0) { | 1797 | if (status < 0) { |
1748 | mlog_errno(status); | 1798 | mlog_errno(status); |
1749 | goto bail; | 1799 | goto bail; |
@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1784 | } | 1834 | } |
1785 | 1835 | ||
1786 | bail: | 1836 | bail: |
1787 | if (group_bh) | 1837 | brelse(group_bh); |
1788 | brelse(group_bh); | ||
1789 | 1838 | ||
1790 | mlog_exit(status); | 1839 | mlog_exit(status); |
1791 | return status; | 1840 | return status; |
@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle, | |||
1838 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | 1887 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, |
1839 | bg_start_bit, bg_blkno, | 1888 | bg_start_bit, bg_blkno, |
1840 | num_clusters); | 1889 | num_clusters); |
1841 | if (status < 0) | 1890 | if (status < 0) { |
1842 | mlog_errno(status); | 1891 | mlog_errno(status); |
1892 | goto out; | ||
1893 | } | ||
1843 | 1894 | ||
1895 | ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), | ||
1896 | num_clusters); | ||
1897 | |||
1898 | out: | ||
1844 | mlog_exit(status); | 1899 | mlog_exit(status); |
1845 | return status; | 1900 | return status; |
1846 | } | 1901 | } |
@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) | |||
1891 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); | 1946 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); |
1892 | } | 1947 | } |
1893 | } | 1948 | } |
1949 | |||
1950 | /* | ||
1951 | * For a given allocation, determine which allocators will need to be | ||
1952 | * accessed, and lock them, reserving the appropriate number of bits. | ||
1953 | * | ||
1954 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
1955 | * and ocfs2_allocate_unwritten_extents(). | ||
1956 | * | ||
1957 | * File systems which don't support holes call this from | ||
1958 | * ocfs2_extend_allocation(). | ||
1959 | */ | ||
1960 | int ocfs2_lock_allocators(struct inode *inode, | ||
1961 | struct ocfs2_extent_tree *et, | ||
1962 | u32 clusters_to_add, u32 extents_to_split, | ||
1963 | struct ocfs2_alloc_context **data_ac, | ||
1964 | struct ocfs2_alloc_context **meta_ac) | ||
1965 | { | ||
1966 | int ret = 0, num_free_extents; | ||
1967 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
1968 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1969 | |||
1970 | *meta_ac = NULL; | ||
1971 | if (data_ac) | ||
1972 | *data_ac = NULL; | ||
1973 | |||
1974 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
1975 | |||
1976 | num_free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
1977 | if (num_free_extents < 0) { | ||
1978 | ret = num_free_extents; | ||
1979 | mlog_errno(ret); | ||
1980 | goto out; | ||
1981 | } | ||
1982 | |||
1983 | /* | ||
1984 | * Sparse allocation file systems need to be more conservative | ||
1985 | * with reserving room for expansion - the actual allocation | ||
1986 | * happens while we've got a journal handle open so re-taking | ||
1987 | * a cluster lock (because we ran out of room for another | ||
1988 | * extent) will violate ordering rules. | ||
1989 | * | ||
1990 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
1991 | * anyway. | ||
1992 | * | ||
1993 | * Always lock for any unwritten extents - we might want to | ||
1994 | * add blocks during a split. | ||
1995 | */ | ||
1996 | if (!num_free_extents || | ||
1997 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
1998 | ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac); | ||
1999 | if (ret < 0) { | ||
2000 | if (ret != -ENOSPC) | ||
2001 | mlog_errno(ret); | ||
2002 | goto out; | ||
2003 | } | ||
2004 | } | ||
2005 | |||
2006 | if (clusters_to_add == 0) | ||
2007 | goto out; | ||
2008 | |||
2009 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
2010 | if (ret < 0) { | ||
2011 | if (ret != -ENOSPC) | ||
2012 | mlog_errno(ret); | ||
2013 | goto out; | ||
2014 | } | ||
2015 | |||
2016 | out: | ||
2017 | if (ret) { | ||
2018 | if (*meta_ac) { | ||
2019 | ocfs2_free_alloc_context(*meta_ac); | ||
2020 | *meta_ac = NULL; | ||
2021 | } | ||
2022 | |||
2023 | /* | ||
2024 | * We cannot have an error and a non null *data_ac. | ||
2025 | */ | ||
2026 | } | ||
2027 | |||
2028 | return ret; | ||
2029 | } | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 544c600662bd..4df159d8f450 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -28,10 +28,11 @@ | |||
28 | 28 | ||
29 | typedef int (group_search_t)(struct inode *, | 29 | typedef int (group_search_t)(struct inode *, |
30 | struct buffer_head *, | 30 | struct buffer_head *, |
31 | u32, | 31 | u32, /* bits_wanted */ |
32 | u32, | 32 | u32, /* min_bits */ |
33 | u16 *, | 33 | u64, /* max_block */ |
34 | u16 *); | 34 | u16 *, /* *bit_off */ |
35 | u16 *); /* *bits_found */ | ||
35 | 36 | ||
36 | struct ocfs2_alloc_context { | 37 | struct ocfs2_alloc_context { |
37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 38 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
@@ -51,6 +52,8 @@ struct ocfs2_alloc_context { | |||
51 | group_search_t *ac_group_search; | 52 | group_search_t *ac_group_search; |
52 | 53 | ||
53 | u64 ac_last_group; | 54 | u64 ac_last_group; |
55 | u64 ac_max_block; /* Highest block number to allocate. 0 is | ||
56 | is the same as ~0 - unlimited */ | ||
54 | }; | 57 | }; |
55 | 58 | ||
56 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); | 59 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |
@@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) | |||
59 | return ac->ac_bits_wanted - ac->ac_bits_given; | 62 | return ac->ac_bits_wanted - ac->ac_bits_given; |
60 | } | 63 | } |
61 | 64 | ||
65 | /* | ||
66 | * Please note that the caller must make sure that root_el is the root | ||
67 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
68 | * the result may be wrong. | ||
69 | */ | ||
62 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 70 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, |
63 | struct ocfs2_dinode *fe, | 71 | struct ocfs2_extent_list *root_el, |
64 | struct ocfs2_alloc_context **ac); | 72 | struct ocfs2_alloc_context **ac); |
73 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | ||
74 | int blocks, | ||
75 | struct ocfs2_alloc_context **ac); | ||
65 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 76 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
66 | struct ocfs2_alloc_context **ac); | 77 | struct ocfs2_alloc_context **ac); |
67 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 78 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, |
@@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
147 | * apis above. */ | 158 | * apis above. */ |
148 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 159 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
149 | struct ocfs2_alloc_context *ac); | 160 | struct ocfs2_alloc_context *ac); |
161 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); | ||
150 | 162 | ||
151 | /* given a cluster offset, calculate which block group it belongs to | 163 | /* given a cluster offset, calculate which block group it belongs to |
152 | * and return that block offset. */ | 164 | * and return that block offset. */ |
@@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | |||
156 | int ocfs2_check_group_descriptor(struct super_block *sb, | 168 | int ocfs2_check_group_descriptor(struct super_block *sb, |
157 | struct ocfs2_dinode *di, | 169 | struct ocfs2_dinode *di, |
158 | struct ocfs2_group_desc *gd); | 170 | struct ocfs2_group_desc *gd); |
171 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | ||
172 | u32 clusters_to_add, u32 extents_to_split, | ||
173 | struct ocfs2_alloc_context **data_ac, | ||
174 | struct ocfs2_alloc_context **meta_ac); | ||
159 | #endif /* _CHAINALLOC_H_ */ | 175 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 88255d3f52b4..304b63ac78cf 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include "sysfile.h" | 64 | #include "sysfile.h" |
65 | #include "uptodate.h" | 65 | #include "uptodate.h" |
66 | #include "ver.h" | 66 | #include "ver.h" |
67 | #include "xattr.h" | ||
67 | 68 | ||
68 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
69 | 70 | ||
@@ -154,10 +155,13 @@ enum { | |||
154 | Opt_localalloc, | 155 | Opt_localalloc, |
155 | Opt_localflocks, | 156 | Opt_localflocks, |
156 | Opt_stack, | 157 | Opt_stack, |
158 | Opt_user_xattr, | ||
159 | Opt_nouser_xattr, | ||
160 | Opt_inode64, | ||
157 | Opt_err, | 161 | Opt_err, |
158 | }; | 162 | }; |
159 | 163 | ||
160 | static match_table_t tokens = { | 164 | static const match_table_t tokens = { |
161 | {Opt_barrier, "barrier=%u"}, | 165 | {Opt_barrier, "barrier=%u"}, |
162 | {Opt_err_panic, "errors=panic"}, | 166 | {Opt_err_panic, "errors=panic"}, |
163 | {Opt_err_ro, "errors=remount-ro"}, | 167 | {Opt_err_ro, "errors=remount-ro"}, |
@@ -173,6 +177,9 @@ static match_table_t tokens = { | |||
173 | {Opt_localalloc, "localalloc=%d"}, | 177 | {Opt_localalloc, "localalloc=%d"}, |
174 | {Opt_localflocks, "localflocks"}, | 178 | {Opt_localflocks, "localflocks"}, |
175 | {Opt_stack, "cluster_stack=%s"}, | 179 | {Opt_stack, "cluster_stack=%s"}, |
180 | {Opt_user_xattr, "user_xattr"}, | ||
181 | {Opt_nouser_xattr, "nouser_xattr"}, | ||
182 | {Opt_inode64, "inode64"}, | ||
176 | {Opt_err, NULL} | 183 | {Opt_err, NULL} |
177 | }; | 184 | }; |
178 | 185 | ||
@@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) | |||
205 | ocfs2_schedule_truncate_log_flush(osb, 0); | 212 | ocfs2_schedule_truncate_log_flush(osb, 0); |
206 | } | 213 | } |
207 | 214 | ||
208 | if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { | 215 | if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, |
216 | &target)) { | ||
209 | if (wait) | 217 | if (wait) |
210 | log_wait_commit(OCFS2_SB(sb)->journal->j_journal, | 218 | jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, |
211 | target); | 219 | target); |
212 | } | 220 | } |
213 | return 0; | 221 | return 0; |
214 | } | 222 | } |
@@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) | |||
325 | if (!oi) | 333 | if (!oi) |
326 | return NULL; | 334 | return NULL; |
327 | 335 | ||
336 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); | ||
328 | return &oi->vfs_inode; | 337 | return &oi->vfs_inode; |
329 | } | 338 | } |
330 | 339 | ||
@@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
406 | goto out; | 415 | goto out; |
407 | } | 416 | } |
408 | 417 | ||
418 | /* Probably don't want this on remount; it might | ||
419 | * mess with other nodes */ | ||
420 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && | ||
421 | (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) { | ||
422 | ret = -EINVAL; | ||
423 | mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); | ||
424 | goto out; | ||
425 | } | ||
426 | |||
409 | /* We're going to/from readonly mode. */ | 427 | /* We're going to/from readonly mode. */ |
410 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { | 428 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
411 | /* Lock here so the check of HARD_RO and the potential | 429 | /* Lock here so the check of HARD_RO and the potential |
@@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
637 | osb->s_atime_quantum = parsed_options.atime_quantum; | 655 | osb->s_atime_quantum = parsed_options.atime_quantum; |
638 | osb->preferred_slot = parsed_options.slot; | 656 | osb->preferred_slot = parsed_options.slot; |
639 | osb->osb_commit_interval = parsed_options.commit_interval; | 657 | osb->osb_commit_interval = parsed_options.commit_interval; |
640 | osb->local_alloc_size = parsed_options.localalloc_opt; | 658 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); |
659 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
641 | 660 | ||
642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | 661 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); |
643 | if (status) | 662 | if (status) |
@@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
743 | return status; | 762 | return status; |
744 | 763 | ||
745 | read_super_error: | 764 | read_super_error: |
746 | if (bh != NULL) | 765 | brelse(bh); |
747 | brelse(bh); | ||
748 | 766 | ||
749 | if (inode) | 767 | if (inode) |
750 | iput(inode); | 768 | iput(inode); |
@@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
847 | case Opt_data_writeback: | 865 | case Opt_data_writeback: |
848 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; | 866 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; |
849 | break; | 867 | break; |
868 | case Opt_user_xattr: | ||
869 | mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; | ||
870 | break; | ||
871 | case Opt_nouser_xattr: | ||
872 | mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; | ||
873 | break; | ||
850 | case Opt_atime_quantum: | 874 | case Opt_atime_quantum: |
851 | if (match_int(&args[0], &option)) { | 875 | if (match_int(&args[0], &option)) { |
852 | status = 0; | 876 | status = 0; |
@@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
873 | if (option < 0) | 897 | if (option < 0) |
874 | return 0; | 898 | return 0; |
875 | if (option == 0) | 899 | if (option == 0) |
876 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | 900 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; |
877 | mopt->commit_interval = HZ * option; | 901 | mopt->commit_interval = HZ * option; |
878 | break; | 902 | break; |
879 | case Opt_localalloc: | 903 | case Opt_localalloc: |
@@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
918 | OCFS2_STACK_LABEL_LEN); | 942 | OCFS2_STACK_LABEL_LEN); |
919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | 943 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; |
920 | break; | 944 | break; |
945 | case Opt_inode64: | ||
946 | mopt->mount_opt |= OCFS2_MOUNT_INODE64; | ||
947 | break; | ||
921 | default: | 948 | default: |
922 | mlog(ML_ERROR, | 949 | mlog(ML_ERROR, |
923 | "Unrecognized mount option \"%s\" " | 950 | "Unrecognized mount option \"%s\" " |
@@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
938 | { | 965 | { |
939 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); | 966 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); |
940 | unsigned long opts = osb->s_mount_opt; | 967 | unsigned long opts = osb->s_mount_opt; |
968 | unsigned int local_alloc_megs; | ||
941 | 969 | ||
942 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 970 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
943 | seq_printf(s, ",_netdev,heartbeat=local"); | 971 | seq_printf(s, ",_netdev,heartbeat=local"); |
@@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
970 | seq_printf(s, ",commit=%u", | 998 | seq_printf(s, ",commit=%u", |
971 | (unsigned) (osb->osb_commit_interval / HZ)); | 999 | (unsigned) (osb->osb_commit_interval / HZ)); |
972 | 1000 | ||
973 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | 1001 | local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); |
974 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | 1002 | if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) |
1003 | seq_printf(s, ",localalloc=%d", local_alloc_megs); | ||
975 | 1004 | ||
976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 1005 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
977 | seq_printf(s, ",localflocks,"); | 1006 | seq_printf(s, ",localflocks,"); |
@@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | 1009 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, |
981 | osb->osb_cluster_stack); | 1010 | osb->osb_cluster_stack); |
982 | 1011 | ||
1012 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | ||
1013 | seq_printf(s, ",nouser_xattr"); | ||
1014 | else | ||
1015 | seq_printf(s, ",user_xattr"); | ||
1016 | |||
1017 | if (opts & OCFS2_MOUNT_INODE64) | ||
1018 | seq_printf(s, ",inode64"); | ||
1019 | |||
983 | return 0; | 1020 | return 0; |
984 | } | 1021 | } |
985 | 1022 | ||
@@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data) | |||
1132 | oi->ip_dir_start_lookup = 0; | 1169 | oi->ip_dir_start_lookup = 0; |
1133 | 1170 | ||
1134 | init_rwsem(&oi->ip_alloc_sem); | 1171 | init_rwsem(&oi->ip_alloc_sem); |
1172 | init_rwsem(&oi->ip_xattr_sem); | ||
1135 | mutex_init(&oi->ip_io_mutex); | 1173 | mutex_init(&oi->ip_io_mutex); |
1136 | 1174 | ||
1137 | oi->ip_blkno = 0ULL; | 1175 | oi->ip_blkno = 0ULL; |
@@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1375 | sb->s_fs_info = osb; | 1413 | sb->s_fs_info = osb; |
1376 | sb->s_op = &ocfs2_sops; | 1414 | sb->s_op = &ocfs2_sops; |
1377 | sb->s_export_op = &ocfs2_export_ops; | 1415 | sb->s_export_op = &ocfs2_export_ops; |
1416 | sb->s_xattr = ocfs2_xattr_handlers; | ||
1378 | sb->s_time_gran = 1; | 1417 | sb->s_time_gran = 1; |
1379 | sb->s_flags |= MS_NOATIME; | 1418 | sb->s_flags |= MS_NOATIME; |
1380 | /* this is needed to support O_LARGEFILE */ | 1419 | /* this is needed to support O_LARGEFILE */ |
@@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1421 | 1460 | ||
1422 | osb->slot_num = OCFS2_INVALID_SLOT; | 1461 | osb->slot_num = OCFS2_INVALID_SLOT; |
1423 | 1462 | ||
1463 | osb->s_xattr_inline_size = le16_to_cpu( | ||
1464 | di->id2.i_super.s_xattr_inline_size); | ||
1465 | |||
1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1466 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
1425 | osb->local_alloc_bh = NULL; | 1467 | osb->local_alloc_bh = NULL; |
1468 | INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); | ||
1426 | 1469 | ||
1427 | init_waitqueue_head(&osb->osb_mount_event); | 1470 | init_waitqueue_head(&osb->osb_mount_event); |
1428 | 1471 | ||
@@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1568 | osb->first_cluster_group_blkno = | 1611 | osb->first_cluster_group_blkno = |
1569 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); | 1612 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); |
1570 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); | 1613 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); |
1614 | osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); | ||
1571 | mlog(0, "vol_label: %s\n", osb->vol_label); | 1615 | mlog(0, "vol_label: %s\n", osb->vol_label); |
1572 | mlog(0, "uuid: %s\n", osb->uuid_str); | 1616 | mlog(0, "uuid: %s\n", osb->uuid_str); |
1573 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", | 1617 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index ba9dbb51d25b..cbd03dfdc7b9 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include "inode.h" | 50 | #include "inode.h" |
51 | #include "journal.h" | 51 | #include "journal.h" |
52 | #include "symlink.h" | 52 | #include "symlink.h" |
53 | #include "xattr.h" | ||
53 | 54 | ||
54 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
55 | 56 | ||
@@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, | |||
83 | 84 | ||
84 | mlog_entry_void(); | 85 | mlog_entry_void(); |
85 | 86 | ||
86 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 87 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); |
87 | OCFS2_I(inode)->ip_blkno, | ||
88 | bh, | ||
89 | OCFS2_BH_CACHED, | ||
90 | inode); | ||
91 | if (status < 0) { | 88 | if (status < 0) { |
92 | mlog_errno(status); | 89 | mlog_errno(status); |
93 | link = ERR_PTR(status); | 90 | link = ERR_PTR(status); |
@@ -157,8 +154,7 @@ bail: | |||
157 | kunmap(page); | 154 | kunmap(page); |
158 | page_cache_release(page); | 155 | page_cache_release(page); |
159 | } | 156 | } |
160 | if (bh) | 157 | brelse(bh); |
161 | brelse(bh); | ||
162 | 158 | ||
163 | return ERR_PTR(status); | 159 | return ERR_PTR(status); |
164 | } | 160 | } |
@@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = { | |||
168 | .follow_link = ocfs2_follow_link, | 164 | .follow_link = ocfs2_follow_link, |
169 | .getattr = ocfs2_getattr, | 165 | .getattr = ocfs2_getattr, |
170 | .setattr = ocfs2_setattr, | 166 | .setattr = ocfs2_setattr, |
167 | .setxattr = generic_setxattr, | ||
168 | .getxattr = generic_getxattr, | ||
169 | .listxattr = ocfs2_listxattr, | ||
170 | .removexattr = generic_removexattr, | ||
171 | }; | 171 | }; |
172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { | 172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { |
173 | .readlink = ocfs2_readlink, | 173 | .readlink = ocfs2_readlink, |
174 | .follow_link = ocfs2_follow_link, | 174 | .follow_link = ocfs2_follow_link, |
175 | .getattr = ocfs2_getattr, | 175 | .getattr = ocfs2_getattr, |
176 | .setattr = ocfs2_setattr, | 176 | .setattr = ocfs2_setattr, |
177 | .setxattr = generic_setxattr, | ||
178 | .getxattr = generic_getxattr, | ||
179 | .listxattr = ocfs2_listxattr, | ||
180 | .removexattr = generic_removexattr, | ||
177 | }; | 181 | }; |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 4da8851f2b23..187b99ff0368 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
@@ -53,7 +53,11 @@ | |||
53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
54 | #include <linux/buffer_head.h> | 54 | #include <linux/buffer_head.h> |
55 | #include <linux/rbtree.h> | 55 | #include <linux/rbtree.h> |
56 | #include <linux/jbd.h> | 56 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
57 | # include <linux/jbd2.h> | ||
58 | #else | ||
59 | # include <linux/jbd.h> | ||
60 | #endif | ||
57 | 61 | ||
58 | #define MLOG_MASK_PREFIX ML_UPTODATE | 62 | #define MLOG_MASK_PREFIX ML_UPTODATE |
59 | 63 | ||
@@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, | |||
511 | ci->ci_num_cached--; | 515 | ci->ci_num_cached--; |
512 | } | 516 | } |
513 | 517 | ||
514 | /* Called when we remove a chunk of metadata from an inode. We don't | 518 | static void ocfs2_remove_block_from_cache(struct inode *inode, |
515 | * bother reverting things to an inlined array in the case of a remove | 519 | sector_t block) |
516 | * which moves us back under the limit. */ | ||
517 | void ocfs2_remove_from_cache(struct inode *inode, | ||
518 | struct buffer_head *bh) | ||
519 | { | 520 | { |
520 | int index; | 521 | int index; |
521 | sector_t block = bh->b_blocknr; | ||
522 | struct ocfs2_meta_cache_item *item = NULL; | 522 | struct ocfs2_meta_cache_item *item = NULL; |
523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | 524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; |
@@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode, | |||
544 | kmem_cache_free(ocfs2_uptodate_cachep, item); | 544 | kmem_cache_free(ocfs2_uptodate_cachep, item); |
545 | } | 545 | } |
546 | 546 | ||
547 | /* | ||
548 | * Called when we remove a chunk of metadata from an inode. We don't | ||
549 | * bother reverting things to an inlined array in the case of a remove | ||
550 | * which moves us back under the limit. | ||
551 | */ | ||
552 | void ocfs2_remove_from_cache(struct inode *inode, | ||
553 | struct buffer_head *bh) | ||
554 | { | ||
555 | sector_t block = bh->b_blocknr; | ||
556 | |||
557 | ocfs2_remove_block_from_cache(inode, block); | ||
558 | } | ||
559 | |||
560 | /* Called when we remove xattr clusters from an inode. */ | ||
561 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
562 | sector_t block, | ||
563 | u32 c_len) | ||
564 | { | ||
565 | unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; | ||
566 | |||
567 | for (i = 0; i < b_len; i++, block++) | ||
568 | ocfs2_remove_block_from_cache(inode, block); | ||
569 | } | ||
570 | |||
547 | int __init init_ocfs2_uptodate_cache(void) | 571 | int __init init_ocfs2_uptodate_cache(void) |
548 | { | 572 | { |
549 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", | 573 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", |
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 2e73206059a8..531b4b3a0c47 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h | |||
@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, | |||
40 | struct buffer_head *bh); | 40 | struct buffer_head *bh); |
41 | void ocfs2_remove_from_cache(struct inode *inode, | 41 | void ocfs2_remove_from_cache(struct inode *inode, |
42 | struct buffer_head *bh); | 42 | struct buffer_head *bh); |
43 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
44 | sector_t block, | ||
45 | u32 c_len); | ||
43 | int ocfs2_buffer_read_ahead(struct inode *inode, | 46 | int ocfs2_buffer_read_ahead(struct inode *inode, |
44 | struct buffer_head *bh); | 47 | struct buffer_head *bh); |
45 | 48 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c new file mode 100644 index 000000000000..802c41492214 --- /dev/null +++ b/fs/ocfs2/xattr.c | |||
@@ -0,0 +1,4832 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * xattr.c | ||
5 | * | ||
6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
7 | * | ||
8 | * CREDITS: | ||
9 | * Lots of code in this file is taken from ext3. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public | ||
22 | * License along with this program; if not, write to the | ||
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
24 | * Boston, MA 021110-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/capability.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/highmem.h> | ||
32 | #include <linux/pagemap.h> | ||
33 | #include <linux/uio.h> | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/splice.h> | ||
36 | #include <linux/mount.h> | ||
37 | #include <linux/writeback.h> | ||
38 | #include <linux/falloc.h> | ||
39 | #include <linux/sort.h> | ||
40 | #include <linux/init.h> | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/string.h> | ||
43 | |||
44 | #define MLOG_MASK_PREFIX ML_XATTR | ||
45 | #include <cluster/masklog.h> | ||
46 | |||
47 | #include "ocfs2.h" | ||
48 | #include "alloc.h" | ||
49 | #include "dlmglue.h" | ||
50 | #include "file.h" | ||
51 | #include "symlink.h" | ||
52 | #include "sysfile.h" | ||
53 | #include "inode.h" | ||
54 | #include "journal.h" | ||
55 | #include "ocfs2_fs.h" | ||
56 | #include "suballoc.h" | ||
57 | #include "uptodate.h" | ||
58 | #include "buffer_head_io.h" | ||
59 | #include "super.h" | ||
60 | #include "xattr.h" | ||
61 | |||
62 | |||
63 | struct ocfs2_xattr_def_value_root { | ||
64 | struct ocfs2_xattr_value_root xv; | ||
65 | struct ocfs2_extent_rec er; | ||
66 | }; | ||
67 | |||
68 | struct ocfs2_xattr_bucket { | ||
69 | struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; | ||
70 | struct ocfs2_xattr_header *xh; | ||
71 | }; | ||
72 | |||
73 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) | ||
74 | #define OCFS2_XATTR_INLINE_SIZE 80 | ||
75 | |||
76 | static struct ocfs2_xattr_def_value_root def_xv = { | ||
77 | .xv.xr_list.l_count = cpu_to_le16(1), | ||
78 | }; | ||
79 | |||
80 | struct xattr_handler *ocfs2_xattr_handlers[] = { | ||
81 | &ocfs2_xattr_user_handler, | ||
82 | &ocfs2_xattr_trusted_handler, | ||
83 | NULL | ||
84 | }; | ||
85 | |||
86 | static struct xattr_handler *ocfs2_xattr_handler_map[] = { | ||
87 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | ||
88 | [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, | ||
89 | }; | ||
90 | |||
91 | struct ocfs2_xattr_info { | ||
92 | int name_index; | ||
93 | const char *name; | ||
94 | const void *value; | ||
95 | size_t value_len; | ||
96 | }; | ||
97 | |||
98 | struct ocfs2_xattr_search { | ||
99 | struct buffer_head *inode_bh; | ||
100 | /* | ||
101 | * xattr_bh point to the block buffer head which has extended attribute | ||
102 | * when extended attribute in inode, xattr_bh is equal to inode_bh. | ||
103 | */ | ||
104 | struct buffer_head *xattr_bh; | ||
105 | struct ocfs2_xattr_header *header; | ||
106 | struct ocfs2_xattr_bucket bucket; | ||
107 | void *base; | ||
108 | void *end; | ||
109 | struct ocfs2_xattr_entry *here; | ||
110 | int not_found; | ||
111 | }; | ||
112 | |||
113 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
114 | struct ocfs2_xattr_header *xh, | ||
115 | int index, | ||
116 | int *block_off, | ||
117 | int *new_offset); | ||
118 | |||
119 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
120 | struct buffer_head *root_bh, | ||
121 | int name_index, | ||
122 | const char *name, | ||
123 | struct ocfs2_xattr_search *xs); | ||
124 | |||
125 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
126 | struct ocfs2_xattr_tree_root *xt, | ||
127 | char *buffer, | ||
128 | size_t buffer_size); | ||
129 | |||
130 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
131 | struct ocfs2_xattr_search *xs); | ||
132 | |||
133 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
134 | struct ocfs2_xattr_info *xi, | ||
135 | struct ocfs2_xattr_search *xs); | ||
136 | |||
137 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
138 | struct buffer_head *xb_bh); | ||
139 | |||
140 | static inline const char *ocfs2_xattr_prefix(int name_index) | ||
141 | { | ||
142 | struct xattr_handler *handler = NULL; | ||
143 | |||
144 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) | ||
145 | handler = ocfs2_xattr_handler_map[name_index]; | ||
146 | |||
147 | return handler ? handler->prefix : NULL; | ||
148 | } | ||
149 | |||
150 | static u32 ocfs2_xattr_name_hash(struct inode *inode, | ||
151 | const char *name, | ||
152 | int name_len) | ||
153 | { | ||
154 | /* Get hash value of uuid from super block */ | ||
155 | u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; | ||
156 | int i; | ||
157 | |||
158 | /* hash extended attribute name */ | ||
159 | for (i = 0; i < name_len; i++) { | ||
160 | hash = (hash << OCFS2_HASH_SHIFT) ^ | ||
161 | (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ | ||
162 | *name++; | ||
163 | } | ||
164 | |||
165 | return hash; | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * ocfs2_xattr_hash_entry() | ||
170 | * | ||
171 | * Compute the hash of an extended attribute. | ||
172 | */ | ||
173 | static void ocfs2_xattr_hash_entry(struct inode *inode, | ||
174 | struct ocfs2_xattr_header *header, | ||
175 | struct ocfs2_xattr_entry *entry) | ||
176 | { | ||
177 | u32 hash = 0; | ||
178 | char *name = (char *)header + le16_to_cpu(entry->xe_name_offset); | ||
179 | |||
180 | hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len); | ||
181 | entry->xe_name_hash = cpu_to_le32(hash); | ||
182 | |||
183 | return; | ||
184 | } | ||
185 | |||
186 | static int ocfs2_xattr_extend_allocation(struct inode *inode, | ||
187 | u32 clusters_to_add, | ||
188 | struct buffer_head *xattr_bh, | ||
189 | struct ocfs2_xattr_value_root *xv) | ||
190 | { | ||
191 | int status = 0; | ||
192 | int restart_func = 0; | ||
193 | int credits = 0; | ||
194 | handle_t *handle = NULL; | ||
195 | struct ocfs2_alloc_context *data_ac = NULL; | ||
196 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
197 | enum ocfs2_alloc_restarted why; | ||
198 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
199 | u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); | ||
200 | struct ocfs2_extent_tree et; | ||
201 | |||
202 | mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); | ||
203 | |||
204 | ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); | ||
205 | |||
206 | restart_all: | ||
207 | |||
208 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
209 | &data_ac, &meta_ac); | ||
210 | if (status) { | ||
211 | mlog_errno(status); | ||
212 | goto leave; | ||
213 | } | ||
214 | |||
215 | credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, | ||
216 | clusters_to_add); | ||
217 | handle = ocfs2_start_trans(osb, credits); | ||
218 | if (IS_ERR(handle)) { | ||
219 | status = PTR_ERR(handle); | ||
220 | handle = NULL; | ||
221 | mlog_errno(status); | ||
222 | goto leave; | ||
223 | } | ||
224 | |||
225 | restarted_transaction: | ||
226 | status = ocfs2_journal_access(handle, inode, xattr_bh, | ||
227 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
228 | if (status < 0) { | ||
229 | mlog_errno(status); | ||
230 | goto leave; | ||
231 | } | ||
232 | |||
233 | prev_clusters = le32_to_cpu(xv->xr_clusters); | ||
234 | status = ocfs2_add_clusters_in_btree(osb, | ||
235 | inode, | ||
236 | &logical_start, | ||
237 | clusters_to_add, | ||
238 | 0, | ||
239 | &et, | ||
240 | handle, | ||
241 | data_ac, | ||
242 | meta_ac, | ||
243 | &why); | ||
244 | if ((status < 0) && (status != -EAGAIN)) { | ||
245 | if (status != -ENOSPC) | ||
246 | mlog_errno(status); | ||
247 | goto leave; | ||
248 | } | ||
249 | |||
250 | status = ocfs2_journal_dirty(handle, xattr_bh); | ||
251 | if (status < 0) { | ||
252 | mlog_errno(status); | ||
253 | goto leave; | ||
254 | } | ||
255 | |||
256 | clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; | ||
257 | |||
258 | if (why != RESTART_NONE && clusters_to_add) { | ||
259 | if (why == RESTART_META) { | ||
260 | mlog(0, "restarting function.\n"); | ||
261 | restart_func = 1; | ||
262 | } else { | ||
263 | BUG_ON(why != RESTART_TRANS); | ||
264 | |||
265 | mlog(0, "restarting transaction.\n"); | ||
266 | /* TODO: This can be more intelligent. */ | ||
267 | credits = ocfs2_calc_extend_credits(osb->sb, | ||
268 | et.et_root_el, | ||
269 | clusters_to_add); | ||
270 | status = ocfs2_extend_trans(handle, credits); | ||
271 | if (status < 0) { | ||
272 | /* handle still has to be committed at | ||
273 | * this point. */ | ||
274 | status = -ENOMEM; | ||
275 | mlog_errno(status); | ||
276 | goto leave; | ||
277 | } | ||
278 | goto restarted_transaction; | ||
279 | } | ||
280 | } | ||
281 | |||
282 | leave: | ||
283 | if (handle) { | ||
284 | ocfs2_commit_trans(osb, handle); | ||
285 | handle = NULL; | ||
286 | } | ||
287 | if (data_ac) { | ||
288 | ocfs2_free_alloc_context(data_ac); | ||
289 | data_ac = NULL; | ||
290 | } | ||
291 | if (meta_ac) { | ||
292 | ocfs2_free_alloc_context(meta_ac); | ||
293 | meta_ac = NULL; | ||
294 | } | ||
295 | if ((!status) && restart_func) { | ||
296 | restart_func = 0; | ||
297 | goto restart_all; | ||
298 | } | ||
299 | |||
300 | return status; | ||
301 | } | ||
302 | |||
303 | static int __ocfs2_remove_xattr_range(struct inode *inode, | ||
304 | struct buffer_head *root_bh, | ||
305 | struct ocfs2_xattr_value_root *xv, | ||
306 | u32 cpos, u32 phys_cpos, u32 len, | ||
307 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
308 | { | ||
309 | int ret; | ||
310 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
311 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
312 | struct inode *tl_inode = osb->osb_tl_inode; | ||
313 | handle_t *handle; | ||
314 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
315 | struct ocfs2_extent_tree et; | ||
316 | |||
317 | ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); | ||
318 | |||
319 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
320 | if (ret) { | ||
321 | mlog_errno(ret); | ||
322 | return ret; | ||
323 | } | ||
324 | |||
325 | mutex_lock(&tl_inode->i_mutex); | ||
326 | |||
327 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
328 | ret = __ocfs2_flush_truncate_log(osb); | ||
329 | if (ret < 0) { | ||
330 | mlog_errno(ret); | ||
331 | goto out; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
336 | if (IS_ERR(handle)) { | ||
337 | ret = PTR_ERR(handle); | ||
338 | mlog_errno(ret); | ||
339 | goto out; | ||
340 | } | ||
341 | |||
342 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
343 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
344 | if (ret) { | ||
345 | mlog_errno(ret); | ||
346 | goto out_commit; | ||
347 | } | ||
348 | |||
349 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
350 | dealloc); | ||
351 | if (ret) { | ||
352 | mlog_errno(ret); | ||
353 | goto out_commit; | ||
354 | } | ||
355 | |||
356 | le32_add_cpu(&xv->xr_clusters, -len); | ||
357 | |||
358 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
359 | if (ret) { | ||
360 | mlog_errno(ret); | ||
361 | goto out_commit; | ||
362 | } | ||
363 | |||
364 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
365 | if (ret) | ||
366 | mlog_errno(ret); | ||
367 | |||
368 | out_commit: | ||
369 | ocfs2_commit_trans(osb, handle); | ||
370 | out: | ||
371 | mutex_unlock(&tl_inode->i_mutex); | ||
372 | |||
373 | if (meta_ac) | ||
374 | ocfs2_free_alloc_context(meta_ac); | ||
375 | |||
376 | return ret; | ||
377 | } | ||
378 | |||
379 | static int ocfs2_xattr_shrink_size(struct inode *inode, | ||
380 | u32 old_clusters, | ||
381 | u32 new_clusters, | ||
382 | struct buffer_head *root_bh, | ||
383 | struct ocfs2_xattr_value_root *xv) | ||
384 | { | ||
385 | int ret = 0; | ||
386 | u32 trunc_len, cpos, phys_cpos, alloc_size; | ||
387 | u64 block; | ||
388 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
389 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
390 | |||
391 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
392 | |||
393 | if (old_clusters <= new_clusters) | ||
394 | return 0; | ||
395 | |||
396 | cpos = new_clusters; | ||
397 | trunc_len = old_clusters - new_clusters; | ||
398 | while (trunc_len) { | ||
399 | ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, | ||
400 | &alloc_size, &xv->xr_list); | ||
401 | if (ret) { | ||
402 | mlog_errno(ret); | ||
403 | goto out; | ||
404 | } | ||
405 | |||
406 | if (alloc_size > trunc_len) | ||
407 | alloc_size = trunc_len; | ||
408 | |||
409 | ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, | ||
410 | phys_cpos, alloc_size, | ||
411 | &dealloc); | ||
412 | if (ret) { | ||
413 | mlog_errno(ret); | ||
414 | goto out; | ||
415 | } | ||
416 | |||
417 | block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
418 | ocfs2_remove_xattr_clusters_from_cache(inode, block, | ||
419 | alloc_size); | ||
420 | cpos += alloc_size; | ||
421 | trunc_len -= alloc_size; | ||
422 | } | ||
423 | |||
424 | out: | ||
425 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
426 | ocfs2_run_deallocs(osb, &dealloc); | ||
427 | |||
428 | return ret; | ||
429 | } | ||
430 | |||
431 | static int ocfs2_xattr_value_truncate(struct inode *inode, | ||
432 | struct buffer_head *root_bh, | ||
433 | struct ocfs2_xattr_value_root *xv, | ||
434 | int len) | ||
435 | { | ||
436 | int ret; | ||
437 | u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
438 | u32 old_clusters = le32_to_cpu(xv->xr_clusters); | ||
439 | |||
440 | if (new_clusters == old_clusters) | ||
441 | return 0; | ||
442 | |||
443 | if (new_clusters > old_clusters) | ||
444 | ret = ocfs2_xattr_extend_allocation(inode, | ||
445 | new_clusters - old_clusters, | ||
446 | root_bh, xv); | ||
447 | else | ||
448 | ret = ocfs2_xattr_shrink_size(inode, | ||
449 | old_clusters, new_clusters, | ||
450 | root_bh, xv); | ||
451 | |||
452 | return ret; | ||
453 | } | ||
454 | |||
455 | static int ocfs2_xattr_list_entry(char *buffer, size_t size, | ||
456 | size_t *result, const char *prefix, | ||
457 | const char *name, int name_len) | ||
458 | { | ||
459 | char *p = buffer + *result; | ||
460 | int prefix_len = strlen(prefix); | ||
461 | int total_len = prefix_len + name_len + 1; | ||
462 | |||
463 | *result += total_len; | ||
464 | |||
465 | /* we are just looking for how big our buffer needs to be */ | ||
466 | if (!size) | ||
467 | return 0; | ||
468 | |||
469 | if (*result > size) | ||
470 | return -ERANGE; | ||
471 | |||
472 | memcpy(p, prefix, prefix_len); | ||
473 | memcpy(p + prefix_len, name, name_len); | ||
474 | p[prefix_len + name_len] = '\0'; | ||
475 | |||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static int ocfs2_xattr_list_entries(struct inode *inode, | ||
480 | struct ocfs2_xattr_header *header, | ||
481 | char *buffer, size_t buffer_size) | ||
482 | { | ||
483 | size_t result = 0; | ||
484 | int i, type, ret; | ||
485 | const char *prefix, *name; | ||
486 | |||
487 | for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { | ||
488 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
489 | type = ocfs2_xattr_get_type(entry); | ||
490 | prefix = ocfs2_xattr_prefix(type); | ||
491 | |||
492 | if (prefix) { | ||
493 | name = (const char *)header + | ||
494 | le16_to_cpu(entry->xe_name_offset); | ||
495 | |||
496 | ret = ocfs2_xattr_list_entry(buffer, buffer_size, | ||
497 | &result, prefix, name, | ||
498 | entry->xe_name_len); | ||
499 | if (ret) | ||
500 | return ret; | ||
501 | } | ||
502 | } | ||
503 | |||
504 | return result; | ||
505 | } | ||
506 | |||
507 | static int ocfs2_xattr_ibody_list(struct inode *inode, | ||
508 | struct ocfs2_dinode *di, | ||
509 | char *buffer, | ||
510 | size_t buffer_size) | ||
511 | { | ||
512 | struct ocfs2_xattr_header *header = NULL; | ||
513 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
514 | int ret = 0; | ||
515 | |||
516 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
517 | return ret; | ||
518 | |||
519 | header = (struct ocfs2_xattr_header *) | ||
520 | ((void *)di + inode->i_sb->s_blocksize - | ||
521 | le16_to_cpu(di->i_xattr_inline_size)); | ||
522 | |||
523 | ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); | ||
524 | |||
525 | return ret; | ||
526 | } | ||
527 | |||
528 | static int ocfs2_xattr_block_list(struct inode *inode, | ||
529 | struct ocfs2_dinode *di, | ||
530 | char *buffer, | ||
531 | size_t buffer_size) | ||
532 | { | ||
533 | struct buffer_head *blk_bh = NULL; | ||
534 | struct ocfs2_xattr_block *xb; | ||
535 | int ret = 0; | ||
536 | |||
537 | if (!di->i_xattr_loc) | ||
538 | return ret; | ||
539 | |||
540 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
541 | if (ret < 0) { | ||
542 | mlog_errno(ret); | ||
543 | return ret; | ||
544 | } | ||
545 | /*Verify the signature of xattr block*/ | ||
546 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
547 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
548 | ret = -EFAULT; | ||
549 | goto cleanup; | ||
550 | } | ||
551 | |||
552 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
553 | |||
554 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
555 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | ||
556 | ret = ocfs2_xattr_list_entries(inode, header, | ||
557 | buffer, buffer_size); | ||
558 | } else { | ||
559 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
560 | ret = ocfs2_xattr_tree_list_index_block(inode, xt, | ||
561 | buffer, buffer_size); | ||
562 | } | ||
563 | cleanup: | ||
564 | brelse(blk_bh); | ||
565 | |||
566 | return ret; | ||
567 | } | ||
568 | |||
569 | ssize_t ocfs2_listxattr(struct dentry *dentry, | ||
570 | char *buffer, | ||
571 | size_t size) | ||
572 | { | ||
573 | int ret = 0, i_ret = 0, b_ret = 0; | ||
574 | struct buffer_head *di_bh = NULL; | ||
575 | struct ocfs2_dinode *di = NULL; | ||
576 | struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode); | ||
577 | |||
578 | if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) | ||
579 | return -EOPNOTSUPP; | ||
580 | |||
581 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
582 | return ret; | ||
583 | |||
584 | ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0); | ||
585 | if (ret < 0) { | ||
586 | mlog_errno(ret); | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
591 | |||
592 | down_read(&oi->ip_xattr_sem); | ||
593 | i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size); | ||
594 | if (i_ret < 0) | ||
595 | b_ret = 0; | ||
596 | else { | ||
597 | if (buffer) { | ||
598 | buffer += i_ret; | ||
599 | size -= i_ret; | ||
600 | } | ||
601 | b_ret = ocfs2_xattr_block_list(dentry->d_inode, di, | ||
602 | buffer, size); | ||
603 | if (b_ret < 0) | ||
604 | i_ret = 0; | ||
605 | } | ||
606 | up_read(&oi->ip_xattr_sem); | ||
607 | ocfs2_inode_unlock(dentry->d_inode, 0); | ||
608 | |||
609 | brelse(di_bh); | ||
610 | |||
611 | return i_ret + b_ret; | ||
612 | } | ||
613 | |||
614 | static int ocfs2_xattr_find_entry(int name_index, | ||
615 | const char *name, | ||
616 | struct ocfs2_xattr_search *xs) | ||
617 | { | ||
618 | struct ocfs2_xattr_entry *entry; | ||
619 | size_t name_len; | ||
620 | int i, cmp = 1; | ||
621 | |||
622 | if (name == NULL) | ||
623 | return -EINVAL; | ||
624 | |||
625 | name_len = strlen(name); | ||
626 | entry = xs->here; | ||
627 | for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
628 | cmp = name_index - ocfs2_xattr_get_type(entry); | ||
629 | if (!cmp) | ||
630 | cmp = name_len - entry->xe_name_len; | ||
631 | if (!cmp) | ||
632 | cmp = memcmp(name, (xs->base + | ||
633 | le16_to_cpu(entry->xe_name_offset)), | ||
634 | name_len); | ||
635 | if (cmp == 0) | ||
636 | break; | ||
637 | entry += 1; | ||
638 | } | ||
639 | xs->here = entry; | ||
640 | |||
641 | return cmp ? -ENODATA : 0; | ||
642 | } | ||
643 | |||
644 | static int ocfs2_xattr_get_value_outside(struct inode *inode, | ||
645 | struct ocfs2_xattr_value_root *xv, | ||
646 | void *buffer, | ||
647 | size_t len) | ||
648 | { | ||
649 | u32 cpos, p_cluster, num_clusters, bpc, clusters; | ||
650 | u64 blkno; | ||
651 | int i, ret = 0; | ||
652 | size_t cplen, blocksize; | ||
653 | struct buffer_head *bh = NULL; | ||
654 | struct ocfs2_extent_list *el; | ||
655 | |||
656 | el = &xv->xr_list; | ||
657 | clusters = le32_to_cpu(xv->xr_clusters); | ||
658 | bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
659 | blocksize = inode->i_sb->s_blocksize; | ||
660 | |||
661 | cpos = 0; | ||
662 | while (cpos < clusters) { | ||
663 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
664 | &num_clusters, el); | ||
665 | if (ret) { | ||
666 | mlog_errno(ret); | ||
667 | goto out; | ||
668 | } | ||
669 | |||
670 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
671 | /* Copy ocfs2_xattr_value */ | ||
672 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | ||
673 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
674 | if (ret) { | ||
675 | mlog_errno(ret); | ||
676 | goto out; | ||
677 | } | ||
678 | |||
679 | cplen = len >= blocksize ? blocksize : len; | ||
680 | memcpy(buffer, bh->b_data, cplen); | ||
681 | len -= cplen; | ||
682 | buffer += cplen; | ||
683 | |||
684 | brelse(bh); | ||
685 | bh = NULL; | ||
686 | if (len == 0) | ||
687 | break; | ||
688 | } | ||
689 | cpos += num_clusters; | ||
690 | } | ||
691 | out: | ||
692 | return ret; | ||
693 | } | ||
694 | |||
695 | static int ocfs2_xattr_ibody_get(struct inode *inode, | ||
696 | int name_index, | ||
697 | const char *name, | ||
698 | void *buffer, | ||
699 | size_t buffer_size, | ||
700 | struct ocfs2_xattr_search *xs) | ||
701 | { | ||
702 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
703 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
704 | struct ocfs2_xattr_value_root *xv; | ||
705 | size_t size; | ||
706 | int ret = 0; | ||
707 | |||
708 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
709 | return -ENODATA; | ||
710 | |||
711 | xs->end = (void *)di + inode->i_sb->s_blocksize; | ||
712 | xs->header = (struct ocfs2_xattr_header *) | ||
713 | (xs->end - le16_to_cpu(di->i_xattr_inline_size)); | ||
714 | xs->base = (void *)xs->header; | ||
715 | xs->here = xs->header->xh_entries; | ||
716 | |||
717 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
718 | if (ret) | ||
719 | return ret; | ||
720 | size = le64_to_cpu(xs->here->xe_value_size); | ||
721 | if (buffer) { | ||
722 | if (size > buffer_size) | ||
723 | return -ERANGE; | ||
724 | if (ocfs2_xattr_is_local(xs->here)) { | ||
725 | memcpy(buffer, (void *)xs->base + | ||
726 | le16_to_cpu(xs->here->xe_name_offset) + | ||
727 | OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); | ||
728 | } else { | ||
729 | xv = (struct ocfs2_xattr_value_root *) | ||
730 | (xs->base + le16_to_cpu( | ||
731 | xs->here->xe_name_offset) + | ||
732 | OCFS2_XATTR_SIZE(xs->here->xe_name_len)); | ||
733 | ret = ocfs2_xattr_get_value_outside(inode, xv, | ||
734 | buffer, size); | ||
735 | if (ret < 0) { | ||
736 | mlog_errno(ret); | ||
737 | return ret; | ||
738 | } | ||
739 | } | ||
740 | } | ||
741 | |||
742 | return size; | ||
743 | } | ||
744 | |||
/*
 * ocfs2_xattr_block_get()
 *
 * Look up @name in the inode's external xattr block (di->i_xattr_loc)
 * and, if @buffer is non-NULL, copy its value out.  Returns the value
 * size on success, -ENODATA when no xattr block exists or the name is
 * not found, -ERANGE when @buffer is too small, or another negative
 * error.  @xs is filled with the search state (xattr_bh, header, here,
 * bucket for indexed blocks) for use by callers.
 */
static int ocfs2_xattr_block_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = -ENODATA, name_offset, name_len, block_off, i;

	/* No external xattr block allocated for this inode. */
	if (!di->i_xattr_loc)
		return ret;

	/*
	 * Start with an all-NULL bucket so the cleanup loop below can
	 * brelse() unconditionally even if the bucket is never filled.
	 */
	memset(&xs->bucket, 0, sizeof(xs->bucket));

	ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	/* Verify the signature of the xattr block. */
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
		ret = -EFAULT;
		goto cleanup;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Flat (non-indexed) block: entries live in the block body. */
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		/* Indexed block: search the bucket tree instead. */
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	if (ret)
		goto cleanup;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		ret = -ERANGE;
		if (size > buffer_size)
			goto cleanup;

		name_offset = le16_to_cpu(xs->here->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
		i = xs->here - xs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * For indexed blocks the name/value pair lives in a
			 * bucket block; translate the entry index into the
			 * bucket-relative base and offset.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode,
								xs->bucket.xh,
								i,
								&block_off,
								&name_offset);
			xs->base = xs->bucket.bhs[block_off]->b_data;
		}
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Inline value: stored right after the name. */
			memcpy(buffer, (void *)xs->base +
			       name_offset + name_len, size);
		} else {
			/* Value stored in its own B-tree of clusters. */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + name_offset + name_len);
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				goto cleanup;
			}
		}
	}
	ret = size;
cleanup:
	/* Drop any bucket buffers pinned by the index search. */
	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
		brelse(xs->bucket.bhs[i]);
	memset(&xs->bucket, 0, sizeof(xs->bucket));

	brelse(blk_bh);
	return ret;
}
834 | |||
835 | /* ocfs2_xattr_get() | ||
836 | * | ||
837 | * Copy an extended attribute into the buffer provided. | ||
838 | * Buffer is NULL to compute the size of buffer required. | ||
839 | */ | ||
840 | int ocfs2_xattr_get(struct inode *inode, | ||
841 | int name_index, | ||
842 | const char *name, | ||
843 | void *buffer, | ||
844 | size_t buffer_size) | ||
845 | { | ||
846 | int ret; | ||
847 | struct ocfs2_dinode *di = NULL; | ||
848 | struct buffer_head *di_bh = NULL; | ||
849 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
850 | struct ocfs2_xattr_search xis = { | ||
851 | .not_found = -ENODATA, | ||
852 | }; | ||
853 | struct ocfs2_xattr_search xbs = { | ||
854 | .not_found = -ENODATA, | ||
855 | }; | ||
856 | |||
857 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
858 | return -EOPNOTSUPP; | ||
859 | |||
860 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
861 | ret = -ENODATA; | ||
862 | |||
863 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
864 | if (ret < 0) { | ||
865 | mlog_errno(ret); | ||
866 | return ret; | ||
867 | } | ||
868 | xis.inode_bh = xbs.inode_bh = di_bh; | ||
869 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
870 | |||
871 | down_read(&oi->ip_xattr_sem); | ||
872 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | ||
873 | buffer_size, &xis); | ||
874 | if (ret == -ENODATA) | ||
875 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | ||
876 | buffer_size, &xbs); | ||
877 | up_read(&oi->ip_xattr_sem); | ||
878 | ocfs2_inode_unlock(inode, 0); | ||
879 | |||
880 | brelse(di_bh); | ||
881 | |||
882 | return ret; | ||
883 | } | ||
884 | |||
/*
 * __ocfs2_xattr_set_value_outside()
 *
 * Write @value_len bytes of @value into the clusters already reserved
 * in @xv's allocation tree, block by block, under a single journal
 * transaction.  Any tail of the last written block is zero-filled.
 * The caller must have sized the tree (xr_clusters) beforehand; the
 * BUG_ON below enforces that.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   struct ocfs2_xattr_value_root *xv,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len, credits;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	handle_t *handle;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	/* One journal credit per block we are going to dirty. */
	credits = clusters * bpc;
	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Walk the value's extents one contiguous run at a time. */
	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(inode, blkno, &bh);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}

			/* Declare the write to the journal before touching bh. */
			ret = ocfs2_journal_access(handle,
						   inode,
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero the remainder of a partially-filled block. */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ret = ocfs2_journal_dirty(handle, bh);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	brelse(bh);

	return ret;
}
967 | |||
/*
 * ocfs2_xattr_cleanup()
 *
 * Roll back a half-completed large-value set: drop the entry and the
 * name + value-tree-root bytes at @offs that ocfs2_xattr_set_entry()
 * had already written locally, under a fresh journal transaction.
 * Called when writing the value outside failed.
 */
static int ocfs2_xattr_cleanup(struct inode *inode,
			       struct ocfs2_xattr_info *xi,
			       struct ocfs2_xattr_search *xs,
			       size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	/* Name + tree root is what set_value_outside() laid down. */
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}
	/* Decrease xattr count */
	le16_add_cpu(&xs->header->xh_count, -1);
	/* Remove the xattr entry and tree root which has already been set. */
	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
	memset(val, 0, size);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
1006 | |||
/*
 * ocfs2_xattr_update_entry()
 *
 * Point the found entry (xs->here) at the name/value stored at @offs,
 * record the new value size and local/outside flag, rehash it, and
 * journal the containing block.
 */
static int ocfs2_xattr_update_entry(struct inode *inode,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xs,
				    size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	xs->here->xe_name_offset = cpu_to_le16(offs);
	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
	/* Values up to INLINE_SIZE live next to the name; larger go outside. */
	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xattr_set_local(xs->here, 1);
	else
		ocfs2_xattr_set_local(xs->here, 0);
	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
1045 | |||
/*
 * ocfs2_xattr_set_value_outside()
 *
 * Store a large xattr value in its own B-tree of clusters.  At @offs
 * in the search base it writes the name followed by a fresh, empty
 * ocfs2_xattr_value_root, extends that root to hold xi->value_len
 * bytes via ocfs2_xattr_value_truncate(), streams the value in with
 * __ocfs2_xattr_set_value_outside(), and finally updates the entry.
 */
static int ocfs2_xattr_set_value_outside(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 size_t offs)
{
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	struct ocfs2_xattr_value_root *xv = NULL;
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
	int ret = 0;

	/* Lay down the name and a zeroed tree root after it. */
	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	xv = (struct ocfs2_xattr_value_root *)
		(val + OCFS2_XATTR_SIZE(name_len));
	xv->xr_clusters = 0;
	xv->xr_last_eb_blk = 0;
	xv->xr_list.l_tree_depth = 0;
	xv->xr_list.l_count = cpu_to_le16(1);
	xv->xr_list.l_next_free_rec = 0;

	/* "Truncate" up: allocate clusters to cover the new value. */
	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
					 xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
					      xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
	if (ret < 0)
		mlog_errno(ret);

	return ret;
}
1090 | |||
1091 | /* | ||
1092 | * ocfs2_xattr_set_entry_local() | ||
1093 | * | ||
1094 | * Set, replace or remove extended attribute in local. | ||
1095 | */ | ||
static void ocfs2_xattr_set_entry_local(struct inode *inode,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xs,
					struct ocfs2_xattr_entry *last,
					size_t min_offs)
{
	/*
	 * Insert, replace or remove @xi in the local (inode-body or
	 * flat xattr block) storage area described by @xs.  The caller
	 * has already verified there is enough free space and started
	 * the journal transaction.  @last is the slot just past the
	 * current entries; @min_offs is the lowest name/value offset
	 * (names and values grow downward toward the entry table).
	 */
	size_t name_len = strlen(xi->name);
	int i;

	if (xi->value && xs->not_found) {
		/* Insert the new xattr entry. */
		le16_add_cpu(&xs->header->xh_count, 1);
		ocfs2_xattr_set_type(last, xi->name_index);
		ocfs2_xattr_set_local(last, 1);
		last->xe_name_len = name_len;
	} else {
		/* Replacing or removing: first tear out the old payload. */
		void *first_val;
		void *val;
		size_t offs, size;

		first_val = xs->base + min_offs;
		offs = le16_to_cpu(xs->here->xe_name_offset);
		val = xs->base + offs;

		/* Size of the old name+value region being reclaimed. */
		if (le64_to_cpu(xs->here->xe_value_size) >
		    OCFS2_XATTR_INLINE_SIZE)
			size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_ROOT_SIZE;
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));

		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_SIZE(xi->value_len)) {
			/* The old and the new value have the
			   same size. Just replace the value. */
			ocfs2_xattr_set_local(xs->here, 1);
			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
			/* Clear value bytes. */
			memset(val + OCFS2_XATTR_SIZE(name_len),
			       0,
			       OCFS2_XATTR_SIZE(xi->value_len));
			memcpy(val + OCFS2_XATTR_SIZE(name_len),
			       xi->value,
			       xi->value_len);
			return;
		}
		/* Remove the old name+value: slide everything below it up. */
		memmove(first_val + size, first_val, val - first_val);
		memset(first_val, 0, size);
		xs->here->xe_name_hash = 0;
		xs->here->xe_name_offset = 0;
		ocfs2_xattr_set_local(xs->here, 1);
		xs->here->xe_value_size = 0;

		min_offs += size;

		/* Adjust all value offsets. */
		last = xs->header->xh_entries;
		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
			size_t o = le16_to_cpu(last->xe_name_offset);

			/* Entries stored below the hole moved up by @size. */
			if (o < offs)
				last->xe_name_offset = cpu_to_le16(o + size);
			last += 1;
		}

		if (!xi->value) {
			/* Remove the old entry from the entry table. */
			last -= 1;
			memmove(xs->here, xs->here + 1,
				(void *)last - (void *)xs->here);
			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
			le16_add_cpu(&xs->header->xh_count, -1);
		}
	}
	if (xi->value) {
		/* Insert the new name+value at the bottom of free space. */
		size_t size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(xi->value_len);
		void *val = xs->base + min_offs - size;

		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
		memset(val, 0, size);
		memcpy(val, xi->name, name_len);
		memcpy(val + OCFS2_XATTR_SIZE(name_len),
		       xi->value,
		       xi->value_len);
		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
		ocfs2_xattr_set_local(xs->here, 1);
		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
	}

	return;
}
1191 | |||
1192 | /* | ||
1193 | * ocfs2_xattr_set_entry() | ||
1194 | * | ||
1195 | * Set extended attribute entry into inode or block. | ||
1196 | * | ||
1197 | * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE, | ||
1198 | * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(), | ||
1199 | * then set value in B tree with set_value_outside(). | ||
1200 | */ | ||
static int ocfs2_xattr_set_entry(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 int flag)
{
	struct ocfs2_xattr_entry *last;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
	size_t size_l = 0;
	handle_t *handle = NULL;
	int free, i, ret;
	/*
	 * Local copy of the request; for values larger than INLINE_SIZE
	 * it is redirected to def_xv below so that only the tree root
	 * is stored locally in step one.
	 */
	struct ocfs2_xattr_info xi_l = {
		.name_index = xi->name_index,
		.name = xi->name,
		.value = xi->value,
		.value_len = xi->value_len,
	};

	/* Compute min_offs, last and free space. */
	last = xs->header->xh_entries;

	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
		size_t offs = le16_to_cpu(last->xe_name_offset);
		if (offs < min_offs)
			min_offs = offs;
		last += 1;
	}

	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
	if (free < 0)
		return -EFAULT;

	if (!xs->not_found) {
		/* Replacing: the old entry's space counts as free. */
		size_t size = 0;
		if (ocfs2_xattr_is_local(xs->here))
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_ROOT_SIZE;
		free += (size + sizeof(struct ocfs2_xattr_entry));
	}
	/* Check free space in inode or block */
	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
		if (free < sizeof(struct ocfs2_xattr_entry) +
			   OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_ROOT_SIZE) {
			ret = -ENOSPC;
			goto out;
		}
		/* Large value: store only a tree root locally (step one). */
		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
		xi_l.value = (void *)&def_xv;
		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
	} else if (xi->value) {
		if (free < sizeof(struct ocfs2_xattr_entry) +
			   OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_SIZE(xi->value_len)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	if (!xs->not_found) {
		/* For existing extended attribute */
		size_t size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
		void *val = xs->base + offs;

		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
			/* Replace existing local xattr with tree root */
			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
							    offs);
			if (ret < 0)
				mlog_errno(ret);
			goto out;
		} else if (!ocfs2_xattr_is_local(xs->here)) {
			/* For existing xattr which has value outside */
			struct ocfs2_xattr_value_root *xv = NULL;
			xv = (struct ocfs2_xattr_value_root *)(val +
				OCFS2_XATTR_SIZE(name_len));

			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
				/*
				 * If the new value needs to be set outside
				 * too, first truncate the old value to the
				 * new length, then store the new value with
				 * set_value_outside().
				 */
				ret = ocfs2_xattr_value_truncate(inode,
								 xs->xattr_bh,
								 xv,
								 xi->value_len);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}

				ret = __ocfs2_xattr_set_value_outside(inode,
								xv,
								xi->value,
								xi->value_len);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_xattr_update_entry(inode,
							       xi,
							       xs,
							       offs);
				if (ret < 0)
					mlog_errno(ret);
				goto out;
			} else {
				/*
				 * If the new value will be stored locally,
				 * just truncate the old value to zero,
				 * freeing its clusters.
				 */
				ret = ocfs2_xattr_value_truncate(inode,
								 xs->xattr_bh,
								 xv,
								 0);
				if (ret < 0)
					mlog_errno(ret);
			}
		}
	}

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
		/* set extended attribute in external block. */
		ret = ocfs2_extend_trans(handle,
					 OCFS2_INODE_UPDATE_CREDITS +
					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
					   OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	/*
	 * Set value in local, include set tree root in local.
	 * This is the first step for value size >INLINE_SIZE.
	 */
	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);

	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
	    (flag & OCFS2_INLINE_XATTR_FL)) {
		/* First inline xattr on this inode: carve out the region. */
		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
		unsigned int xattrsize = osb->s_xattr_inline_size;

		/*
		 * Adjust extent record count or inline data size
		 * to reserve space for extended attribute.
		 */
		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
			struct ocfs2_inline_data *idata = &di->id2.i_data;
			le16_add_cpu(&idata->id_count, -xattrsize);
		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
			struct ocfs2_extent_list *el = &di->id2.i_list;
			le16_add_cpu(&el->l_count, -(xattrsize /
					sizeof(struct ocfs2_extent_rec)));
		}
		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
	}
	/* Update xattr flag */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= flag;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);
	/* Update inode ctime */
	inode->i_ctime = CURRENT_TIME;
	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
	if (ret < 0)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);

	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
		/*
		 * Set value outside in B tree.
		 * This is the second step for value size > INLINE_SIZE.
		 */
		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
		if (ret < 0) {
			int ret2;

			mlog_errno(ret);
			/*
			 * If set value outside failed, we have to clean
			 * the junk tree root we have already set in local.
			 */
			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
			if (ret2 < 0)
				mlog_errno(ret2);
		}
	}
out:
	return ret;

}
1436 | |||
1437 | static int ocfs2_remove_value_outside(struct inode*inode, | ||
1438 | struct buffer_head *bh, | ||
1439 | struct ocfs2_xattr_header *header) | ||
1440 | { | ||
1441 | int ret = 0, i; | ||
1442 | |||
1443 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { | ||
1444 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
1445 | |||
1446 | if (!ocfs2_xattr_is_local(entry)) { | ||
1447 | struct ocfs2_xattr_value_root *xv; | ||
1448 | void *val; | ||
1449 | |||
1450 | val = (void *)header + | ||
1451 | le16_to_cpu(entry->xe_name_offset); | ||
1452 | xv = (struct ocfs2_xattr_value_root *) | ||
1453 | (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); | ||
1454 | ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); | ||
1455 | if (ret < 0) { | ||
1456 | mlog_errno(ret); | ||
1457 | return ret; | ||
1458 | } | ||
1459 | } | ||
1460 | } | ||
1461 | |||
1462 | return ret; | ||
1463 | } | ||
1464 | |||
1465 | static int ocfs2_xattr_ibody_remove(struct inode *inode, | ||
1466 | struct buffer_head *di_bh) | ||
1467 | { | ||
1468 | |||
1469 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1470 | struct ocfs2_xattr_header *header; | ||
1471 | int ret; | ||
1472 | |||
1473 | header = (struct ocfs2_xattr_header *) | ||
1474 | ((void *)di + inode->i_sb->s_blocksize - | ||
1475 | le16_to_cpu(di->i_xattr_inline_size)); | ||
1476 | |||
1477 | ret = ocfs2_remove_value_outside(inode, di_bh, header); | ||
1478 | |||
1479 | return ret; | ||
1480 | } | ||
1481 | |||
1482 | static int ocfs2_xattr_block_remove(struct inode *inode, | ||
1483 | struct buffer_head *blk_bh) | ||
1484 | { | ||
1485 | struct ocfs2_xattr_block *xb; | ||
1486 | int ret = 0; | ||
1487 | |||
1488 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
1489 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1490 | struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); | ||
1491 | ret = ocfs2_remove_value_outside(inode, blk_bh, header); | ||
1492 | } else | ||
1493 | ret = ocfs2_delete_xattr_index_block(inode, blk_bh); | ||
1494 | |||
1495 | return ret; | ||
1496 | } | ||
1497 | |||
/*
 * ocfs2_xattr_free_block()
 *
 * Tear down and deallocate the external xattr block at @block: empty
 * its contents, then return its suballocator bit to the extent-alloc
 * system inode it was allocated from, under that inode's i_mutex and
 * cluster lock.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block)
{
	struct inode *xb_alloc_inode;
	struct buffer_head *xb_alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret = 0;
	u64 blk, bg_blkno;
	u16 bit;

	ret = ocfs2_read_block(inode, block, &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Verify the signature of the xattr block. */
	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
		ret = -EFAULT;
		goto out;
	}

	/* Free value clusters / index buckets before freeing the block. */
	ret = ocfs2_xattr_block_remove(inode, blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Locate the suballocator group and bit this block came from. */
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blk = le64_to_cpu(xb->xb_blkno);
	bit = le16_to_cpu(xb->xb_suballoc_bit);
	bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!xb_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	mutex_lock(&xb_alloc_inode->i_mutex);

	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out_unlock:
	ocfs2_inode_unlock(xb_alloc_inode, 1);
	brelse(xb_alloc_bh);
out_mutex:
	mutex_unlock(&xb_alloc_inode->i_mutex);
	iput(xb_alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}
1574 | |||
1575 | /* | ||
1576 | * ocfs2_xattr_remove() | ||
1577 | * | ||
1578 | * Free extended attribute resources associated with this inode. | ||
1579 | */ | ||
int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
{
	/*
	 * Drop all xattr storage for @inode at delete time: inline
	 * values' outside clusters, then the external xattr block, then
	 * clear i_xattr_loc and the xattr feature flags in the dinode.
	 * Caller holds the inode cluster lock (writes go through @di_bh).
	 */
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	handle_t *handle;
	int ret;

	/* Nothing to do on volumes without xattr support. */
	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return 0;

	/* Nothing to do if the inode never had xattrs. */
	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return 0;

	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	if (di->i_xattr_loc) {
		ret = ocfs2_xattr_free_block(inode,
					     le64_to_cpu(di->i_xattr_loc));
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	di->i_xattr_loc = 0;

	/* ip_lock guards the in-memory/on-disk dyn_features pair. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
1639 | |||
1640 | static int ocfs2_xattr_has_space_inline(struct inode *inode, | ||
1641 | struct ocfs2_dinode *di) | ||
1642 | { | ||
1643 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1644 | unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; | ||
1645 | int free; | ||
1646 | |||
1647 | if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) | ||
1648 | return 0; | ||
1649 | |||
1650 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
1651 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
1652 | free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); | ||
1653 | } else if (ocfs2_inode_is_fast_symlink(inode)) { | ||
1654 | free = ocfs2_fast_symlink_chars(inode->i_sb) - | ||
1655 | le64_to_cpu(di->i_size); | ||
1656 | } else { | ||
1657 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
1658 | free = (le16_to_cpu(el->l_count) - | ||
1659 | le16_to_cpu(el->l_next_free_rec)) * | ||
1660 | sizeof(struct ocfs2_extent_rec); | ||
1661 | } | ||
1662 | if (free >= xattrsize) | ||
1663 | return 1; | ||
1664 | |||
1665 | return 0; | ||
1666 | } | ||
1667 | |||
/*
 * ocfs2_xattr_ibody_find()
 *
 * Find extended attribute @name in the inode block and fill search
 * info into @xs (header, base, end, here, not_found).  Returns 0 when
 * the block cannot hold inline xattrs (leaving xs->not_found at its
 * caller-initialized value), 0 on a completed search, or a negative
 * error from ocfs2_xattr_find_entry() other than -ENODATA.
 */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	/* Minimum-size blocks have no tail space for inline xattrs. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		/* No inline region yet; only worth setting up the search
		   state if one could be carved out of the inode body. */
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	/* Existing region uses its recorded size; a prospective one
	   uses the superblock's per-volume inline size. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(name_index, name, xs);
		if (ret && ret != -ENODATA)
			return ret;
		xs->not_found = ret;
	}

	return 0;
}
1716 | |||
1717 | /* | ||
1718 | * ocfs2_xattr_ibody_set() | ||
1719 | * | ||
1720 | * Set, replace or remove an extended attribute into inode block. | ||
1721 | * | ||
1722 | */ | ||
1723 | static int ocfs2_xattr_ibody_set(struct inode *inode, | ||
1724 | struct ocfs2_xattr_info *xi, | ||
1725 | struct ocfs2_xattr_search *xs) | ||
1726 | { | ||
1727 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1728 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1729 | int ret; | ||
1730 | |||
1731 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | ||
1732 | return -ENOSPC; | ||
1733 | |||
1734 | down_write(&oi->ip_alloc_sem); | ||
1735 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
1736 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
1737 | ret = -ENOSPC; | ||
1738 | goto out; | ||
1739 | } | ||
1740 | } | ||
1741 | |||
1742 | ret = ocfs2_xattr_set_entry(inode, xi, xs, | ||
1743 | (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); | ||
1744 | out: | ||
1745 | up_write(&oi->ip_alloc_sem); | ||
1746 | |||
1747 | return ret; | ||
1748 | } | ||
1749 | |||
1750 | /* | ||
1751 | * ocfs2_xattr_block_find() | ||
1752 | * | ||
1753 | * Find extended attribute in external block and | ||
1754 | * fill search info into struct ocfs2_xattr_search. | ||
1755 | */ | ||
1756 | static int ocfs2_xattr_block_find(struct inode *inode, | ||
1757 | int name_index, | ||
1758 | const char *name, | ||
1759 | struct ocfs2_xattr_search *xs) | ||
1760 | { | ||
1761 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1762 | struct buffer_head *blk_bh = NULL; | ||
1763 | struct ocfs2_xattr_block *xb; | ||
1764 | int ret = 0; | ||
1765 | |||
1766 | if (!di->i_xattr_loc) | ||
1767 | return ret; | ||
1768 | |||
1769 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
1770 | if (ret < 0) { | ||
1771 | mlog_errno(ret); | ||
1772 | return ret; | ||
1773 | } | ||
1774 | /*Verify the signature of xattr block*/ | ||
1775 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
1776 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
1777 | ret = -EFAULT; | ||
1778 | goto cleanup; | ||
1779 | } | ||
1780 | |||
1781 | xs->xattr_bh = blk_bh; | ||
1782 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
1783 | |||
1784 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1785 | xs->header = &xb->xb_attrs.xb_header; | ||
1786 | xs->base = (void *)xs->header; | ||
1787 | xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; | ||
1788 | xs->here = xs->header->xh_entries; | ||
1789 | |||
1790 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
1791 | } else | ||
1792 | ret = ocfs2_xattr_index_block_find(inode, blk_bh, | ||
1793 | name_index, | ||
1794 | name, xs); | ||
1795 | |||
1796 | if (ret && ret != -ENODATA) { | ||
1797 | xs->xattr_bh = NULL; | ||
1798 | goto cleanup; | ||
1799 | } | ||
1800 | xs->not_found = ret; | ||
1801 | return 0; | ||
1802 | cleanup: | ||
1803 | brelse(blk_bh); | ||
1804 | |||
1805 | return ret; | ||
1806 | } | ||
1807 | |||
/*
 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
 * re-initialized, i.e. the block is converted back to the flat
 * (non-indexed) format.
 */
static int ocfs2_restore_xattr_block(struct inode *inode,
				     struct ocfs2_xattr_search *xs)
{
	int ret;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	/* Only valid for an indexed block whose extent list is empty. */
	BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
		le16_to_cpu(el->l_next_free_rec) != 0);

	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Wipe everything past the common block header... */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	/* ...and clear INDEXED so the block reads as flat again. */
	xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);

	/* NOTE(review): ocfs2_journal_dirty()'s return value is ignored
	 * here, unlike in ocfs2_xattr_block_set() — confirm intentional. */
	ocfs2_journal_dirty(handle, xs->xattr_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}
1853 | |||
1854 | /* | ||
1855 | * ocfs2_xattr_block_set() | ||
1856 | * | ||
1857 | * Set, replace or remove an extended attribute into external block. | ||
1858 | * | ||
1859 | */ | ||
1860 | static int ocfs2_xattr_block_set(struct inode *inode, | ||
1861 | struct ocfs2_xattr_info *xi, | ||
1862 | struct ocfs2_xattr_search *xs) | ||
1863 | { | ||
1864 | struct buffer_head *new_bh = NULL; | ||
1865 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1866 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
1867 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1868 | handle_t *handle = NULL; | ||
1869 | struct ocfs2_xattr_block *xblk = NULL; | ||
1870 | u16 suballoc_bit_start; | ||
1871 | u32 num_got; | ||
1872 | u64 first_blkno; | ||
1873 | int ret; | ||
1874 | |||
1875 | if (!xs->xattr_bh) { | ||
1876 | /* | ||
1877 | * Alloc one external block for extended attribute | ||
1878 | * outside of inode. | ||
1879 | */ | ||
1880 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
1881 | if (ret < 0) { | ||
1882 | mlog_errno(ret); | ||
1883 | goto out; | ||
1884 | } | ||
1885 | handle = ocfs2_start_trans(osb, | ||
1886 | OCFS2_XATTR_BLOCK_CREATE_CREDITS); | ||
1887 | if (IS_ERR(handle)) { | ||
1888 | ret = PTR_ERR(handle); | ||
1889 | mlog_errno(ret); | ||
1890 | goto out; | ||
1891 | } | ||
1892 | ret = ocfs2_journal_access(handle, inode, xs->inode_bh, | ||
1893 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1894 | if (ret < 0) { | ||
1895 | mlog_errno(ret); | ||
1896 | goto out_commit; | ||
1897 | } | ||
1898 | |||
1899 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | ||
1900 | &suballoc_bit_start, &num_got, | ||
1901 | &first_blkno); | ||
1902 | if (ret < 0) { | ||
1903 | mlog_errno(ret); | ||
1904 | goto out_commit; | ||
1905 | } | ||
1906 | |||
1907 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
1908 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
1909 | |||
1910 | ret = ocfs2_journal_access(handle, inode, new_bh, | ||
1911 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
1912 | if (ret < 0) { | ||
1913 | mlog_errno(ret); | ||
1914 | goto out_commit; | ||
1915 | } | ||
1916 | |||
1917 | /* Initialize ocfs2_xattr_block */ | ||
1918 | xs->xattr_bh = new_bh; | ||
1919 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | ||
1920 | memset(xblk, 0, inode->i_sb->s_blocksize); | ||
1921 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | ||
1922 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
1923 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
1924 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | ||
1925 | xblk->xb_blkno = cpu_to_le64(first_blkno); | ||
1926 | |||
1927 | xs->header = &xblk->xb_attrs.xb_header; | ||
1928 | xs->base = (void *)xs->header; | ||
1929 | xs->end = (void *)xblk + inode->i_sb->s_blocksize; | ||
1930 | xs->here = xs->header->xh_entries; | ||
1931 | |||
1932 | |||
1933 | ret = ocfs2_journal_dirty(handle, new_bh); | ||
1934 | if (ret < 0) { | ||
1935 | mlog_errno(ret); | ||
1936 | goto out_commit; | ||
1937 | } | ||
1938 | di->i_xattr_loc = cpu_to_le64(first_blkno); | ||
1939 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | ||
1940 | if (ret < 0) | ||
1941 | mlog_errno(ret); | ||
1942 | out_commit: | ||
1943 | ocfs2_commit_trans(osb, handle); | ||
1944 | out: | ||
1945 | if (meta_ac) | ||
1946 | ocfs2_free_alloc_context(meta_ac); | ||
1947 | if (ret < 0) | ||
1948 | return ret; | ||
1949 | } else | ||
1950 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | ||
1951 | |||
1952 | if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
1953 | /* Set extended attribute into external block */ | ||
1954 | ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); | ||
1955 | if (!ret || ret != -ENOSPC) | ||
1956 | goto end; | ||
1957 | |||
1958 | ret = ocfs2_xattr_create_index_block(inode, xs); | ||
1959 | if (ret) | ||
1960 | goto end; | ||
1961 | } | ||
1962 | |||
1963 | ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); | ||
1964 | if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0) | ||
1965 | ret = ocfs2_restore_xattr_block(inode, xs); | ||
1966 | |||
1967 | end: | ||
1968 | |||
1969 | return ret; | ||
1970 | } | ||
1971 | |||
/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
 *
 * flags honors XATTR_CREATE (fail with -EEXIST if the attribute exists)
 * and XATTR_REPLACE (fail with -ENODATA if it does not).  The attribute
 * is placed in the inode body when there is room, falling back to the
 * external xattr block on -ENOSPC; when a set succeeds in one location,
 * any stale copy in the other location is removed.
 *
 * Locking: takes the cluster inode lock (write) and then the inode's
 * ip_xattr_sem for the whole operation.
 */
int ocfs2_xattr_set(struct inode *inode,
		    int name_index,
		    const char *name,
		    const void *value,
		    size_t value_len,
		    int flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int ret;
	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	struct ocfs2_xattr_info xi = {
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,
	};

	/* Search state for the inode body... */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	/* ...and for the external xattr block. */
	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);
	/*
	 * Scan inode and external block to find the same name
	 * extended attribute and collect search information.
	 * The block is only searched when the inode body misses.
	 */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	/* Enforce the XATTR_CREATE / XATTR_REPLACE contract. */
	if (xis.not_found && xbs.not_found) {
		ret = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		ret = 0;
		if (!value)
			goto cleanup;
	} else {
		ret = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}

	if (!value) {
		/* Remove existing extended attribute */
		if (!xis.not_found)
			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
		else if (!xbs.not_found)
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
		if (!ret && !xbs.not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi.value = NULL;
			xi.value_len = 0;
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
		} else if (ret == -ENOSPC) {
			/* Refresh xbs if the block was never searched above. */
			if (di->i_xattr_loc && !xbs.xattr_bh) {
				ret = ocfs2_xattr_block_find(inode, name_index,
							     name, &xbs);
				if (ret)
					goto cleanup;
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
			if (ret)
				goto cleanup;
			if (!xis.not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi.value = NULL;
				xi.value_len = 0;
				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
			}
		}
	}
cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	ocfs2_inode_unlock(inode, 1);
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	/* Release any bucket buffers pinned by the block search. */
	for (i = 0; i < blk_per_bucket; i++)
		brelse(xbs.bucket.bhs[i]);

	return ret;
}
2096 | |||
2097 | /* | ||
2098 | * Find the xattr extent rec which may contains name_hash. | ||
2099 | * e_cpos will be the first name hash of the xattr rec. | ||
2100 | * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. | ||
2101 | */ | ||
2102 | static int ocfs2_xattr_get_rec(struct inode *inode, | ||
2103 | u32 name_hash, | ||
2104 | u64 *p_blkno, | ||
2105 | u32 *e_cpos, | ||
2106 | u32 *num_clusters, | ||
2107 | struct ocfs2_extent_list *el) | ||
2108 | { | ||
2109 | int ret = 0, i; | ||
2110 | struct buffer_head *eb_bh = NULL; | ||
2111 | struct ocfs2_extent_block *eb; | ||
2112 | struct ocfs2_extent_rec *rec = NULL; | ||
2113 | u64 e_blkno = 0; | ||
2114 | |||
2115 | if (el->l_tree_depth) { | ||
2116 | ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | ||
2117 | if (ret) { | ||
2118 | mlog_errno(ret); | ||
2119 | goto out; | ||
2120 | } | ||
2121 | |||
2122 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2123 | el = &eb->h_list; | ||
2124 | |||
2125 | if (el->l_tree_depth) { | ||
2126 | ocfs2_error(inode->i_sb, | ||
2127 | "Inode %lu has non zero tree depth in " | ||
2128 | "xattr tree block %llu\n", inode->i_ino, | ||
2129 | (unsigned long long)eb_bh->b_blocknr); | ||
2130 | ret = -EROFS; | ||
2131 | goto out; | ||
2132 | } | ||
2133 | } | ||
2134 | |||
2135 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
2136 | rec = &el->l_recs[i]; | ||
2137 | |||
2138 | if (le32_to_cpu(rec->e_cpos) <= name_hash) { | ||
2139 | e_blkno = le64_to_cpu(rec->e_blkno); | ||
2140 | break; | ||
2141 | } | ||
2142 | } | ||
2143 | |||
2144 | if (!e_blkno) { | ||
2145 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
2146 | "record (%u, %u, 0) in xattr", inode->i_ino, | ||
2147 | le32_to_cpu(rec->e_cpos), | ||
2148 | ocfs2_rec_clusters(el, rec)); | ||
2149 | ret = -EROFS; | ||
2150 | goto out; | ||
2151 | } | ||
2152 | |||
2153 | *p_blkno = le64_to_cpu(rec->e_blkno); | ||
2154 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
2155 | if (e_cpos) | ||
2156 | *e_cpos = le32_to_cpu(rec->e_cpos); | ||
2157 | out: | ||
2158 | brelse(eb_bh); | ||
2159 | return ret; | ||
2160 | } | ||
2161 | |||
/*
 * Per-bucket callback for ocfs2_iterate_xattr_buckets(); a non-zero
 * return value stops the iteration and is propagated to the caller.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
2165 | |||
2166 | static int ocfs2_find_xe_in_bucket(struct inode *inode, | ||
2167 | struct buffer_head *header_bh, | ||
2168 | int name_index, | ||
2169 | const char *name, | ||
2170 | u32 name_hash, | ||
2171 | u16 *xe_index, | ||
2172 | int *found) | ||
2173 | { | ||
2174 | int i, ret = 0, cmp = 1, block_off, new_offset; | ||
2175 | struct ocfs2_xattr_header *xh = | ||
2176 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
2177 | size_t name_len = strlen(name); | ||
2178 | struct ocfs2_xattr_entry *xe = NULL; | ||
2179 | struct buffer_head *name_bh = NULL; | ||
2180 | char *xe_name; | ||
2181 | |||
2182 | /* | ||
2183 | * We don't use binary search in the bucket because there | ||
2184 | * may be multiple entries with the same name hash. | ||
2185 | */ | ||
2186 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
2187 | xe = &xh->xh_entries[i]; | ||
2188 | |||
2189 | if (name_hash > le32_to_cpu(xe->xe_name_hash)) | ||
2190 | continue; | ||
2191 | else if (name_hash < le32_to_cpu(xe->xe_name_hash)) | ||
2192 | break; | ||
2193 | |||
2194 | cmp = name_index - ocfs2_xattr_get_type(xe); | ||
2195 | if (!cmp) | ||
2196 | cmp = name_len - xe->xe_name_len; | ||
2197 | if (cmp) | ||
2198 | continue; | ||
2199 | |||
2200 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
2201 | xh, | ||
2202 | i, | ||
2203 | &block_off, | ||
2204 | &new_offset); | ||
2205 | if (ret) { | ||
2206 | mlog_errno(ret); | ||
2207 | break; | ||
2208 | } | ||
2209 | |||
2210 | ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, | ||
2211 | &name_bh); | ||
2212 | if (ret) { | ||
2213 | mlog_errno(ret); | ||
2214 | break; | ||
2215 | } | ||
2216 | xe_name = name_bh->b_data + new_offset; | ||
2217 | |||
2218 | cmp = memcmp(name, xe_name, name_len); | ||
2219 | brelse(name_bh); | ||
2220 | name_bh = NULL; | ||
2221 | |||
2222 | if (cmp == 0) { | ||
2223 | *xe_index = i; | ||
2224 | *found = 1; | ||
2225 | ret = 0; | ||
2226 | break; | ||
2227 | } | ||
2228 | } | ||
2229 | |||
2230 | return ret; | ||
2231 | } | ||
2232 | |||
/*
 * Find the specified xattr entry in a series of buckets.
 * This series start from p_blkno and last for num_clusters.
 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
 * the num of the valid buckets.
 *
 * Return the buffer_head this xattr should reside in. And if the xattr's
 * hash is in the gap of 2 buckets, return the lower bucket.
 *
 * Binary-searches the buckets by their first entry's hash; when the
 * candidate bucket is found, ocfs2_find_xe_in_bucket() does the exact
 * match.  On success the bucket is recorded in xs (first block pinned;
 * all blocks pinned only when the entry was found).  Returns -ENODATA
 * when no matching entry exists.
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct buffer_head *bh = NULL;
	struct buffer_head *lower_bh = NULL;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	u32 last_hash;
	u64 blkno;

	/* Read the first bucket's header to learn the valid bucket count. */
	ret = ocfs2_read_block(inode, p_blkno, &bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = (struct ocfs2_xattr_header *)bh->b_data;
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;

	while (low_bucket <= high_bucket) {
		brelse(bh);
		bh = NULL;
		bucket = (low_bucket + high_bucket) / 2;

		blkno = p_blkno + bucket * blk_per_bucket;

		ret = ocfs2_read_block(inode, blkno, &bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = (struct ocfs2_xattr_header *)bh->b_data;
		xe = &xh->xh_entries[0];
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one. for an empty
		 * bucket, the last one is also the first one.
		 */
		if (xh->xh_count)
			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];

		/* NOTE(review): last_hash is assigned but never read. */
		last_hash = le32_to_cpu(xe->xe_name_hash);

		/* record lower_bh which may be the insert place. */
		brelse(lower_bh);
		lower_bh = bh;
		bh = NULL;

		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* the searched xattr should reside in this bucket if exists. */
		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value is in the gap of 2 buckets, we will
	 * always set it to the previous bucket.
	 */
	if (!lower_bh) {
		/*
		 * We can't find any bucket whose first name_hash is less
		 * than the find name_hash.
		 */
		BUG_ON(bh->b_blocknr != p_blkno);
		lower_bh = bh;
		bh = NULL;
	}
	/* Hand ownership of lower_bh to the search state. */
	xs->bucket.bhs[0] = lower_bh;
	xs->bucket.xh = (struct ocfs2_xattr_header *)
		xs->bucket.bhs[0]->b_data;
	lower_bh = NULL;

	xs->header = xs->bucket.xh;
	xs->base = xs->bucket.bhs[0]->b_data;
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		/*
		 * If we have found the xattr entry, read all the blocks in
		 * this bucket.
		 */
		ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
					blk_per_bucket - 1, &xs->bucket.bhs[1],
					0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xs->here = &xs->header->xh_entries[index];
		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
	} else
		ret = -ENODATA;

out:
	brelse(bh);
	brelse(lower_bh);
	return ret;
}
2369 | |||
2370 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
2371 | struct buffer_head *root_bh, | ||
2372 | int name_index, | ||
2373 | const char *name, | ||
2374 | struct ocfs2_xattr_search *xs) | ||
2375 | { | ||
2376 | int ret; | ||
2377 | struct ocfs2_xattr_block *xb = | ||
2378 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
2379 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
2380 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
2381 | u64 p_blkno = 0; | ||
2382 | u32 first_hash, num_clusters = 0; | ||
2383 | u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); | ||
2384 | |||
2385 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
2386 | return -ENODATA; | ||
2387 | |||
2388 | mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", | ||
2389 | name, name_hash, name_index); | ||
2390 | |||
2391 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, | ||
2392 | &num_clusters, el); | ||
2393 | if (ret) { | ||
2394 | mlog_errno(ret); | ||
2395 | goto out; | ||
2396 | } | ||
2397 | |||
2398 | BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); | ||
2399 | |||
2400 | mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " | ||
2401 | "in the rec is %u\n", num_clusters, p_blkno, first_hash); | ||
2402 | |||
2403 | ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, | ||
2404 | p_blkno, first_hash, num_clusters, xs); | ||
2405 | |||
2406 | out: | ||
2407 | return ret; | ||
2408 | } | ||
2409 | |||
/*
 * Walk every valid xattr bucket in the cluster range starting at @blkno,
 * invoking @func (when non-NULL) on each bucket in turn.  The true bucket
 * count comes from xh_num_buckets in the first bucket's header.  A
 * non-zero return from @func stops the walk and is returned to the
 * caller.
 */
static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, j, ret = 0;
	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket bucket;

	memset(&bucket, 0, sizeof(bucket));

	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
	     clusters, blkno);

	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
		/* Pin every block of this bucket. */
		ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
					bucket.bhs, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);

		mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno,
		     le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
		if (func) {
			ret = func(inode, &bucket, para);
			if (ret) {
				mlog_errno(ret);
				/* bucket is released by the loop below out: */
				break;
			}
		}

		/* Release this bucket's buffers before reading the next. */
		for (j = 0; j < blk_per_bucket; j++)
			brelse(bucket.bhs[j]);
		memset(&bucket, 0, sizeof(bucket));
	}

out:
	/* brelse(NULL) is a no-op, so a zeroed bucket is safe to release. */
	for (j = 0; j < blk_per_bucket; j++)
		brelse(bucket.bhs[j]);

	return ret;
}
2464 | |||
/*
 * Aggregation state threaded (via the @para argument) through
 * ocfs2_list_xattr_bucket() while walking the xattr index tree to
 * build a listxattr-style name list.
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* destination buffer for the name list */
	size_t buffer_size;	/* capacity of @buffer */
	size_t result;		/* running total of bytes emitted */
};
2470 | |||
2471 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
2472 | struct ocfs2_xattr_header *xh, | ||
2473 | int index, | ||
2474 | int *block_off, | ||
2475 | int *new_offset) | ||
2476 | { | ||
2477 | u16 name_offset; | ||
2478 | |||
2479 | if (index < 0 || index >= le16_to_cpu(xh->xh_count)) | ||
2480 | return -EINVAL; | ||
2481 | |||
2482 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | ||
2483 | |||
2484 | *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | ||
2485 | *new_offset = name_offset % inode->i_sb->s_blocksize; | ||
2486 | |||
2487 | return 0; | ||
2488 | } | ||
2489 | |||
2490 | static int ocfs2_list_xattr_bucket(struct inode *inode, | ||
2491 | struct ocfs2_xattr_bucket *bucket, | ||
2492 | void *para) | ||
2493 | { | ||
2494 | int ret = 0, type; | ||
2495 | struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; | ||
2496 | int i, block_off, new_offset; | ||
2497 | const char *prefix, *name; | ||
2498 | |||
2499 | for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { | ||
2500 | struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; | ||
2501 | type = ocfs2_xattr_get_type(entry); | ||
2502 | prefix = ocfs2_xattr_prefix(type); | ||
2503 | |||
2504 | if (prefix) { | ||
2505 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
2506 | bucket->xh, | ||
2507 | i, | ||
2508 | &block_off, | ||
2509 | &new_offset); | ||
2510 | if (ret) | ||
2511 | break; | ||
2512 | |||
2513 | name = (const char *)bucket->bhs[block_off]->b_data + | ||
2514 | new_offset; | ||
2515 | ret = ocfs2_xattr_list_entry(xl->buffer, | ||
2516 | xl->buffer_size, | ||
2517 | &xl->result, | ||
2518 | prefix, name, | ||
2519 | entry->xe_name_len); | ||
2520 | if (ret) | ||
2521 | break; | ||
2522 | } | ||
2523 | } | ||
2524 | |||
2525 | return ret; | ||
2526 | } | ||
2527 | |||
2528 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
2529 | struct ocfs2_xattr_tree_root *xt, | ||
2530 | char *buffer, | ||
2531 | size_t buffer_size) | ||
2532 | { | ||
2533 | struct ocfs2_extent_list *el = &xt->xt_list; | ||
2534 | int ret = 0; | ||
2535 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | ||
2536 | u64 p_blkno = 0; | ||
2537 | struct ocfs2_xattr_tree_list xl = { | ||
2538 | .buffer = buffer, | ||
2539 | .buffer_size = buffer_size, | ||
2540 | .result = 0, | ||
2541 | }; | ||
2542 | |||
2543 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
2544 | return 0; | ||
2545 | |||
2546 | while (name_hash > 0) { | ||
2547 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
2548 | &e_cpos, &num_clusters, el); | ||
2549 | if (ret) { | ||
2550 | mlog_errno(ret); | ||
2551 | goto out; | ||
2552 | } | ||
2553 | |||
2554 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
2555 | ocfs2_list_xattr_bucket, | ||
2556 | &xl); | ||
2557 | if (ret) { | ||
2558 | mlog_errno(ret); | ||
2559 | goto out; | ||
2560 | } | ||
2561 | |||
2562 | if (e_cpos == 0) | ||
2563 | break; | ||
2564 | |||
2565 | name_hash = e_cpos - 1; | ||
2566 | } | ||
2567 | |||
2568 | ret = xl.result; | ||
2569 | out: | ||
2570 | return ret; | ||
2571 | } | ||
2572 | |||
2573 | static int cmp_xe(const void *a, const void *b) | ||
2574 | { | ||
2575 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
2576 | u32 l_hash = le32_to_cpu(l->xe_name_hash); | ||
2577 | u32 r_hash = le32_to_cpu(r->xe_name_hash); | ||
2578 | |||
2579 | if (l_hash > r_hash) | ||
2580 | return 1; | ||
2581 | if (l_hash < r_hash) | ||
2582 | return -1; | ||
2583 | return 0; | ||
2584 | } | ||
2585 | |||
2586 | static void swap_xe(void *a, void *b, int size) | ||
2587 | { | ||
2588 | struct ocfs2_xattr_entry *l = a, *r = b, tmp; | ||
2589 | |||
2590 | tmp = *l; | ||
2591 | memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); | ||
2592 | memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); | ||
2593 | } | ||
2594 | |||
/*
 * When the ocfs2_xattr_block is filled up, new bucket will be created
 * and all the xattr entries will be moved to the new bucket.
 * Note: we need to sort the entries since they are not saved in order
 * in the ocfs2_xattr_block.
 *
 * @xb_bh:   source xattr block.
 * @xh_bh:   first block of the new bucket; receives the new header and
 *           the entry array.
 * @data_bh: when non-NULL, the names/values are copied here instead of
 *           into @xh_bh.
 */
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
					   struct buffer_head *xb_bh,
					   struct buffer_head *xh_bh,
					   struct buffer_head *data_bh)
{
	int i, blocksize = inode->i_sb->s_blocksize;
	u16 offset, size, off_change;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_header *xh =
		(struct ocfs2_xattr_header *)xh_bh->b_data;
	u16 count = le16_to_cpu(xb_xh->xh_count);
	char *target = xh_bh->b_data, *src = xb_bh->b_data;

	mlog(0, "cp xattr from block %llu to bucket %llu\n",
	     (unsigned long long)xb_bh->b_blocknr,
	     (unsigned long long)xh_bh->b_blocknr);

	memset(xh_bh->b_data, 0, blocksize);
	if (data_bh)
		memset(data_bh->b_data, 0, blocksize);
	/*
	 * Since the xe_name_offset is based on ocfs2_xattr_header,
	 * there is a offset change corresponding to the change of
	 * ocfs2_xattr_header's position.
	 */
	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	/* The last entry has the lowest name offset: it bounds the region. */
	xe = &xb_xh->xh_entries[count - 1];
	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
	size = blocksize - offset;

	/* copy all the names and values. */
	if (data_bh)
		target = data_bh->b_data;
	memcpy(target + offset, src + offset, size);

	/* Init new header now. */
	xh->xh_count = xb_xh->xh_count;
	xh->xh_num_buckets = cpu_to_le16(1);
	xh->xh_name_value_len = cpu_to_le16(size);
	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);

	/* copy all the entries. */
	target = xh_bh->b_data;
	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
	size = count * sizeof(struct ocfs2_xattr_entry);
	memcpy(target + offset, (char *)xb_xh + offset, size);

	/* Change the xe offset for all the xe because of the move. */
	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
		offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	for (i = 0; i < count; i++)
		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);

	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
	     offset, size, off_change);

	/* Restore the hash order the bucket format requires. */
	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
	     cmp_xe, swap_xe);
}
2663 | |||
/*
 * After we move xattr from block to index btree, we have to
 * update ocfs2_xattr_search to the new xe and base.
 *
 * When the entry is in xattr block, xattr_bh indicates the storage place.
 * While if the entry is in index b-tree, "bucket" indicates the
 * real place of the xattr.
 *
 * @old_bh: the xattr block the entries were copied out of.
 * @new_bh: first block of the new bucket the entries now live in.
 *
 * Returns 0 on success, negative errno if the rest of the bucket
 * cannot be read.
 */
static int ocfs2_xattr_update_xattr_search(struct inode *inode,
					   struct ocfs2_xattr_search *xs,
					   struct buffer_head *old_bh,
					   struct buffer_head *new_bh)
{
	int ret = 0;
	char *buf = old_bh->b_data;
	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
	int i, blocksize = inode->i_sb->s_blocksize;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	/* Retarget the search at the first block of the new bucket. */
	xs->bucket.bhs[0] = new_bh;
	get_bh(new_bh);
	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
	xs->header = xs->bucket.xh;

	xs->base = new_bh->b_data;
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (!xs->not_found) {
		/*
		 * When a bucket spans more than one block, pull in the
		 * remaining blocks so the whole bucket is cached.
		 */
		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
			ret = ocfs2_read_blocks(inode,
					xs->bucket.bhs[0]->b_blocknr + 1,
					blk_per_bucket - 1, &xs->bucket.bhs[1],
					0);
			if (ret) {
				mlog_errno(ret);
				return ret;
			}

			/*
			 * Re-derive xs->here: same entry index as in the
			 * old block, now relative to the new header.
			 */
			i = xs->here - old_xh->xh_entries;
			xs->here = &xs->header->xh_entries[i];
		}
	}

	return ret;
}
2710 | |||
2711 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
2712 | struct ocfs2_xattr_search *xs) | ||
2713 | { | ||
2714 | int ret, credits = OCFS2_SUBALLOC_ALLOC; | ||
2715 | u32 bit_off, len; | ||
2716 | u64 blkno; | ||
2717 | handle_t *handle; | ||
2718 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2719 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2720 | struct ocfs2_alloc_context *data_ac; | ||
2721 | struct buffer_head *xh_bh = NULL, *data_bh = NULL; | ||
2722 | struct buffer_head *xb_bh = xs->xattr_bh; | ||
2723 | struct ocfs2_xattr_block *xb = | ||
2724 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
2725 | struct ocfs2_xattr_tree_root *xr; | ||
2726 | u16 xb_flags = le16_to_cpu(xb->xb_flags); | ||
2727 | u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2728 | |||
2729 | mlog(0, "create xattr index block for %llu\n", | ||
2730 | (unsigned long long)xb_bh->b_blocknr); | ||
2731 | |||
2732 | BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); | ||
2733 | |||
2734 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | ||
2735 | if (ret) { | ||
2736 | mlog_errno(ret); | ||
2737 | goto out; | ||
2738 | } | ||
2739 | |||
2740 | /* | ||
2741 | * XXX: | ||
2742 | * We can use this lock for now, and maybe move to a dedicated mutex | ||
2743 | * if performance becomes a problem later. | ||
2744 | */ | ||
2745 | down_write(&oi->ip_alloc_sem); | ||
2746 | |||
2747 | /* | ||
2748 | * 3 more credits, one for xattr block update, one for the 1st block | ||
2749 | * of the new xattr bucket and one for the value/data. | ||
2750 | */ | ||
2751 | credits += 3; | ||
2752 | handle = ocfs2_start_trans(osb, credits); | ||
2753 | if (IS_ERR(handle)) { | ||
2754 | ret = PTR_ERR(handle); | ||
2755 | mlog_errno(ret); | ||
2756 | goto out_sem; | ||
2757 | } | ||
2758 | |||
2759 | ret = ocfs2_journal_access(handle, inode, xb_bh, | ||
2760 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2761 | if (ret) { | ||
2762 | mlog_errno(ret); | ||
2763 | goto out_commit; | ||
2764 | } | ||
2765 | |||
2766 | ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); | ||
2767 | if (ret) { | ||
2768 | mlog_errno(ret); | ||
2769 | goto out_commit; | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * The bucket may spread in many blocks, and | ||
2774 | * we will only touch the 1st block and the last block | ||
2775 | * in the whole bucket(one for entry and one for data). | ||
2776 | */ | ||
2777 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); | ||
2778 | |||
2779 | mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); | ||
2780 | |||
2781 | xh_bh = sb_getblk(inode->i_sb, blkno); | ||
2782 | if (!xh_bh) { | ||
2783 | ret = -EIO; | ||
2784 | mlog_errno(ret); | ||
2785 | goto out_commit; | ||
2786 | } | ||
2787 | |||
2788 | ocfs2_set_new_buffer_uptodate(inode, xh_bh); | ||
2789 | |||
2790 | ret = ocfs2_journal_access(handle, inode, xh_bh, | ||
2791 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2792 | if (ret) { | ||
2793 | mlog_errno(ret); | ||
2794 | goto out_commit; | ||
2795 | } | ||
2796 | |||
2797 | if (bpb > 1) { | ||
2798 | data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1); | ||
2799 | if (!data_bh) { | ||
2800 | ret = -EIO; | ||
2801 | mlog_errno(ret); | ||
2802 | goto out_commit; | ||
2803 | } | ||
2804 | |||
2805 | ocfs2_set_new_buffer_uptodate(inode, data_bh); | ||
2806 | |||
2807 | ret = ocfs2_journal_access(handle, inode, data_bh, | ||
2808 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2809 | if (ret) { | ||
2810 | mlog_errno(ret); | ||
2811 | goto out_commit; | ||
2812 | } | ||
2813 | } | ||
2814 | |||
2815 | ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); | ||
2816 | |||
2817 | ocfs2_journal_dirty(handle, xh_bh); | ||
2818 | if (data_bh) | ||
2819 | ocfs2_journal_dirty(handle, data_bh); | ||
2820 | |||
2821 | ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); | ||
2822 | |||
2823 | /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ | ||
2824 | memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - | ||
2825 | offsetof(struct ocfs2_xattr_block, xb_attrs)); | ||
2826 | |||
2827 | xr = &xb->xb_attrs.xb_root; | ||
2828 | xr->xt_clusters = cpu_to_le32(1); | ||
2829 | xr->xt_last_eb_blk = 0; | ||
2830 | xr->xt_list.l_tree_depth = 0; | ||
2831 | xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); | ||
2832 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); | ||
2833 | |||
2834 | xr->xt_list.l_recs[0].e_cpos = 0; | ||
2835 | xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); | ||
2836 | xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); | ||
2837 | |||
2838 | xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); | ||
2839 | |||
2840 | ret = ocfs2_journal_dirty(handle, xb_bh); | ||
2841 | if (ret) { | ||
2842 | mlog_errno(ret); | ||
2843 | goto out_commit; | ||
2844 | } | ||
2845 | |||
2846 | out_commit: | ||
2847 | ocfs2_commit_trans(osb, handle); | ||
2848 | |||
2849 | out_sem: | ||
2850 | up_write(&oi->ip_alloc_sem); | ||
2851 | |||
2852 | out: | ||
2853 | if (data_ac) | ||
2854 | ocfs2_free_alloc_context(data_ac); | ||
2855 | |||
2856 | brelse(xh_bh); | ||
2857 | brelse(data_bh); | ||
2858 | |||
2859 | return ret; | ||
2860 | } | ||
2861 | |||
2862 | static int cmp_xe_offset(const void *a, const void *b) | ||
2863 | { | ||
2864 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
2865 | u32 l_name_offset = le16_to_cpu(l->xe_name_offset); | ||
2866 | u32 r_name_offset = le16_to_cpu(r->xe_name_offset); | ||
2867 | |||
2868 | if (l_name_offset < r_name_offset) | ||
2869 | return 1; | ||
2870 | if (l_name_offset > r_name_offset) | ||
2871 | return -1; | ||
2872 | return 0; | ||
2873 | } | ||
2874 | |||
2875 | /* | ||
2876 | * defrag a xattr bucket if we find that the bucket has some | ||
2877 | * holes beteen name/value pairs. | ||
2878 | * We will move all the name/value pairs to the end of the bucket | ||
2879 | * so that we can spare some space for insertion. | ||
2880 | */ | ||
2881 | static int ocfs2_defrag_xattr_bucket(struct inode *inode, | ||
2882 | struct ocfs2_xattr_bucket *bucket) | ||
2883 | { | ||
2884 | int ret, i; | ||
2885 | size_t end, offset, len, value_len; | ||
2886 | struct ocfs2_xattr_header *xh; | ||
2887 | char *entries, *buf, *bucket_buf = NULL; | ||
2888 | u64 blkno = bucket->bhs[0]->b_blocknr; | ||
2889 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2890 | u16 xh_free_start; | ||
2891 | size_t blocksize = inode->i_sb->s_blocksize; | ||
2892 | handle_t *handle; | ||
2893 | struct buffer_head **bhs; | ||
2894 | struct ocfs2_xattr_entry *xe; | ||
2895 | |||
2896 | bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
2897 | GFP_NOFS); | ||
2898 | if (!bhs) | ||
2899 | return -ENOMEM; | ||
2900 | |||
2901 | ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0); | ||
2902 | if (ret) | ||
2903 | goto out; | ||
2904 | |||
2905 | /* | ||
2906 | * In order to make the operation more efficient and generic, | ||
2907 | * we copy all the blocks into a contiguous memory and do the | ||
2908 | * defragment there, so if anything is error, we will not touch | ||
2909 | * the real block. | ||
2910 | */ | ||
2911 | bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); | ||
2912 | if (!bucket_buf) { | ||
2913 | ret = -EIO; | ||
2914 | goto out; | ||
2915 | } | ||
2916 | |||
2917 | buf = bucket_buf; | ||
2918 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) | ||
2919 | memcpy(buf, bhs[i]->b_data, blocksize); | ||
2920 | |||
2921 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); | ||
2922 | if (IS_ERR(handle)) { | ||
2923 | ret = PTR_ERR(handle); | ||
2924 | handle = NULL; | ||
2925 | mlog_errno(ret); | ||
2926 | goto out; | ||
2927 | } | ||
2928 | |||
2929 | for (i = 0; i < blk_per_bucket; i++) { | ||
2930 | ret = ocfs2_journal_access(handle, inode, bhs[i], | ||
2931 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2932 | if (ret < 0) { | ||
2933 | mlog_errno(ret); | ||
2934 | goto commit; | ||
2935 | } | ||
2936 | } | ||
2937 | |||
2938 | xh = (struct ocfs2_xattr_header *)bucket_buf; | ||
2939 | entries = (char *)xh->xh_entries; | ||
2940 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
2941 | |||
2942 | mlog(0, "adjust xattr bucket in %llu, count = %u, " | ||
2943 | "xh_free_start = %u, xh_name_value_len = %u.\n", | ||
2944 | blkno, le16_to_cpu(xh->xh_count), xh_free_start, | ||
2945 | le16_to_cpu(xh->xh_name_value_len)); | ||
2946 | |||
2947 | /* | ||
2948 | * sort all the entries by their offset. | ||
2949 | * the largest will be the first, so that we can | ||
2950 | * move them to the end one by one. | ||
2951 | */ | ||
2952 | sort(entries, le16_to_cpu(xh->xh_count), | ||
2953 | sizeof(struct ocfs2_xattr_entry), | ||
2954 | cmp_xe_offset, swap_xe); | ||
2955 | |||
2956 | /* Move all name/values to the end of the bucket. */ | ||
2957 | xe = xh->xh_entries; | ||
2958 | end = OCFS2_XATTR_BUCKET_SIZE; | ||
2959 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { | ||
2960 | offset = le16_to_cpu(xe->xe_name_offset); | ||
2961 | if (ocfs2_xattr_is_local(xe)) | ||
2962 | value_len = OCFS2_XATTR_SIZE( | ||
2963 | le64_to_cpu(xe->xe_value_size)); | ||
2964 | else | ||
2965 | value_len = OCFS2_XATTR_ROOT_SIZE; | ||
2966 | len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len; | ||
2967 | |||
2968 | /* | ||
2969 | * We must make sure that the name/value pair | ||
2970 | * exist in the same block. So adjust end to | ||
2971 | * the previous block end if needed. | ||
2972 | */ | ||
2973 | if (((end - len) / blocksize != | ||
2974 | (end - 1) / blocksize)) | ||
2975 | end = end - end % blocksize; | ||
2976 | |||
2977 | if (end > offset + len) { | ||
2978 | memmove(bucket_buf + end - len, | ||
2979 | bucket_buf + offset, len); | ||
2980 | xe->xe_name_offset = cpu_to_le16(end - len); | ||
2981 | } | ||
2982 | |||
2983 | mlog_bug_on_msg(end < offset + len, "Defrag check failed for " | ||
2984 | "bucket %llu\n", (unsigned long long)blkno); | ||
2985 | |||
2986 | end -= len; | ||
2987 | } | ||
2988 | |||
2989 | mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " | ||
2990 | "bucket %llu\n", (unsigned long long)blkno); | ||
2991 | |||
2992 | if (xh_free_start == end) | ||
2993 | goto commit; | ||
2994 | |||
2995 | memset(bucket_buf + xh_free_start, 0, end - xh_free_start); | ||
2996 | xh->xh_free_start = cpu_to_le16(end); | ||
2997 | |||
2998 | /* sort the entries by their name_hash. */ | ||
2999 | sort(entries, le16_to_cpu(xh->xh_count), | ||
3000 | sizeof(struct ocfs2_xattr_entry), | ||
3001 | cmp_xe, swap_xe); | ||
3002 | |||
3003 | buf = bucket_buf; | ||
3004 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { | ||
3005 | memcpy(bhs[i]->b_data, buf, blocksize); | ||
3006 | ocfs2_journal_dirty(handle, bhs[i]); | ||
3007 | } | ||
3008 | |||
3009 | commit: | ||
3010 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
3011 | out: | ||
3012 | |||
3013 | if (bhs) { | ||
3014 | for (i = 0; i < blk_per_bucket; i++) | ||
3015 | brelse(bhs[i]); | ||
3016 | } | ||
3017 | kfree(bhs); | ||
3018 | |||
3019 | kfree(bucket_buf); | ||
3020 | return ret; | ||
3021 | } | ||
3022 | |||
/*
 * Move half nums of the xattr bucket in the previous cluster to this new
 * cluster. We only touch the last cluster of the previous extend record.
 *
 * first_bh is the first buffer_head of a series of bucket in the same
 * extent rec and header_bh is the header of one bucket in this cluster.
 * They will be updated if we move the data header_bh contains to the new
 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
					       handle_t *handle,
					       struct buffer_head **first_bh,
					       struct buffer_head **header_bh,
					       u64 new_blkno,
					       u64 prev_blkno,
					       u32 num_clusters,
					       u32 *first_hash)
{
	int i, ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	int blocksize = inode->i_sb->s_blocksize;
	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
	struct ocfs2_xattr_header *new_xh;
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)((*first_bh)->b_data);

	/* Only meaningful when a cluster holds more than one bucket. */
	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);

	prev_bh = *first_bh;
	get_bh(prev_bh);
	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;

	/* Jump to the second half of the last cluster in the extent rec. */
	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;

	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
	     prev_blkno, new_blkno);

	/*
	 * We need to update the 1st half of the new cluster and
	 * 1 more for the update of the 1st bucket of the previous
	 * extent record.
	 */
	credits = bpc / 2 + 1;
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, prev_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Copy the moved half block by block into the new cluster. */
	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
		old_bh = new_bh = NULL;
		new_bh = sb_getblk(inode->i_sb, new_blkno);
		if (!new_bh) {
			ret = -EIO;
			mlog_errno(ret);
			goto out;
		}

		ocfs2_set_new_buffer_uptodate(inode, new_bh);

		ret = ocfs2_journal_access(handle, inode, new_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret < 0) {
			mlog_errno(ret);
			brelse(new_bh);
			goto out;
		}

		ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
		if (ret < 0) {
			mlog_errno(ret);
			brelse(new_bh);
			goto out;
		}

		memcpy(new_bh->b_data, old_bh->b_data, blocksize);

		if (i == 0) {
			/*
			 * First block of the new cluster becomes its bucket
			 * header: record the bucket count and first hash.
			 */
			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);

			if (first_hash)
				*first_hash = le32_to_cpu(
					new_xh->xh_entries[0].xe_name_hash);
			new_first_bh = new_bh;
			get_bh(new_first_bh);
		}

		ocfs2_journal_dirty(handle, new_bh);

		/*
		 * If the caller's insert bucket just moved, retarget both
		 * of the caller's buffer heads at the new cluster.
		 */
		if (*header_bh == old_bh) {
			brelse(*header_bh);
			*header_bh = new_bh;
			get_bh(*header_bh);

			brelse(*first_bh);
			*first_bh = new_first_bh;
			get_bh(*first_bh);
		}
		brelse(new_bh);
		brelse(old_bh);
	}

	/* The old extent rec now owns half as many buckets. */
	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));

	ocfs2_journal_dirty(handle, prev_bh);
out:
	brelse(prev_bh);
	brelse(new_first_bh);
	return ret;
}
3144 | |||
3145 | static int ocfs2_read_xattr_bucket(struct inode *inode, | ||
3146 | u64 blkno, | ||
3147 | struct buffer_head **bhs, | ||
3148 | int new) | ||
3149 | { | ||
3150 | int ret = 0; | ||
3151 | u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3152 | |||
3153 | if (!new) | ||
3154 | return ocfs2_read_blocks(inode, blkno, | ||
3155 | blk_per_bucket, bhs, 0); | ||
3156 | |||
3157 | for (i = 0; i < blk_per_bucket; i++) { | ||
3158 | bhs[i] = sb_getblk(inode->i_sb, blkno + i); | ||
3159 | if (bhs[i] == NULL) { | ||
3160 | ret = -EIO; | ||
3161 | mlog_errno(ret); | ||
3162 | break; | ||
3163 | } | ||
3164 | ocfs2_set_new_buffer_uptodate(inode, bhs[i]); | ||
3165 | } | ||
3166 | |||
3167 | return ret; | ||
3168 | } | ||
3169 | |||
3170 | /* | ||
3171 | * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). | ||
3172 | * first_hash will record the 1st hash of the new bucket. | ||
3173 | */ | ||
3174 | static int ocfs2_half_xattr_bucket(struct inode *inode, | ||
3175 | handle_t *handle, | ||
3176 | u64 blk, | ||
3177 | u64 new_blk, | ||
3178 | u32 *first_hash, | ||
3179 | int new_bucket_head) | ||
3180 | { | ||
3181 | int ret, i; | ||
3182 | u16 count, start, len, name_value_len, xe_len, name_offset; | ||
3183 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3184 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
3185 | struct ocfs2_xattr_header *xh; | ||
3186 | struct ocfs2_xattr_entry *xe; | ||
3187 | int blocksize = inode->i_sb->s_blocksize; | ||
3188 | |||
3189 | mlog(0, "move half of xattrs from bucket %llu to %llu\n", | ||
3190 | blk, new_blk); | ||
3191 | |||
3192 | s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
3193 | if (!s_bhs) | ||
3194 | return -ENOMEM; | ||
3195 | |||
3196 | ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0); | ||
3197 | if (ret) { | ||
3198 | mlog_errno(ret); | ||
3199 | goto out; | ||
3200 | } | ||
3201 | |||
3202 | ret = ocfs2_journal_access(handle, inode, s_bhs[0], | ||
3203 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3204 | if (ret) { | ||
3205 | mlog_errno(ret); | ||
3206 | goto out; | ||
3207 | } | ||
3208 | |||
3209 | t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
3210 | if (!t_bhs) { | ||
3211 | ret = -ENOMEM; | ||
3212 | goto out; | ||
3213 | } | ||
3214 | |||
3215 | ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); | ||
3216 | if (ret) { | ||
3217 | mlog_errno(ret); | ||
3218 | goto out; | ||
3219 | } | ||
3220 | |||
3221 | for (i = 0; i < blk_per_bucket; i++) { | ||
3222 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
3223 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
3224 | if (ret) { | ||
3225 | mlog_errno(ret); | ||
3226 | goto out; | ||
3227 | } | ||
3228 | } | ||
3229 | |||
3230 | /* copy the whole bucket to the new first. */ | ||
3231 | for (i = 0; i < blk_per_bucket; i++) | ||
3232 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
3233 | |||
3234 | /* update the new bucket. */ | ||
3235 | xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; | ||
3236 | count = le16_to_cpu(xh->xh_count); | ||
3237 | start = count / 2; | ||
3238 | |||
3239 | /* | ||
3240 | * Calculate the total name/value len and xh_free_start for | ||
3241 | * the old bucket first. | ||
3242 | */ | ||
3243 | name_offset = OCFS2_XATTR_BUCKET_SIZE; | ||
3244 | name_value_len = 0; | ||
3245 | for (i = 0; i < start; i++) { | ||
3246 | xe = &xh->xh_entries[i]; | ||
3247 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
3248 | if (ocfs2_xattr_is_local(xe)) | ||
3249 | xe_len += | ||
3250 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
3251 | else | ||
3252 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
3253 | name_value_len += xe_len; | ||
3254 | if (le16_to_cpu(xe->xe_name_offset) < name_offset) | ||
3255 | name_offset = le16_to_cpu(xe->xe_name_offset); | ||
3256 | } | ||
3257 | |||
3258 | /* | ||
3259 | * Now begin the modification to the new bucket. | ||
3260 | * | ||
3261 | * In the new bucket, We just move the xattr entry to the beginning | ||
3262 | * and don't touch the name/value. So there will be some holes in the | ||
3263 | * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is | ||
3264 | * called. | ||
3265 | */ | ||
3266 | xe = &xh->xh_entries[start]; | ||
3267 | len = sizeof(struct ocfs2_xattr_entry) * (count - start); | ||
3268 | mlog(0, "mv xattr entry len %d from %d to %d\n", len, | ||
3269 | (int)((char *)xe - (char *)xh), | ||
3270 | (int)((char *)xh->xh_entries - (char *)xh)); | ||
3271 | memmove((char *)xh->xh_entries, (char *)xe, len); | ||
3272 | xe = &xh->xh_entries[count - start]; | ||
3273 | len = sizeof(struct ocfs2_xattr_entry) * start; | ||
3274 | memset((char *)xe, 0, len); | ||
3275 | |||
3276 | le16_add_cpu(&xh->xh_count, -start); | ||
3277 | le16_add_cpu(&xh->xh_name_value_len, -name_value_len); | ||
3278 | |||
3279 | /* Calculate xh_free_start for the new bucket. */ | ||
3280 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
3281 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
3282 | xe = &xh->xh_entries[i]; | ||
3283 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
3284 | if (ocfs2_xattr_is_local(xe)) | ||
3285 | xe_len += | ||
3286 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
3287 | else | ||
3288 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
3289 | if (le16_to_cpu(xe->xe_name_offset) < | ||
3290 | le16_to_cpu(xh->xh_free_start)) | ||
3291 | xh->xh_free_start = xe->xe_name_offset; | ||
3292 | } | ||
3293 | |||
3294 | /* set xh->xh_num_buckets for the new xh. */ | ||
3295 | if (new_bucket_head) | ||
3296 | xh->xh_num_buckets = cpu_to_le16(1); | ||
3297 | else | ||
3298 | xh->xh_num_buckets = 0; | ||
3299 | |||
3300 | for (i = 0; i < blk_per_bucket; i++) { | ||
3301 | ocfs2_journal_dirty(handle, t_bhs[i]); | ||
3302 | if (ret) | ||
3303 | mlog_errno(ret); | ||
3304 | } | ||
3305 | |||
3306 | /* store the first_hash of the new bucket. */ | ||
3307 | if (first_hash) | ||
3308 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
3309 | |||
3310 | /* | ||
3311 | * Now only update the 1st block of the old bucket. | ||
3312 | * Please note that the entry has been sorted already above. | ||
3313 | */ | ||
3314 | xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; | ||
3315 | memset(&xh->xh_entries[start], 0, | ||
3316 | sizeof(struct ocfs2_xattr_entry) * (count - start)); | ||
3317 | xh->xh_count = cpu_to_le16(start); | ||
3318 | xh->xh_free_start = cpu_to_le16(name_offset); | ||
3319 | xh->xh_name_value_len = cpu_to_le16(name_value_len); | ||
3320 | |||
3321 | ocfs2_journal_dirty(handle, s_bhs[0]); | ||
3322 | if (ret) | ||
3323 | mlog_errno(ret); | ||
3324 | |||
3325 | out: | ||
3326 | if (s_bhs) { | ||
3327 | for (i = 0; i < blk_per_bucket; i++) | ||
3328 | brelse(s_bhs[i]); | ||
3329 | } | ||
3330 | kfree(s_bhs); | ||
3331 | |||
3332 | if (t_bhs) { | ||
3333 | for (i = 0; i < blk_per_bucket; i++) | ||
3334 | brelse(t_bhs[i]); | ||
3335 | } | ||
3336 | kfree(t_bhs); | ||
3337 | |||
3338 | return ret; | ||
3339 | } | ||
3340 | |||
3341 | /* | ||
3342 | * Copy xattr from one bucket to another bucket. | ||
3343 | * | ||
3344 | * The caller must make sure that the journal transaction | ||
3345 | * has enough space for journaling. | ||
3346 | */ | ||
3347 | static int ocfs2_cp_xattr_bucket(struct inode *inode, | ||
3348 | handle_t *handle, | ||
3349 | u64 s_blkno, | ||
3350 | u64 t_blkno, | ||
3351 | int t_is_new) | ||
3352 | { | ||
3353 | int ret, i; | ||
3354 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3355 | int blocksize = inode->i_sb->s_blocksize; | ||
3356 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
3357 | |||
3358 | BUG_ON(s_blkno == t_blkno); | ||
3359 | |||
3360 | mlog(0, "cp bucket %llu to %llu, target is %d\n", | ||
3361 | s_blkno, t_blkno, t_is_new); | ||
3362 | |||
3363 | s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
3364 | GFP_NOFS); | ||
3365 | if (!s_bhs) | ||
3366 | return -ENOMEM; | ||
3367 | |||
3368 | ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); | ||
3369 | if (ret) | ||
3370 | goto out; | ||
3371 | |||
3372 | t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
3373 | GFP_NOFS); | ||
3374 | if (!t_bhs) { | ||
3375 | ret = -ENOMEM; | ||
3376 | goto out; | ||
3377 | } | ||
3378 | |||
3379 | ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); | ||
3380 | if (ret) | ||
3381 | goto out; | ||
3382 | |||
3383 | for (i = 0; i < blk_per_bucket; i++) { | ||
3384 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
3385 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3386 | if (ret) | ||
3387 | goto out; | ||
3388 | } | ||
3389 | |||
3390 | for (i = 0; i < blk_per_bucket; i++) { | ||
3391 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
3392 | ocfs2_journal_dirty(handle, t_bhs[i]); | ||
3393 | } | ||
3394 | |||
3395 | out: | ||
3396 | if (s_bhs) { | ||
3397 | for (i = 0; i < blk_per_bucket; i++) | ||
3398 | brelse(s_bhs[i]); | ||
3399 | } | ||
3400 | kfree(s_bhs); | ||
3401 | |||
3402 | if (t_bhs) { | ||
3403 | for (i = 0; i < blk_per_bucket; i++) | ||
3404 | brelse(t_bhs[i]); | ||
3405 | } | ||
3406 | kfree(t_bhs); | ||
3407 | |||
3408 | return ret; | ||
3409 | } | ||
3410 | |||
/*
 * Copy one xattr cluster from src_blk to to_blk.
 * The to_blk will become the first bucket header of the cluster, so its
 * xh_num_buckets will be initialized as the bucket num in the cluster.
 *
 * @first_bh:   header of the 1st bucket of the source extent rec; its
 *              bucket count is decremented by the buckets moved away.
 * @first_hash: if non-NULL, set to the first name_hash of the new cluster.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_cp_xattr_cluster(struct inode *inode,
				  handle_t *handle,
				  struct buffer_head *first_bh,
				  u64 src_blk,
				  u64 to_blk,
				  u32 *first_hash)
{
	int i, ret, credits;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_header *xh;
	u64 to_blk_start = to_blk;

	mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);

	/*
	 * We need to update the new cluster and 1 more for the update of
	 * the 1st bucket of the previous extent rec.
	 */
	credits = bpc + 1;
	ret = ocfs2_extend_trans(handle, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, first_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Copy the cluster bucket by bucket. */
	for (i = 0; i < num_buckets; i++) {
		ret = ocfs2_cp_xattr_bucket(inode, handle,
					    src_blk, to_blk, 1);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	}

	/* update the old bucket header. */
	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);

	ocfs2_journal_dirty(handle, first_bh);

	/* update the new bucket header. */
	ret = ocfs2_read_block(inode, to_blk_start, &bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = (struct ocfs2_xattr_header *)bh->b_data;
	xh->xh_num_buckets = cpu_to_le16(num_buckets);

	ocfs2_journal_dirty(handle, bh);

	if (first_hash)
		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
out:
	brelse(bh);
	return ret;
}
3494 | |||
3495 | /* | ||
3496 | * Move half of the xattrs in this cluster to the new cluster. | ||
3497 | * This function should only be called when bucket size == cluster size. | ||
3498 | * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. | ||
3499 | */ | ||
3500 | static int ocfs2_half_xattr_cluster(struct inode *inode, | ||
3501 | handle_t *handle, | ||
3502 | u64 prev_blk, | ||
3503 | u64 new_blk, | ||
3504 | u32 *first_hash) | ||
3505 | { | ||
3506 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3507 | int ret, credits = 2 * blk_per_bucket; | ||
3508 | |||
3509 | BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); | ||
3510 | |||
3511 | ret = ocfs2_extend_trans(handle, credits); | ||
3512 | if (ret) { | ||
3513 | mlog_errno(ret); | ||
3514 | return ret; | ||
3515 | } | ||
3516 | |||
3517 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
3518 | return ocfs2_half_xattr_bucket(inode, handle, prev_blk, | ||
3519 | new_blk, first_hash, 1); | ||
3520 | } | ||
3521 | |||
3522 | /* | ||
3523 | * Move some xattrs from the old cluster to the new one since they are not | ||
3524 | * contiguous in ocfs2 xattr tree. | ||
3525 | * | ||
3526 | * new_blk starts a new separate cluster, and we will move some xattrs from | ||
3527 | * prev_blk to it. v_start will be set as the first name hash value in this | ||
3528 | * new cluster so that it can be used as e_cpos during tree insertion and | ||
3529 | * don't collide with our original b-tree operations. first_bh and header_bh | ||
3530 | * will also be updated since they will be used in ocfs2_extend_xattr_bucket | ||
3531 | * to extend the insert bucket. | ||
3532 | * | ||
3533 | * The problem is how much xattr should we move to the new one and when should | ||
3534 | * we update first_bh and header_bh? | ||
3535 | * 1. If cluster size > bucket size, that means the previous cluster has more | ||
3536 | * than 1 bucket, so just move half nums of bucket into the new cluster and | ||
3537 | * update the first_bh and header_bh if the insert bucket has been moved | ||
3538 | * to the new cluster. | ||
3539 | * 2. If cluster_size == bucket_size: | ||
3540 | * a) If the previous extent rec has more than one cluster and the insert | ||
3541 | * place isn't in the last cluster, copy the entire last cluster to the | ||
3542 | * new one. This time, we don't need to upate the first_bh and header_bh | ||
3543 | * since they will not be moved into the new cluster. | ||
3544 | * b) Otherwise, move the bottom half of the xattrs in the last cluster into | ||
3545 | * the new one. And we set the extend flag to zero if the insert place is | ||
3546 | * moved into the new allocated cluster since no extend is needed. | ||
3547 | */ | ||
3548 | static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, | ||
3549 | handle_t *handle, | ||
3550 | struct buffer_head **first_bh, | ||
3551 | struct buffer_head **header_bh, | ||
3552 | u64 new_blk, | ||
3553 | u64 prev_blk, | ||
3554 | u32 prev_clusters, | ||
3555 | u32 *v_start, | ||
3556 | int *extend) | ||
3557 | { | ||
3558 | int ret = 0; | ||
3559 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
3560 | |||
3561 | mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", | ||
3562 | prev_blk, prev_clusters, new_blk); | ||
3563 | |||
3564 | if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) | ||
3565 | ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, | ||
3566 | handle, | ||
3567 | first_bh, | ||
3568 | header_bh, | ||
3569 | new_blk, | ||
3570 | prev_blk, | ||
3571 | prev_clusters, | ||
3572 | v_start); | ||
3573 | else { | ||
3574 | u64 last_blk = prev_blk + bpc * (prev_clusters - 1); | ||
3575 | |||
3576 | if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) | ||
3577 | ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, | ||
3578 | last_blk, new_blk, | ||
3579 | v_start); | ||
3580 | else { | ||
3581 | ret = ocfs2_half_xattr_cluster(inode, handle, | ||
3582 | last_blk, new_blk, | ||
3583 | v_start); | ||
3584 | |||
3585 | if ((*header_bh)->b_blocknr == last_blk && extend) | ||
3586 | *extend = 0; | ||
3587 | } | ||
3588 | } | ||
3589 | |||
3590 | return ret; | ||
3591 | } | ||
3592 | |||
/*
 * Add a new cluster for xattr storage.
 *
 * If the new cluster is contiguous with the previous one, it will be
 * appended to the same extent record, and num_clusters will be updated.
 * If not, we will insert a new extent for it and move some xattrs in
 * the last cluster into the new allocated one.
 * We also need to limit the maximum size of a btree leaf, otherwise we'll
 * lose the benefits of hashing because we'll have to search large leaves.
 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE (or clustersize,
 * if it's bigger).
 *
 * first_bh is the first block of the previous extent rec and header_bh
 * indicates the bucket we will insert the new xattrs. They will be updated
 * when the header_bh is moved into the new cluster.
 */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct buffer_head **first_bh,
				       struct buffer_head **header_bh,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       u64 prev_blkno,
				       int *extend)
{
	int ret, credits;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_tree et;

	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
	     "previous xattr blkno = %llu\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     prev_cpos, prev_blkno);

	ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);

	/*
	 * Reserve data and metadata allocators up front so the credit
	 * estimate below accounts for a possible tree growth.
	 */
	ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				    &data_ac, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
					    clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		/* Clear it so "leave" doesn't commit an ERR_PTR value. */
		handle = NULL;
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		/* -ENOSPC is an expected outcome, not worth a log entry. */
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);

	if (prev_blkno + prev_clusters * bpc == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one. The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
		     num_bits);
	} else {
		/*
		 * Not contiguous (or the leaf would grow past the limit):
		 * migrate some xattrs into the new cluster so the new leaf
		 * has a valid starting hash, returned in v_start.
		 */
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first_bh,
						       header_bh,
						       block,
						       prev_blkno,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	if (handle->h_buffer_credits < credits) {
		/*
		 * The journal has been restarted before, and don't
		 * have enough space for the insertion, so extend it
		 * here.
		 */
		ret = ocfs2_extend_trans(handle, credits);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}
	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
	     num_bits, block, v_start);
	ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
				  num_bits, 0, meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

leave:
	if (handle)
		ocfs2_commit_trans(osb, handle);
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
3743 | |||
3744 | /* | ||
3745 | * Extend a new xattr bucket and move xattrs to the end one by one until | ||
3746 | * We meet with start_bh. Only move half of the xattrs to the bucket after it. | ||
3747 | */ | ||
3748 | static int ocfs2_extend_xattr_bucket(struct inode *inode, | ||
3749 | struct buffer_head *first_bh, | ||
3750 | struct buffer_head *start_bh, | ||
3751 | u32 num_clusters) | ||
3752 | { | ||
3753 | int ret, credits; | ||
3754 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3755 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
3756 | u64 start_blk = start_bh->b_blocknr, end_blk; | ||
3757 | u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); | ||
3758 | handle_t *handle; | ||
3759 | struct ocfs2_xattr_header *first_xh = | ||
3760 | (struct ocfs2_xattr_header *)first_bh->b_data; | ||
3761 | u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); | ||
3762 | |||
3763 | mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " | ||
3764 | "from %llu, len = %u\n", start_blk, | ||
3765 | (unsigned long long)first_bh->b_blocknr, num_clusters); | ||
3766 | |||
3767 | BUG_ON(bucket >= num_buckets); | ||
3768 | |||
3769 | end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; | ||
3770 | |||
3771 | /* | ||
3772 | * We will touch all the buckets after the start_bh(include it). | ||
3773 | * Add one more bucket and modify the first_bh. | ||
3774 | */ | ||
3775 | credits = end_blk - start_blk + 2 * blk_per_bucket + 1; | ||
3776 | handle = ocfs2_start_trans(osb, credits); | ||
3777 | if (IS_ERR(handle)) { | ||
3778 | ret = PTR_ERR(handle); | ||
3779 | handle = NULL; | ||
3780 | mlog_errno(ret); | ||
3781 | goto out; | ||
3782 | } | ||
3783 | |||
3784 | ret = ocfs2_journal_access(handle, inode, first_bh, | ||
3785 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3786 | if (ret) { | ||
3787 | mlog_errno(ret); | ||
3788 | goto commit; | ||
3789 | } | ||
3790 | |||
3791 | while (end_blk != start_blk) { | ||
3792 | ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, | ||
3793 | end_blk + blk_per_bucket, 0); | ||
3794 | if (ret) | ||
3795 | goto commit; | ||
3796 | end_blk -= blk_per_bucket; | ||
3797 | } | ||
3798 | |||
3799 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
3800 | ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, | ||
3801 | start_blk + blk_per_bucket, NULL, 0); | ||
3802 | |||
3803 | le16_add_cpu(&first_xh->xh_num_buckets, 1); | ||
3804 | ocfs2_journal_dirty(handle, first_bh); | ||
3805 | |||
3806 | commit: | ||
3807 | ocfs2_commit_trans(osb, handle); | ||
3808 | out: | ||
3809 | return ret; | ||
3810 | } | ||
3811 | |||
3812 | /* | ||
3813 | * Add new xattr bucket in an extent record and adjust the buckets accordingly. | ||
3814 | * xb_bh is the ocfs2_xattr_block. | ||
3815 | * We will move all the buckets starting from header_bh to the next place. As | ||
3816 | * for this one, half num of its xattrs will be moved to the next one. | ||
3817 | * | ||
3818 | * We will allocate a new cluster if current cluster is full and adjust | ||
3819 | * header_bh and first_bh if the insert place is moved to the new cluster. | ||
3820 | */ | ||
3821 | static int ocfs2_add_new_xattr_bucket(struct inode *inode, | ||
3822 | struct buffer_head *xb_bh, | ||
3823 | struct buffer_head *header_bh) | ||
3824 | { | ||
3825 | struct ocfs2_xattr_header *first_xh = NULL; | ||
3826 | struct buffer_head *first_bh = NULL; | ||
3827 | struct ocfs2_xattr_block *xb = | ||
3828 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
3829 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
3830 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
3831 | struct ocfs2_xattr_header *xh = | ||
3832 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
3833 | u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
3834 | struct super_block *sb = inode->i_sb; | ||
3835 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
3836 | int ret, num_buckets, extend = 1; | ||
3837 | u64 p_blkno; | ||
3838 | u32 e_cpos, num_clusters; | ||
3839 | |||
3840 | mlog(0, "Add new xattr bucket starting form %llu\n", | ||
3841 | (unsigned long long)header_bh->b_blocknr); | ||
3842 | |||
3843 | /* | ||
3844 | * Add refrence for header_bh here because it may be | ||
3845 | * changed in ocfs2_add_new_xattr_cluster and we need | ||
3846 | * to free it in the end. | ||
3847 | */ | ||
3848 | get_bh(header_bh); | ||
3849 | |||
3850 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, | ||
3851 | &num_clusters, el); | ||
3852 | if (ret) { | ||
3853 | mlog_errno(ret); | ||
3854 | goto out; | ||
3855 | } | ||
3856 | |||
3857 | ret = ocfs2_read_block(inode, p_blkno, &first_bh); | ||
3858 | if (ret) { | ||
3859 | mlog_errno(ret); | ||
3860 | goto out; | ||
3861 | } | ||
3862 | |||
3863 | num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; | ||
3864 | first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; | ||
3865 | |||
3866 | if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { | ||
3867 | ret = ocfs2_add_new_xattr_cluster(inode, | ||
3868 | xb_bh, | ||
3869 | &first_bh, | ||
3870 | &header_bh, | ||
3871 | &num_clusters, | ||
3872 | e_cpos, | ||
3873 | p_blkno, | ||
3874 | &extend); | ||
3875 | if (ret) { | ||
3876 | mlog_errno(ret); | ||
3877 | goto out; | ||
3878 | } | ||
3879 | } | ||
3880 | |||
3881 | if (extend) | ||
3882 | ret = ocfs2_extend_xattr_bucket(inode, | ||
3883 | first_bh, | ||
3884 | header_bh, | ||
3885 | num_clusters); | ||
3886 | if (ret) | ||
3887 | mlog_errno(ret); | ||
3888 | out: | ||
3889 | brelse(first_bh); | ||
3890 | brelse(header_bh); | ||
3891 | return ret; | ||
3892 | } | ||
3893 | |||
3894 | static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, | ||
3895 | struct ocfs2_xattr_bucket *bucket, | ||
3896 | int offs) | ||
3897 | { | ||
3898 | int block_off = offs >> inode->i_sb->s_blocksize_bits; | ||
3899 | |||
3900 | offs = offs % inode->i_sb->s_blocksize; | ||
3901 | return bucket->bhs[block_off]->b_data + offs; | ||
3902 | } | ||
3903 | |||
/*
 * Handle the normal xattr set, including replace, delete and new.
 *
 * Note: "local" indicates where the real value data lives (inside the
 * bucket vs. in an outside value tree). We can't infer that from the
 * value's length alone, which is why the caller passes it explicitly.
 */
static void ocfs2_xattr_set_entry_normal(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 u32 name_hash,
					 int local)
{
	struct ocfs2_xattr_entry *last, *xe;
	int name_len = strlen(xi->name);
	struct ocfs2_xattr_header *xh = xs->header;
	u16 count = le16_to_cpu(xh->xh_count), start;
	size_t blocksize = inode->i_sb->s_blocksize;
	char *val;
	size_t offs, size, new_size;

	/* One past the last valid entry. */
	last = &xh->xh_entries[count];
	if (!xs->not_found) {
		/* Replace or delete an existing entry. */
		xe = xs->here;
		offs = le16_to_cpu(xe->xe_name_offset);
		/* Space currently consumed by the old name+value pair. */
		if (ocfs2_xattr_is_local(xe))
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);

		/*
		 * If the new value will be stored outside, xi->value has been
		 * initialized as an empty ocfs2_xattr_value_root, and the same
		 * goes with xi->value_len, so we can set new_size safely here.
		 * See ocfs2_xattr_set_in_bucket.
		 */
		new_size = OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_SIZE(xi->value_len);

		le16_add_cpu(&xh->xh_name_value_len, -size);
		if (xi->value) {
			/* Too big for the old slot: append a fresh pair. */
			if (new_size > size)
				goto set_new_name_value;

			/* Now replace the old value with new one. */
			if (local)
				xe->xe_value_size = cpu_to_le64(xi->value_len);
			else
				xe->xe_value_size = 0;

			val = ocfs2_xattr_bucket_get_val(inode,
							 &xs->bucket, offs);
			/* Zero the old value region before copying. */
			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
			       size - OCFS2_XATTR_SIZE(name_len));
			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
				memcpy(val + OCFS2_XATTR_SIZE(name_len),
				       xi->value, xi->value_len);

			le16_add_cpu(&xh->xh_name_value_len, new_size);
			ocfs2_xattr_set_local(xe, local);
			return;
		} else {
			/*
			 * Remove the old entry if there is more than one.
			 * We don't remove the last entry so that we can
			 * use it to indicate the hash value of the empty
			 * bucket.
			 */
			last -= 1;
			le16_add_cpu(&xh->xh_count, -1);
			if (xh->xh_count) {
				/* Compact the entry array over the hole. */
				memmove(xe, xe + 1,
					(void *)last - (void *)xe);
				memset(last, 0,
				       sizeof(struct ocfs2_xattr_entry));
			} else
				xh->xh_free_start =
					cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);

			return;
		}
	} else {
		/* find a new entry for insert. */
		/* Binary search: entries are kept sorted by name hash. */
		int low = 0, high = count - 1, tmp;
		struct ocfs2_xattr_entry *tmp_xe;

		while (low <= high && count) {
			tmp = (low + high) / 2;
			tmp_xe = &xh->xh_entries[tmp];

			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
				low = tmp + 1;
			else if (name_hash <
				 le32_to_cpu(tmp_xe->xe_name_hash))
				high = tmp - 1;
			else {
				low = tmp;
				break;
			}
		}

		xe = &xh->xh_entries[low];
		/* Shift later entries up to open the slot at "low". */
		if (low != count)
			memmove(xe + 1, xe, (void *)last - (void *)xe);

		le16_add_cpu(&xh->xh_count, 1);
		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
		xe->xe_name_hash = cpu_to_le32(name_hash);
		xe->xe_name_len = name_len;
		ocfs2_xattr_set_type(xe, xi->name_index);
	}

set_new_name_value:
	/* Insert the new name+value. */
	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);

	/*
	 * We must make sure that the name/value pair
	 * exists in the same block.
	 */
	offs = le16_to_cpu(xh->xh_free_start);
	start = offs - size;

	if (start >> inode->i_sb->s_blocksize_bits !=
	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
		/* Pair would straddle a block: retreat to a block boundary. */
		offs = offs - offs % blocksize;
		xh->xh_free_start = cpu_to_le16(offs);
	}

	val = ocfs2_xattr_bucket_get_val(inode,
					 &xs->bucket, offs - size);
	xe->xe_name_offset = cpu_to_le16(offs - size);

	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);

	xe->xe_value_size = cpu_to_le64(xi->value_len);
	ocfs2_xattr_set_local(xe, local);
	xs->here = xe;
	le16_add_cpu(&xh->xh_free_start, -size);
	le16_add_cpu(&xh->xh_name_value_len, size);

	return;
}
4050 | |||
4051 | static int ocfs2_xattr_bucket_handle_journal(struct inode *inode, | ||
4052 | handle_t *handle, | ||
4053 | struct ocfs2_xattr_search *xs, | ||
4054 | struct buffer_head **bhs, | ||
4055 | u16 bh_num) | ||
4056 | { | ||
4057 | int ret = 0, off, block_off; | ||
4058 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4059 | |||
4060 | /* | ||
4061 | * First calculate all the blocks we should journal_access | ||
4062 | * and journal_dirty. The first block should always be touched. | ||
4063 | */ | ||
4064 | ret = ocfs2_journal_dirty(handle, bhs[0]); | ||
4065 | if (ret) | ||
4066 | mlog_errno(ret); | ||
4067 | |||
4068 | /* calc the data. */ | ||
4069 | off = le16_to_cpu(xe->xe_name_offset); | ||
4070 | block_off = off >> inode->i_sb->s_blocksize_bits; | ||
4071 | ret = ocfs2_journal_dirty(handle, bhs[block_off]); | ||
4072 | if (ret) | ||
4073 | mlog_errno(ret); | ||
4074 | |||
4075 | return ret; | ||
4076 | } | ||
4077 | |||
4078 | /* | ||
4079 | * Set the xattr entry in the specified bucket. | ||
4080 | * The bucket is indicated by xs->bucket and it should have the enough | ||
4081 | * space for the xattr insertion. | ||
4082 | */ | ||
4083 | static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, | ||
4084 | struct ocfs2_xattr_info *xi, | ||
4085 | struct ocfs2_xattr_search *xs, | ||
4086 | u32 name_hash, | ||
4087 | int local) | ||
4088 | { | ||
4089 | int i, ret; | ||
4090 | handle_t *handle = NULL; | ||
4091 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
4092 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4093 | |||
4094 | mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", | ||
4095 | (unsigned long)xi->value_len, xi->name_index, | ||
4096 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr); | ||
4097 | |||
4098 | if (!xs->bucket.bhs[1]) { | ||
4099 | ret = ocfs2_read_blocks(inode, | ||
4100 | xs->bucket.bhs[0]->b_blocknr + 1, | ||
4101 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
4102 | 0); | ||
4103 | if (ret) { | ||
4104 | mlog_errno(ret); | ||
4105 | goto out; | ||
4106 | } | ||
4107 | } | ||
4108 | |||
4109 | handle = ocfs2_start_trans(osb, blk_per_bucket); | ||
4110 | if (IS_ERR(handle)) { | ||
4111 | ret = PTR_ERR(handle); | ||
4112 | handle = NULL; | ||
4113 | mlog_errno(ret); | ||
4114 | goto out; | ||
4115 | } | ||
4116 | |||
4117 | for (i = 0; i < blk_per_bucket; i++) { | ||
4118 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i], | ||
4119 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4120 | if (ret < 0) { | ||
4121 | mlog_errno(ret); | ||
4122 | goto out; | ||
4123 | } | ||
4124 | } | ||
4125 | |||
4126 | ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); | ||
4127 | |||
4128 | /*Only dirty the blocks we have touched in set xattr. */ | ||
4129 | ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, | ||
4130 | xs->bucket.bhs, blk_per_bucket); | ||
4131 | if (ret) | ||
4132 | mlog_errno(ret); | ||
4133 | out: | ||
4134 | ocfs2_commit_trans(osb, handle); | ||
4135 | |||
4136 | return ret; | ||
4137 | } | ||
4138 | |||
4139 | static int ocfs2_xattr_value_update_size(struct inode *inode, | ||
4140 | struct buffer_head *xe_bh, | ||
4141 | struct ocfs2_xattr_entry *xe, | ||
4142 | u64 new_size) | ||
4143 | { | ||
4144 | int ret; | ||
4145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4146 | handle_t *handle = NULL; | ||
4147 | |||
4148 | handle = ocfs2_start_trans(osb, 1); | ||
4149 | if (handle == NULL) { | ||
4150 | ret = -ENOMEM; | ||
4151 | mlog_errno(ret); | ||
4152 | goto out; | ||
4153 | } | ||
4154 | |||
4155 | ret = ocfs2_journal_access(handle, inode, xe_bh, | ||
4156 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4157 | if (ret < 0) { | ||
4158 | mlog_errno(ret); | ||
4159 | goto out_commit; | ||
4160 | } | ||
4161 | |||
4162 | xe->xe_value_size = cpu_to_le64(new_size); | ||
4163 | |||
4164 | ret = ocfs2_journal_dirty(handle, xe_bh); | ||
4165 | if (ret < 0) | ||
4166 | mlog_errno(ret); | ||
4167 | |||
4168 | out_commit: | ||
4169 | ocfs2_commit_trans(osb, handle); | ||
4170 | out: | ||
4171 | return ret; | ||
4172 | } | ||
4173 | |||
4174 | /* | ||
4175 | * Truncate the specified xe_off entry in xattr bucket. | ||
4176 | * bucket is indicated by header_bh and len is the new length. | ||
4177 | * Both the ocfs2_xattr_value_root and the entry will be updated here. | ||
4178 | * | ||
4179 | * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. | ||
4180 | */ | ||
4181 | static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, | ||
4182 | struct buffer_head *header_bh, | ||
4183 | int xe_off, | ||
4184 | int len) | ||
4185 | { | ||
4186 | int ret, offset; | ||
4187 | u64 value_blk; | ||
4188 | struct buffer_head *value_bh = NULL; | ||
4189 | struct ocfs2_xattr_value_root *xv; | ||
4190 | struct ocfs2_xattr_entry *xe; | ||
4191 | struct ocfs2_xattr_header *xh = | ||
4192 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
4193 | size_t blocksize = inode->i_sb->s_blocksize; | ||
4194 | |||
4195 | xe = &xh->xh_entries[xe_off]; | ||
4196 | |||
4197 | BUG_ON(!xe || ocfs2_xattr_is_local(xe)); | ||
4198 | |||
4199 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
4200 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
4201 | |||
4202 | value_blk = offset / blocksize; | ||
4203 | |||
4204 | /* We don't allow ocfs2_xattr_value to be stored in different block. */ | ||
4205 | BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); | ||
4206 | value_blk += header_bh->b_blocknr; | ||
4207 | |||
4208 | ret = ocfs2_read_block(inode, value_blk, &value_bh); | ||
4209 | if (ret) { | ||
4210 | mlog_errno(ret); | ||
4211 | goto out; | ||
4212 | } | ||
4213 | |||
4214 | xv = (struct ocfs2_xattr_value_root *) | ||
4215 | (value_bh->b_data + offset % blocksize); | ||
4216 | |||
4217 | mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", | ||
4218 | xe_off, (unsigned long long)header_bh->b_blocknr, len); | ||
4219 | ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len); | ||
4220 | if (ret) { | ||
4221 | mlog_errno(ret); | ||
4222 | goto out; | ||
4223 | } | ||
4224 | |||
4225 | ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); | ||
4226 | if (ret) { | ||
4227 | mlog_errno(ret); | ||
4228 | goto out; | ||
4229 | } | ||
4230 | |||
4231 | out: | ||
4232 | brelse(value_bh); | ||
4233 | return ret; | ||
4234 | } | ||
4235 | |||
4236 | static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, | ||
4237 | struct ocfs2_xattr_search *xs, | ||
4238 | int len) | ||
4239 | { | ||
4240 | int ret, offset; | ||
4241 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4242 | struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; | ||
4243 | |||
4244 | BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); | ||
4245 | |||
4246 | offset = xe - xh->xh_entries; | ||
4247 | ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], | ||
4248 | offset, len); | ||
4249 | if (ret) | ||
4250 | mlog_errno(ret); | ||
4251 | |||
4252 | return ret; | ||
4253 | } | ||
4254 | |||
4255 | static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | ||
4256 | struct ocfs2_xattr_search *xs, | ||
4257 | char *val, | ||
4258 | int value_len) | ||
4259 | { | ||
4260 | int offset; | ||
4261 | struct ocfs2_xattr_value_root *xv; | ||
4262 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4263 | |||
4264 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); | ||
4265 | |||
4266 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
4267 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
4268 | |||
4269 | xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); | ||
4270 | |||
4271 | return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); | ||
4272 | } | ||
4273 | |||
4274 | static int ocfs2_rm_xattr_cluster(struct inode *inode, | ||
4275 | struct buffer_head *root_bh, | ||
4276 | u64 blkno, | ||
4277 | u32 cpos, | ||
4278 | u32 len) | ||
4279 | { | ||
4280 | int ret; | ||
4281 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4282 | struct inode *tl_inode = osb->osb_tl_inode; | ||
4283 | handle_t *handle; | ||
4284 | struct ocfs2_xattr_block *xb = | ||
4285 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
4286 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
4287 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
4288 | struct ocfs2_extent_tree et; | ||
4289 | |||
4290 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | ||
4291 | |||
4292 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
4293 | |||
4294 | mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", | ||
4295 | cpos, len, (unsigned long long)blkno); | ||
4296 | |||
4297 | ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); | ||
4298 | |||
4299 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
4300 | if (ret) { | ||
4301 | mlog_errno(ret); | ||
4302 | return ret; | ||
4303 | } | ||
4304 | |||
4305 | mutex_lock(&tl_inode->i_mutex); | ||
4306 | |||
4307 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
4308 | ret = __ocfs2_flush_truncate_log(osb); | ||
4309 | if (ret < 0) { | ||
4310 | mlog_errno(ret); | ||
4311 | goto out; | ||
4312 | } | ||
4313 | } | ||
4314 | |||
4315 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
4316 | if (handle == NULL) { | ||
4317 | ret = -ENOMEM; | ||
4318 | mlog_errno(ret); | ||
4319 | goto out; | ||
4320 | } | ||
4321 | |||
4322 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
4323 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4324 | if (ret) { | ||
4325 | mlog_errno(ret); | ||
4326 | goto out_commit; | ||
4327 | } | ||
4328 | |||
4329 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
4330 | &dealloc); | ||
4331 | if (ret) { | ||
4332 | mlog_errno(ret); | ||
4333 | goto out_commit; | ||
4334 | } | ||
4335 | |||
4336 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); | ||
4337 | |||
4338 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
4339 | if (ret) { | ||
4340 | mlog_errno(ret); | ||
4341 | goto out_commit; | ||
4342 | } | ||
4343 | |||
4344 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); | ||
4345 | if (ret) | ||
4346 | mlog_errno(ret); | ||
4347 | |||
4348 | out_commit: | ||
4349 | ocfs2_commit_trans(osb, handle); | ||
4350 | out: | ||
4351 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
4352 | |||
4353 | mutex_unlock(&tl_inode->i_mutex); | ||
4354 | |||
4355 | if (meta_ac) | ||
4356 | ocfs2_free_alloc_context(meta_ac); | ||
4357 | |||
4358 | ocfs2_run_deallocs(osb, &dealloc); | ||
4359 | |||
4360 | return ret; | ||
4361 | } | ||
4362 | |||
4363 | static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, | ||
4364 | struct ocfs2_xattr_search *xs) | ||
4365 | { | ||
4366 | handle_t *handle = NULL; | ||
4367 | struct ocfs2_xattr_header *xh = xs->bucket.xh; | ||
4368 | struct ocfs2_xattr_entry *last = &xh->xh_entries[ | ||
4369 | le16_to_cpu(xh->xh_count) - 1]; | ||
4370 | int ret = 0; | ||
4371 | |||
4372 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); | ||
4373 | if (IS_ERR(handle)) { | ||
4374 | ret = PTR_ERR(handle); | ||
4375 | mlog_errno(ret); | ||
4376 | return; | ||
4377 | } | ||
4378 | |||
4379 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0], | ||
4380 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
4381 | if (ret) { | ||
4382 | mlog_errno(ret); | ||
4383 | goto out_commit; | ||
4384 | } | ||
4385 | |||
4386 | /* Remove the old entry. */ | ||
4387 | memmove(xs->here, xs->here + 1, | ||
4388 | (void *)last - (void *)xs->here); | ||
4389 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
4390 | le16_add_cpu(&xh->xh_count, -1); | ||
4391 | |||
4392 | ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); | ||
4393 | if (ret < 0) | ||
4394 | mlog_errno(ret); | ||
4395 | out_commit: | ||
4396 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
4397 | } | ||
4398 | |||
4399 | /* | ||
4400 | * Set the xattr name/value in the bucket specified in xs. | ||
4401 | * | ||
4402 | * As the new value in xi may be stored in the bucket or in an outside cluster, | ||
4403 | * we divide the whole process into 3 steps: | ||
4404 | * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket) | ||
4405 | * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs) | ||
4406 | * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside) | ||
4407 | * 4. If the clusters for the new outside value can't be allocated, we need | ||
4408 | * to free the xattr we allocated in set. | ||
4409 | */ | ||
4410 | static int ocfs2_xattr_set_in_bucket(struct inode *inode, | ||
4411 | struct ocfs2_xattr_info *xi, | ||
4412 | struct ocfs2_xattr_search *xs) | ||
4413 | { | ||
4414 | int ret, local = 1; | ||
4415 | size_t value_len; | ||
4416 | char *val = (char *)xi->value; | ||
4417 | struct ocfs2_xattr_entry *xe = xs->here; | ||
4418 | u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name, | ||
4419 | strlen(xi->name)); | ||
4420 | |||
4421 | if (!xs->not_found && !ocfs2_xattr_is_local(xe)) { | ||
4422 | /* | ||
4423 | * We need to truncate the xattr storage first. | ||
4424 | * | ||
4425 | * If both the old and new value are stored to | ||
4426 | * outside block, we only need to truncate | ||
4427 | * the storage and then set the value outside. | ||
4428 | * | ||
4429 | * If the new value should be stored within block, | ||
4430 | * we should free all the outside block first and | ||
4431 | * the modification to the xattr block will be done | ||
4432 | * by following steps. | ||
4433 | */ | ||
4434 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
4435 | value_len = xi->value_len; | ||
4436 | else | ||
4437 | value_len = 0; | ||
4438 | |||
4439 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
4440 | value_len); | ||
4441 | if (ret) | ||
4442 | goto out; | ||
4443 | |||
4444 | if (value_len) | ||
4445 | goto set_value_outside; | ||
4446 | } | ||
4447 | |||
4448 | value_len = xi->value_len; | ||
4449 | /* So we have to handle the inside block change now. */ | ||
4450 | if (value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
4451 | /* | ||
4452 | * If the new value will be stored outside of block, | ||
4453 | * initalize a new empty value root and insert it first. | ||
4454 | */ | ||
4455 | local = 0; | ||
4456 | xi->value = &def_xv; | ||
4457 | xi->value_len = OCFS2_XATTR_ROOT_SIZE; | ||
4458 | } | ||
4459 | |||
4460 | ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); | ||
4461 | if (ret) { | ||
4462 | mlog_errno(ret); | ||
4463 | goto out; | ||
4464 | } | ||
4465 | |||
4466 | if (value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
4467 | goto out; | ||
4468 | |||
4469 | /* allocate the space now for the outside block storage. */ | ||
4470 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
4471 | value_len); | ||
4472 | if (ret) { | ||
4473 | mlog_errno(ret); | ||
4474 | |||
4475 | if (xs->not_found) { | ||
4476 | /* | ||
4477 | * We can't allocate enough clusters for outside | ||
4478 | * storage and we have allocated xattr already, | ||
4479 | * so need to remove it. | ||
4480 | */ | ||
4481 | ocfs2_xattr_bucket_remove_xs(inode, xs); | ||
4482 | } | ||
4483 | goto out; | ||
4484 | } | ||
4485 | |||
4486 | set_value_outside: | ||
4487 | ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); | ||
4488 | out: | ||
4489 | return ret; | ||
4490 | } | ||
4491 | |||
4492 | /* check whether the xattr bucket is filled up with the same hash value. */ | ||
4493 | static int ocfs2_check_xattr_bucket_collision(struct inode *inode, | ||
4494 | struct ocfs2_xattr_bucket *bucket) | ||
4495 | { | ||
4496 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
4497 | |||
4498 | if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == | ||
4499 | xh->xh_entries[0].xe_name_hash) { | ||
4500 | mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " | ||
4501 | "hash = %u\n", | ||
4502 | (unsigned long long)bucket->bhs[0]->b_blocknr, | ||
4503 | le32_to_cpu(xh->xh_entries[0].xe_name_hash)); | ||
4504 | return -ENOSPC; | ||
4505 | } | ||
4506 | |||
4507 | return 0; | ||
4508 | } | ||
4509 | |||
4510 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
4511 | struct ocfs2_xattr_info *xi, | ||
4512 | struct ocfs2_xattr_search *xs) | ||
4513 | { | ||
4514 | struct ocfs2_xattr_header *xh; | ||
4515 | struct ocfs2_xattr_entry *xe; | ||
4516 | u16 count, header_size, xh_free_start; | ||
4517 | int i, free, max_free, need, old; | ||
4518 | size_t value_size = 0, name_len = strlen(xi->name); | ||
4519 | size_t blocksize = inode->i_sb->s_blocksize; | ||
4520 | int ret, allocation = 0; | ||
4521 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
4522 | |||
4523 | mlog_entry("Set xattr %s in xattr index block\n", xi->name); | ||
4524 | |||
4525 | try_again: | ||
4526 | xh = xs->header; | ||
4527 | count = le16_to_cpu(xh->xh_count); | ||
4528 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
4529 | header_size = sizeof(struct ocfs2_xattr_header) + | ||
4530 | count * sizeof(struct ocfs2_xattr_entry); | ||
4531 | max_free = OCFS2_XATTR_BUCKET_SIZE - | ||
4532 | le16_to_cpu(xh->xh_name_value_len) - header_size; | ||
4533 | |||
4534 | mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " | ||
4535 | "of %u which exceed block size\n", | ||
4536 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
4537 | header_size); | ||
4538 | |||
4539 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
4540 | value_size = OCFS2_XATTR_ROOT_SIZE; | ||
4541 | else if (xi->value) | ||
4542 | value_size = OCFS2_XATTR_SIZE(xi->value_len); | ||
4543 | |||
4544 | if (xs->not_found) | ||
4545 | need = sizeof(struct ocfs2_xattr_entry) + | ||
4546 | OCFS2_XATTR_SIZE(name_len) + value_size; | ||
4547 | else { | ||
4548 | need = value_size + OCFS2_XATTR_SIZE(name_len); | ||
4549 | |||
4550 | /* | ||
4551 | * We only replace the old value if the new length is smaller | ||
4552 | * than the old one. Otherwise we will allocate new space in the | ||
4553 | * bucket to store it. | ||
4554 | */ | ||
4555 | xe = xs->here; | ||
4556 | if (ocfs2_xattr_is_local(xe)) | ||
4557 | old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
4558 | else | ||
4559 | old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
4560 | |||
4561 | if (old >= value_size) | ||
4562 | need = 0; | ||
4563 | } | ||
4564 | |||
4565 | free = xh_free_start - header_size; | ||
4566 | /* | ||
4567 | * We need to make sure the new name/value pair | ||
4568 | * can exist in the same block. | ||
4569 | */ | ||
4570 | if (xh_free_start % blocksize < need) | ||
4571 | free -= xh_free_start % blocksize; | ||
4572 | |||
4573 | mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " | ||
4574 | "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" | ||
4575 | " %u\n", xs->not_found, | ||
4576 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
4577 | free, need, max_free, le16_to_cpu(xh->xh_free_start), | ||
4578 | le16_to_cpu(xh->xh_name_value_len)); | ||
4579 | |||
4580 | if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
4581 | if (need <= max_free && | ||
4582 | count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
4583 | /* | ||
4584 | * We can create the space by defragment. Since only the | ||
4585 | * name/value will be moved, the xe shouldn't be changed | ||
4586 | * in xs. | ||
4587 | */ | ||
4588 | ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); | ||
4589 | if (ret) { | ||
4590 | mlog_errno(ret); | ||
4591 | goto out; | ||
4592 | } | ||
4593 | |||
4594 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
4595 | free = xh_free_start - header_size; | ||
4596 | if (xh_free_start % blocksize < need) | ||
4597 | free -= xh_free_start % blocksize; | ||
4598 | |||
4599 | if (free >= need) | ||
4600 | goto xattr_set; | ||
4601 | |||
4602 | mlog(0, "Can't get enough space for xattr insert by " | ||
4603 | "defragment. Need %u bytes, but we have %d, so " | ||
4604 | "allocate new bucket for it.\n", need, free); | ||
4605 | } | ||
4606 | |||
4607 | /* | ||
4608 | * We have to add new buckets or clusters and one | ||
4609 | * allocation should leave us enough space for insert. | ||
4610 | */ | ||
4611 | BUG_ON(allocation); | ||
4612 | |||
4613 | /* | ||
4614 | * We do not allow for overlapping ranges between buckets. And | ||
4615 | * the maximum number of collisions we will allow for then is | ||
4616 | * one bucket's worth, so check it here whether we need to | ||
4617 | * add a new bucket for the insert. | ||
4618 | */ | ||
4619 | ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); | ||
4620 | if (ret) { | ||
4621 | mlog_errno(ret); | ||
4622 | goto out; | ||
4623 | } | ||
4624 | |||
4625 | ret = ocfs2_add_new_xattr_bucket(inode, | ||
4626 | xs->xattr_bh, | ||
4627 | xs->bucket.bhs[0]); | ||
4628 | if (ret) { | ||
4629 | mlog_errno(ret); | ||
4630 | goto out; | ||
4631 | } | ||
4632 | |||
4633 | for (i = 0; i < blk_per_bucket; i++) | ||
4634 | brelse(xs->bucket.bhs[i]); | ||
4635 | |||
4636 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
4637 | |||
4638 | ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, | ||
4639 | xi->name_index, | ||
4640 | xi->name, xs); | ||
4641 | if (ret && ret != -ENODATA) | ||
4642 | goto out; | ||
4643 | xs->not_found = ret; | ||
4644 | allocation = 1; | ||
4645 | goto try_again; | ||
4646 | } | ||
4647 | |||
4648 | xattr_set: | ||
4649 | ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); | ||
4650 | out: | ||
4651 | mlog_exit(ret); | ||
4652 | return ret; | ||
4653 | } | ||
4654 | |||
4655 | static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | ||
4656 | struct ocfs2_xattr_bucket *bucket, | ||
4657 | void *para) | ||
4658 | { | ||
4659 | int ret = 0; | ||
4660 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
4661 | u16 i; | ||
4662 | struct ocfs2_xattr_entry *xe; | ||
4663 | |||
4664 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
4665 | xe = &xh->xh_entries[i]; | ||
4666 | if (ocfs2_xattr_is_local(xe)) | ||
4667 | continue; | ||
4668 | |||
4669 | ret = ocfs2_xattr_bucket_value_truncate(inode, | ||
4670 | bucket->bhs[0], | ||
4671 | i, 0); | ||
4672 | if (ret) { | ||
4673 | mlog_errno(ret); | ||
4674 | break; | ||
4675 | } | ||
4676 | } | ||
4677 | |||
4678 | return ret; | ||
4679 | } | ||
4680 | |||
4681 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
4682 | struct buffer_head *xb_bh) | ||
4683 | { | ||
4684 | struct ocfs2_xattr_block *xb = | ||
4685 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
4686 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
4687 | int ret = 0; | ||
4688 | u32 name_hash = UINT_MAX, e_cpos, num_clusters; | ||
4689 | u64 p_blkno; | ||
4690 | |||
4691 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
4692 | return 0; | ||
4693 | |||
4694 | while (name_hash > 0) { | ||
4695 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
4696 | &e_cpos, &num_clusters, el); | ||
4697 | if (ret) { | ||
4698 | mlog_errno(ret); | ||
4699 | goto out; | ||
4700 | } | ||
4701 | |||
4702 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
4703 | ocfs2_delete_xattr_in_bucket, | ||
4704 | NULL); | ||
4705 | if (ret) { | ||
4706 | mlog_errno(ret); | ||
4707 | goto out; | ||
4708 | } | ||
4709 | |||
4710 | ret = ocfs2_rm_xattr_cluster(inode, xb_bh, | ||
4711 | p_blkno, e_cpos, num_clusters); | ||
4712 | if (ret) { | ||
4713 | mlog_errno(ret); | ||
4714 | break; | ||
4715 | } | ||
4716 | |||
4717 | if (e_cpos == 0) | ||
4718 | break; | ||
4719 | |||
4720 | name_hash = e_cpos - 1; | ||
4721 | } | ||
4722 | |||
4723 | out: | ||
4724 | return ret; | ||
4725 | } | ||
4726 | |||
4727 | /* | ||
4728 | * 'trusted' attributes support | ||
4729 | */ | ||
4730 | |||
4731 | #define XATTR_TRUSTED_PREFIX "trusted." | ||
4732 | |||
4733 | static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, | ||
4734 | size_t list_size, const char *name, | ||
4735 | size_t name_len) | ||
4736 | { | ||
4737 | const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; | ||
4738 | const size_t total_len = prefix_len + name_len + 1; | ||
4739 | |||
4740 | if (list && total_len <= list_size) { | ||
4741 | memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); | ||
4742 | memcpy(list + prefix_len, name, name_len); | ||
4743 | list[prefix_len + name_len] = '\0'; | ||
4744 | } | ||
4745 | return total_len; | ||
4746 | } | ||
4747 | |||
4748 | static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, | ||
4749 | void *buffer, size_t size) | ||
4750 | { | ||
4751 | if (strcmp(name, "") == 0) | ||
4752 | return -EINVAL; | ||
4753 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, | ||
4754 | buffer, size); | ||
4755 | } | ||
4756 | |||
4757 | static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, | ||
4758 | const void *value, size_t size, int flags) | ||
4759 | { | ||
4760 | if (strcmp(name, "") == 0) | ||
4761 | return -EINVAL; | ||
4762 | |||
4763 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, | ||
4764 | size, flags); | ||
4765 | } | ||
4766 | |||
4767 | struct xattr_handler ocfs2_xattr_trusted_handler = { | ||
4768 | .prefix = XATTR_TRUSTED_PREFIX, | ||
4769 | .list = ocfs2_xattr_trusted_list, | ||
4770 | .get = ocfs2_xattr_trusted_get, | ||
4771 | .set = ocfs2_xattr_trusted_set, | ||
4772 | }; | ||
4773 | |||
4774 | |||
4775 | /* | ||
4776 | * 'user' attributes support | ||
4777 | */ | ||
4778 | |||
4779 | #define XATTR_USER_PREFIX "user." | ||
4780 | |||
4781 | static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, | ||
4782 | size_t list_size, const char *name, | ||
4783 | size_t name_len) | ||
4784 | { | ||
4785 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; | ||
4786 | const size_t total_len = prefix_len + name_len + 1; | ||
4787 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4788 | |||
4789 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4790 | return 0; | ||
4791 | |||
4792 | if (list && total_len <= list_size) { | ||
4793 | memcpy(list, XATTR_USER_PREFIX, prefix_len); | ||
4794 | memcpy(list + prefix_len, name, name_len); | ||
4795 | list[prefix_len + name_len] = '\0'; | ||
4796 | } | ||
4797 | return total_len; | ||
4798 | } | ||
4799 | |||
4800 | static int ocfs2_xattr_user_get(struct inode *inode, const char *name, | ||
4801 | void *buffer, size_t size) | ||
4802 | { | ||
4803 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4804 | |||
4805 | if (strcmp(name, "") == 0) | ||
4806 | return -EINVAL; | ||
4807 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4808 | return -EOPNOTSUPP; | ||
4809 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, | ||
4810 | buffer, size); | ||
4811 | } | ||
4812 | |||
4813 | static int ocfs2_xattr_user_set(struct inode *inode, const char *name, | ||
4814 | const void *value, size_t size, int flags) | ||
4815 | { | ||
4816 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
4817 | |||
4818 | if (strcmp(name, "") == 0) | ||
4819 | return -EINVAL; | ||
4820 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
4821 | return -EOPNOTSUPP; | ||
4822 | |||
4823 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, | ||
4824 | size, flags); | ||
4825 | } | ||
4826 | |||
4827 | struct xattr_handler ocfs2_xattr_user_handler = { | ||
4828 | .prefix = XATTR_USER_PREFIX, | ||
4829 | .list = ocfs2_xattr_user_list, | ||
4830 | .get = ocfs2_xattr_user_get, | ||
4831 | .set = ocfs2_xattr_user_set, | ||
4832 | }; | ||
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h new file mode 100644 index 000000000000..c25c7c62a059 --- /dev/null +++ b/fs/ocfs2/xattr.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * xattr.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef OCFS2_XATTR_H | ||
27 | #define OCFS2_XATTR_H | ||
28 | |||
29 | #include <linux/init.h> | ||
30 | #include <linux/xattr.h> | ||
31 | |||
32 | enum ocfs2_xattr_type { | ||
33 | OCFS2_XATTR_INDEX_USER = 1, | ||
34 | OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS, | ||
35 | OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, | ||
36 | OCFS2_XATTR_INDEX_TRUSTED, | ||
37 | OCFS2_XATTR_INDEX_SECURITY, | ||
38 | OCFS2_XATTR_MAX | ||
39 | }; | ||
40 | |||
41 | extern struct xattr_handler ocfs2_xattr_user_handler; | ||
42 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | ||
43 | |||
44 | extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | ||
45 | extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t); | ||
46 | extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, | ||
47 | size_t, int); | ||
48 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); | ||
49 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | ||
50 | |||
51 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | ||
52 | { | ||
53 | return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; | ||
54 | } | ||
55 | |||
56 | static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | ||
57 | { | ||
58 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | ||
59 | } | ||
60 | |||
61 | static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) | ||
62 | { | ||
63 | u16 len = sb->s_blocksize - | ||
64 | offsetof(struct ocfs2_xattr_header, xh_entries); | ||
65 | |||
66 | return len / sizeof(struct ocfs2_xattr_entry); | ||
67 | } | ||
68 | #endif /* OCFS2_XATTR_H */ | ||
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d29047b1b9b0..cbf047a847c5 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -346,7 +346,7 @@ enum { | |||
346 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask | 346 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask |
347 | }; | 347 | }; |
348 | 348 | ||
349 | static match_table_t tokens = { | 349 | static const match_table_t tokens = { |
350 | {Opt_uid, "uid=%u"}, | 350 | {Opt_uid, "uid=%u"}, |
351 | {Opt_gid, "gid=%u"}, | 351 | {Opt_gid, "gid=%u"}, |
352 | {Opt_umask, "umask=%o"}, | 352 | {Opt_umask, "umask=%o"}, |
@@ -1141,8 +1141,7 @@ EXPORT_SYMBOL(sys_close); | |||
1141 | asmlinkage long sys_vhangup(void) | 1141 | asmlinkage long sys_vhangup(void) |
1142 | { | 1142 | { |
1143 | if (capable(CAP_SYS_TTY_CONFIG)) { | 1143 | if (capable(CAP_SYS_TTY_CONFIG)) { |
1144 | /* XXX: this needs locking */ | 1144 | tty_vhangup_self(); |
1145 | tty_vhangup(current->signal->tty); | ||
1146 | return 0; | 1145 | return 0; |
1147 | } | 1146 | } |
1148 | return -EPERM; | 1147 | return -EPERM; |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 3d3e16631472..a97b477ac0fc 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -275,16 +275,6 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
275 | id = data[0x1fc] & 15; | 275 | id = data[0x1fc] & 15; |
276 | put_dev_sector(sect); | 276 | put_dev_sector(sect); |
277 | 277 | ||
278 | #ifdef CONFIG_BLK_DEV_MFM | ||
279 | if (MAJOR(bdev->bd_dev) == MFM_ACORN_MAJOR) { | ||
280 | extern void xd_set_geometry(struct block_device *, | ||
281 | unsigned char, unsigned char, unsigned int); | ||
282 | xd_set_geometry(bdev, dr->secspertrack, heads, 1); | ||
283 | invalidate_bh_lrus(); | ||
284 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | ||
285 | } | ||
286 | #endif | ||
287 | |||
288 | /* | 278 | /* |
289 | * Work out start of non-adfs partition. | 279 | * Work out start of non-adfs partition. |
290 | */ | 280 | */ |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..cfb0c80690aa 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = | |||
120 | * a pointer to that same buffer (for convenience). | 120 | * a pointer to that same buffer (for convenience). |
121 | */ | 121 | */ |
122 | 122 | ||
123 | char *disk_name(struct gendisk *hd, int part, char *buf) | 123 | char *disk_name(struct gendisk *hd, int partno, char *buf) |
124 | { | 124 | { |
125 | if (!part) | 125 | if (!partno) |
126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); | 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); |
127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) | 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) |
128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); | 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); |
129 | else | 129 | else |
130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); | 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); |
131 | 131 | ||
132 | return buf; | 132 | return buf; |
133 | } | 133 | } |
134 | 134 | ||
135 | const char *bdevname(struct block_device *bdev, char *buf) | 135 | const char *bdevname(struct block_device *bdev, char *buf) |
136 | { | 136 | { |
137 | int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; | 137 | return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); |
138 | return disk_name(bdev->bd_disk, part, buf); | ||
139 | } | 138 | } |
140 | 139 | ||
141 | EXPORT_SYMBOL(bdevname); | 140 | EXPORT_SYMBOL(bdevname); |
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
169 | if (isdigit(state->name[strlen(state->name)-1])) | 168 | if (isdigit(state->name[strlen(state->name)-1])) |
170 | sprintf(state->name, "p"); | 169 | sprintf(state->name, "p"); |
171 | 170 | ||
172 | state->limit = hd->minors; | 171 | state->limit = disk_max_parts(hd); |
173 | i = res = err = 0; | 172 | i = res = err = 0; |
174 | while (!res && check_part[i]) { | 173 | while (!res && check_part[i]) { |
175 | memset(&state->parts, 0, sizeof(state->parts)); | 174 | memset(&state->parts, 0, sizeof(state->parts)); |
@@ -196,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
196 | return ERR_PTR(res); | 195 | return ERR_PTR(res); |
197 | } | 196 | } |
198 | 197 | ||
198 | static ssize_t part_partition_show(struct device *dev, | ||
199 | struct device_attribute *attr, char *buf) | ||
200 | { | ||
201 | struct hd_struct *p = dev_to_part(dev); | ||
202 | |||
203 | return sprintf(buf, "%d\n", p->partno); | ||
204 | } | ||
205 | |||
199 | static ssize_t part_start_show(struct device *dev, | 206 | static ssize_t part_start_show(struct device *dev, |
200 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
201 | { | 208 | { |
@@ -204,21 +211,22 @@ static ssize_t part_start_show(struct device *dev, | |||
204 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | 211 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
205 | } | 212 | } |
206 | 213 | ||
207 | static ssize_t part_size_show(struct device *dev, | 214 | ssize_t part_size_show(struct device *dev, |
208 | struct device_attribute *attr, char *buf) | 215 | struct device_attribute *attr, char *buf) |
209 | { | 216 | { |
210 | struct hd_struct *p = dev_to_part(dev); | 217 | struct hd_struct *p = dev_to_part(dev); |
211 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 218 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
212 | } | 219 | } |
213 | 220 | ||
214 | static ssize_t part_stat_show(struct device *dev, | 221 | ssize_t part_stat_show(struct device *dev, |
215 | struct device_attribute *attr, char *buf) | 222 | struct device_attribute *attr, char *buf) |
216 | { | 223 | { |
217 | struct hd_struct *p = dev_to_part(dev); | 224 | struct hd_struct *p = dev_to_part(dev); |
225 | int cpu; | ||
218 | 226 | ||
219 | preempt_disable(); | 227 | cpu = part_stat_lock(); |
220 | part_round_stats(p); | 228 | part_round_stats(cpu, p); |
221 | preempt_enable(); | 229 | part_stat_unlock(); |
222 | return sprintf(buf, | 230 | return sprintf(buf, |
223 | "%8lu %8lu %8llu %8u " | 231 | "%8lu %8lu %8llu %8u " |
224 | "%8lu %8lu %8llu %8u " | 232 | "%8lu %8lu %8llu %8u " |
@@ -238,17 +246,17 @@ static ssize_t part_stat_show(struct device *dev, | |||
238 | } | 246 | } |
239 | 247 | ||
240 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 248 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
241 | static ssize_t part_fail_show(struct device *dev, | 249 | ssize_t part_fail_show(struct device *dev, |
242 | struct device_attribute *attr, char *buf) | 250 | struct device_attribute *attr, char *buf) |
243 | { | 251 | { |
244 | struct hd_struct *p = dev_to_part(dev); | 252 | struct hd_struct *p = dev_to_part(dev); |
245 | 253 | ||
246 | return sprintf(buf, "%d\n", p->make_it_fail); | 254 | return sprintf(buf, "%d\n", p->make_it_fail); |
247 | } | 255 | } |
248 | 256 | ||
249 | static ssize_t part_fail_store(struct device *dev, | 257 | ssize_t part_fail_store(struct device *dev, |
250 | struct device_attribute *attr, | 258 | struct device_attribute *attr, |
251 | const char *buf, size_t count) | 259 | const char *buf, size_t count) |
252 | { | 260 | { |
253 | struct hd_struct *p = dev_to_part(dev); | 261 | struct hd_struct *p = dev_to_part(dev); |
254 | int i; | 262 | int i; |
@@ -260,6 +268,7 @@ static ssize_t part_fail_store(struct device *dev, | |||
260 | } | 268 | } |
261 | #endif | 269 | #endif |
262 | 270 | ||
271 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); | ||
263 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | 272 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); |
264 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 273 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
265 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 274 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail = | |||
269 | #endif | 278 | #endif |
270 | 279 | ||
271 | static struct attribute *part_attrs[] = { | 280 | static struct attribute *part_attrs[] = { |
281 | &dev_attr_partition.attr, | ||
272 | &dev_attr_start.attr, | 282 | &dev_attr_start.attr, |
273 | &dev_attr_size.attr, | 283 | &dev_attr_size.attr, |
274 | &dev_attr_stat.attr, | 284 | &dev_attr_stat.attr, |
@@ -300,40 +310,34 @@ struct device_type part_type = { | |||
300 | .release = part_release, | 310 | .release = part_release, |
301 | }; | 311 | }; |
302 | 312 | ||
303 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 313 | static void delete_partition_rcu_cb(struct rcu_head *head) |
304 | { | 314 | { |
305 | struct kobject *k; | 315 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); |
306 | 316 | ||
307 | k = kobject_get(&p->dev.kobj); | 317 | part->start_sect = 0; |
308 | p->holder_dir = kobject_create_and_add("holders", k); | 318 | part->nr_sects = 0; |
309 | kobject_put(k); | 319 | part_stat_set_all(part, 0); |
320 | put_device(part_to_dev(part)); | ||
310 | } | 321 | } |
311 | 322 | ||
312 | static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | 323 | void delete_partition(struct gendisk *disk, int partno) |
313 | { | 324 | { |
314 | struct kobject *k; | 325 | struct disk_part_tbl *ptbl = disk->part_tbl; |
326 | struct hd_struct *part; | ||
315 | 327 | ||
316 | k = kobject_get(&disk->dev.kobj); | 328 | if (partno >= ptbl->len) |
317 | disk->holder_dir = kobject_create_and_add("holders", k); | ||
318 | disk->slave_dir = kobject_create_and_add("slaves", k); | ||
319 | kobject_put(k); | ||
320 | } | ||
321 | |||
322 | void delete_partition(struct gendisk *disk, int part) | ||
323 | { | ||
324 | struct hd_struct *p = disk->part[part-1]; | ||
325 | |||
326 | if (!p) | ||
327 | return; | 329 | return; |
328 | if (!p->nr_sects) | 330 | |
331 | part = ptbl->part[partno]; | ||
332 | if (!part) | ||
329 | return; | 333 | return; |
330 | disk->part[part-1] = NULL; | 334 | |
331 | p->start_sect = 0; | 335 | blk_free_devt(part_devt(part)); |
332 | p->nr_sects = 0; | 336 | rcu_assign_pointer(ptbl->part[partno], NULL); |
333 | part_stat_set_all(p, 0); | 337 | kobject_put(part->holder_dir); |
334 | kobject_put(p->holder_dir); | 338 | device_del(part_to_dev(part)); |
335 | device_del(&p->dev); | 339 | |
336 | put_device(&p->dev); | 340 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
337 | } | 341 | } |
338 | 342 | ||
339 | static ssize_t whole_disk_show(struct device *dev, | 343 | static ssize_t whole_disk_show(struct device *dev, |
@@ -344,102 +348,132 @@ static ssize_t whole_disk_show(struct device *dev, | |||
344 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | 348 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, |
345 | whole_disk_show, NULL); | 349 | whole_disk_show, NULL); |
346 | 350 | ||
347 | int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 351 | int add_partition(struct gendisk *disk, int partno, |
352 | sector_t start, sector_t len, int flags) | ||
348 | { | 353 | { |
349 | struct hd_struct *p; | 354 | struct hd_struct *p; |
355 | dev_t devt = MKDEV(0, 0); | ||
356 | struct device *ddev = disk_to_dev(disk); | ||
357 | struct device *pdev; | ||
358 | struct disk_part_tbl *ptbl; | ||
359 | const char *dname; | ||
350 | int err; | 360 | int err; |
351 | 361 | ||
362 | err = disk_expand_part_tbl(disk, partno); | ||
363 | if (err) | ||
364 | return err; | ||
365 | ptbl = disk->part_tbl; | ||
366 | |||
367 | if (ptbl->part[partno]) | ||
368 | return -EBUSY; | ||
369 | |||
352 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 370 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
353 | if (!p) | 371 | if (!p) |
354 | return -ENOMEM; | 372 | return -ENOMEM; |
355 | 373 | ||
356 | if (!init_part_stats(p)) { | 374 | if (!init_part_stats(p)) { |
357 | err = -ENOMEM; | 375 | err = -ENOMEM; |
358 | goto out0; | 376 | goto out_free; |
359 | } | 377 | } |
378 | pdev = part_to_dev(p); | ||
379 | |||
360 | p->start_sect = start; | 380 | p->start_sect = start; |
361 | p->nr_sects = len; | 381 | p->nr_sects = len; |
362 | p->partno = part; | 382 | p->partno = partno; |
363 | p->policy = disk->policy; | 383 | p->policy = get_disk_ro(disk); |
364 | 384 | ||
365 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) | 385 | dname = dev_name(ddev); |
366 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 386 | if (isdigit(dname[strlen(dname) - 1])) |
367 | "%sp%d", disk->dev.bus_id, part); | 387 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); |
368 | else | 388 | else |
369 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 389 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); |
370 | "%s%d", disk->dev.bus_id, part); | 390 | |
391 | device_initialize(pdev); | ||
392 | pdev->class = &block_class; | ||
393 | pdev->type = &part_type; | ||
394 | pdev->parent = ddev; | ||
371 | 395 | ||
372 | device_initialize(&p->dev); | 396 | err = blk_alloc_devt(p, &devt); |
373 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); | 397 | if (err) |
374 | p->dev.class = &block_class; | 398 | goto out_free; |
375 | p->dev.type = &part_type; | 399 | pdev->devt = devt; |
376 | p->dev.parent = &disk->dev; | ||
377 | disk->part[part-1] = p; | ||
378 | 400 | ||
379 | /* delay uevent until 'holders' subdir is created */ | 401 | /* delay uevent until 'holders' subdir is created */ |
380 | p->dev.uevent_suppress = 1; | 402 | pdev->uevent_suppress = 1; |
381 | err = device_add(&p->dev); | 403 | err = device_add(pdev); |
382 | if (err) | 404 | if (err) |
383 | goto out1; | 405 | goto out_put; |
384 | partition_sysfs_add_subdir(p); | 406 | |
385 | p->dev.uevent_suppress = 0; | 407 | err = -ENOMEM; |
408 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | ||
409 | if (!p->holder_dir) | ||
410 | goto out_del; | ||
411 | |||
412 | pdev->uevent_suppress = 0; | ||
386 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 413 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
387 | err = device_create_file(&p->dev, &dev_attr_whole_disk); | 414 | err = device_create_file(pdev, &dev_attr_whole_disk); |
388 | if (err) | 415 | if (err) |
389 | goto out2; | 416 | goto out_del; |
390 | } | 417 | } |
391 | 418 | ||
419 | /* everything is up and running, commence */ | ||
420 | INIT_RCU_HEAD(&p->rcu_head); | ||
421 | rcu_assign_pointer(ptbl->part[partno], p); | ||
422 | |||
392 | /* suppress uevent if the disk supresses it */ | 423 | /* suppress uevent if the disk supresses it */ |
393 | if (!disk->dev.uevent_suppress) | 424 | if (!ddev->uevent_suppress) |
394 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | 425 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
395 | 426 | ||
396 | return 0; | 427 | return 0; |
397 | 428 | ||
398 | out2: | 429 | out_free: |
399 | device_del(&p->dev); | ||
400 | out1: | ||
401 | put_device(&p->dev); | ||
402 | free_part_stats(p); | ||
403 | out0: | ||
404 | kfree(p); | 430 | kfree(p); |
405 | return err; | 431 | return err; |
432 | out_del: | ||
433 | kobject_put(p->holder_dir); | ||
434 | device_del(pdev); | ||
435 | out_put: | ||
436 | put_device(pdev); | ||
437 | blk_free_devt(devt); | ||
438 | return err; | ||
406 | } | 439 | } |
407 | 440 | ||
408 | /* Not exported, helper to add_disk(). */ | 441 | /* Not exported, helper to add_disk(). */ |
409 | void register_disk(struct gendisk *disk) | 442 | void register_disk(struct gendisk *disk) |
410 | { | 443 | { |
444 | struct device *ddev = disk_to_dev(disk); | ||
411 | struct block_device *bdev; | 445 | struct block_device *bdev; |
446 | struct disk_part_iter piter; | ||
447 | struct hd_struct *part; | ||
412 | char *s; | 448 | char *s; |
413 | int i; | ||
414 | struct hd_struct *p; | ||
415 | int err; | 449 | int err; |
416 | 450 | ||
417 | disk->dev.parent = disk->driverfs_dev; | 451 | ddev->parent = disk->driverfs_dev; |
418 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); | ||
419 | 452 | ||
420 | strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); | 453 | strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); |
421 | /* ewww... some of these buggers have / in the name... */ | 454 | /* ewww... some of these buggers have / in the name... */ |
422 | s = strchr(disk->dev.bus_id, '/'); | 455 | s = strchr(ddev->bus_id, '/'); |
423 | if (s) | 456 | if (s) |
424 | *s = '!'; | 457 | *s = '!'; |
425 | 458 | ||
426 | /* delay uevents, until we scanned partition table */ | 459 | /* delay uevents, until we scanned partition table */ |
427 | disk->dev.uevent_suppress = 1; | 460 | ddev->uevent_suppress = 1; |
428 | 461 | ||
429 | if (device_add(&disk->dev)) | 462 | if (device_add(ddev)) |
430 | return; | 463 | return; |
431 | #ifndef CONFIG_SYSFS_DEPRECATED | 464 | #ifndef CONFIG_SYSFS_DEPRECATED |
432 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | 465 | err = sysfs_create_link(block_depr, &ddev->kobj, |
433 | kobject_name(&disk->dev.kobj)); | 466 | kobject_name(&ddev->kobj)); |
434 | if (err) { | 467 | if (err) { |
435 | device_del(&disk->dev); | 468 | device_del(ddev); |
436 | return; | 469 | return; |
437 | } | 470 | } |
438 | #endif | 471 | #endif |
439 | disk_sysfs_add_subdirs(disk); | 472 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
473 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
440 | 474 | ||
441 | /* No minors to use for partitions */ | 475 | /* No minors to use for partitions */ |
442 | if (disk->minors == 1) | 476 | if (!disk_partitionable(disk)) |
443 | goto exit; | 477 | goto exit; |
444 | 478 | ||
445 | /* No such device (e.g., media were just removed) */ | 479 | /* No such device (e.g., media were just removed) */ |
@@ -458,51 +492,80 @@ void register_disk(struct gendisk *disk) | |||
458 | 492 | ||
459 | exit: | 493 | exit: |
460 | /* announce disk after possible partitions are created */ | 494 | /* announce disk after possible partitions are created */ |
461 | disk->dev.uevent_suppress = 0; | 495 | ddev->uevent_suppress = 0; |
462 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 496 | kobject_uevent(&ddev->kobj, KOBJ_ADD); |
463 | 497 | ||
464 | /* announce possible partitions */ | 498 | /* announce possible partitions */ |
465 | for (i = 1; i < disk->minors; i++) { | 499 | disk_part_iter_init(&piter, disk, 0); |
466 | p = disk->part[i-1]; | 500 | while ((part = disk_part_iter_next(&piter))) |
467 | if (!p || !p->nr_sects) | 501 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); |
468 | continue; | 502 | disk_part_iter_exit(&piter); |
469 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
470 | } | ||
471 | } | 503 | } |
472 | 504 | ||
473 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 505 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
474 | { | 506 | { |
507 | struct disk_part_iter piter; | ||
508 | struct hd_struct *part; | ||
475 | struct parsed_partitions *state; | 509 | struct parsed_partitions *state; |
476 | int p, res; | 510 | int p, highest, res; |
477 | 511 | ||
478 | if (bdev->bd_part_count) | 512 | if (bdev->bd_part_count) |
479 | return -EBUSY; | 513 | return -EBUSY; |
480 | res = invalidate_partition(disk, 0); | 514 | res = invalidate_partition(disk, 0); |
481 | if (res) | 515 | if (res) |
482 | return res; | 516 | return res; |
483 | bdev->bd_invalidated = 0; | 517 | |
484 | for (p = 1; p < disk->minors; p++) | 518 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
485 | delete_partition(disk, p); | 519 | while ((part = disk_part_iter_next(&piter))) |
520 | delete_partition(disk, part->partno); | ||
521 | disk_part_iter_exit(&piter); | ||
522 | |||
486 | if (disk->fops->revalidate_disk) | 523 | if (disk->fops->revalidate_disk) |
487 | disk->fops->revalidate_disk(disk); | 524 | disk->fops->revalidate_disk(disk); |
525 | check_disk_size_change(disk, bdev); | ||
526 | bdev->bd_invalidated = 0; | ||
488 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 527 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
489 | return 0; | 528 | return 0; |
490 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 529 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
491 | return -EIO; | 530 | return -EIO; |
492 | 531 | ||
493 | /* tell userspace that the media / partition table may have changed */ | 532 | /* tell userspace that the media / partition table may have changed */ |
494 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | 533 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
534 | |||
535 | /* Detect the highest partition number and preallocate | ||
536 | * disk->part_tbl. This is an optimization and not strictly | ||
537 | * necessary. | ||
538 | */ | ||
539 | for (p = 1, highest = 0; p < state->limit; p++) | ||
540 | if (state->parts[p].size) | ||
541 | highest = p; | ||
495 | 542 | ||
543 | disk_expand_part_tbl(disk, highest); | ||
544 | |||
545 | /* add partitions */ | ||
496 | for (p = 1; p < state->limit; p++) { | 546 | for (p = 1; p < state->limit; p++) { |
497 | sector_t size = state->parts[p].size; | 547 | sector_t size = state->parts[p].size; |
498 | sector_t from = state->parts[p].from; | 548 | sector_t from = state->parts[p].from; |
499 | if (!size) | 549 | if (!size) |
500 | continue; | 550 | continue; |
501 | if (from + size > get_capacity(disk)) { | 551 | if (from >= get_capacity(disk)) { |
502 | printk(KERN_ERR " %s: p%d exceeds device capacity\n", | 552 | printk(KERN_WARNING |
503 | disk->disk_name, p); | 553 | "%s: p%d ignored, start %llu is behind the end of the disk\n", |
554 | disk->disk_name, p, (unsigned long long) from); | ||
504 | continue; | 555 | continue; |
505 | } | 556 | } |
557 | if (from + size > get_capacity(disk)) { | ||
558 | /* | ||
559 | * we can not ignore partitions of broken tables | ||
560 | * created by for example camera firmware, but we | ||
561 | * limit them to the end of the disk to avoid | ||
562 | * creating invalid block devices | ||
563 | */ | ||
564 | printk(KERN_WARNING | ||
565 | "%s: p%d size %llu limited to end of disk\n", | ||
566 | disk->disk_name, p, (unsigned long long) size); | ||
567 | size = get_capacity(disk) - from; | ||
568 | } | ||
506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 569 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
507 | if (res) { | 570 | if (res) { |
508 | printk(KERN_ERR " %s: p%d could not be added: %d\n", | 571 | printk(KERN_ERR " %s: p%d could not be added: %d\n", |
@@ -541,25 +604,31 @@ EXPORT_SYMBOL(read_dev_sector); | |||
541 | 604 | ||
542 | void del_gendisk(struct gendisk *disk) | 605 | void del_gendisk(struct gendisk *disk) |
543 | { | 606 | { |
544 | int p; | 607 | struct disk_part_iter piter; |
608 | struct hd_struct *part; | ||
545 | 609 | ||
546 | /* invalidate stuff */ | 610 | /* invalidate stuff */ |
547 | for (p = disk->minors - 1; p > 0; p--) { | 611 | disk_part_iter_init(&piter, disk, |
548 | invalidate_partition(disk, p); | 612 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
549 | delete_partition(disk, p); | 613 | while ((part = disk_part_iter_next(&piter))) { |
614 | invalidate_partition(disk, part->partno); | ||
615 | delete_partition(disk, part->partno); | ||
550 | } | 616 | } |
617 | disk_part_iter_exit(&piter); | ||
618 | |||
551 | invalidate_partition(disk, 0); | 619 | invalidate_partition(disk, 0); |
552 | disk->capacity = 0; | 620 | blk_free_devt(disk_to_dev(disk)->devt); |
621 | set_capacity(disk, 0); | ||
553 | disk->flags &= ~GENHD_FL_UP; | 622 | disk->flags &= ~GENHD_FL_UP; |
554 | unlink_gendisk(disk); | 623 | unlink_gendisk(disk); |
555 | disk_stat_set_all(disk, 0); | 624 | part_stat_set_all(&disk->part0, 0); |
556 | disk->stamp = 0; | 625 | disk->part0.stamp = 0; |
557 | 626 | ||
558 | kobject_put(disk->holder_dir); | 627 | kobject_put(disk->part0.holder_dir); |
559 | kobject_put(disk->slave_dir); | 628 | kobject_put(disk->slave_dir); |
560 | disk->driverfs_dev = NULL; | 629 | disk->driverfs_dev = NULL; |
561 | #ifndef CONFIG_SYSFS_DEPRECATED | 630 | #ifndef CONFIG_SYSFS_DEPRECATED |
562 | sysfs_remove_link(block_depr, disk->dev.bus_id); | 631 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
563 | #endif | 632 | #endif |
564 | device_del(&disk->dev); | 633 | device_del(disk_to_dev(disk)); |
565 | } | 634 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8b..98dbe1a84528 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -5,15 +5,13 @@ | |||
5 | * add_gd_partition adds a partitions details to the devices partition | 5 | * add_gd_partition adds a partitions details to the devices partition |
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | enum { MAX_PART = 256 }; | ||
9 | |||
10 | struct parsed_partitions { | 8 | struct parsed_partitions { |
11 | char name[BDEVNAME_SIZE]; | 9 | char name[BDEVNAME_SIZE]; |
12 | struct { | 10 | struct { |
13 | sector_t from; | 11 | sector_t from; |
14 | sector_t size; | 12 | sector_t size; |
15 | int flags; | 13 | int flags; |
16 | } parts[MAX_PART]; | 14 | } parts[DISK_MAX_PARTS]; |
17 | int next; | 15 | int next; |
18 | int limit; | 16 | int limit; |
19 | }; | 17 | }; |
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 73cd7a418f06..50f8f0600f06 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
@@ -57,3 +57,13 @@ config PROC_SYSCTL | |||
57 | As it is generally a good thing, you should say Y here unless | 57 | As it is generally a good thing, you should say Y here unless |
58 | building a kernel for install/rescue disks or your system is very | 58 | building a kernel for install/rescue disks or your system is very |
59 | limited in memory. | 59 | limited in memory. |
60 | |||
61 | config PROC_PAGE_MONITOR | ||
62 | default y | ||
63 | depends on PROC_FS && MMU | ||
64 | bool "Enable /proc page monitoring" if EMBEDDED | ||
65 | help | ||
66 | Various /proc files exist to monitor process memory utilization: | ||
67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, | ||
68 | /proc/kpagecount, and /proc/kpageflags. Disabling these | ||
69 | interfaces will reduce the size of the kernel by approximately 4kb. | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..bb9f4b05703d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -86,11 +86,6 @@ | |||
86 | #include <asm/processor.h> | 86 | #include <asm/processor.h> |
87 | #include "internal.h" | 87 | #include "internal.h" |
88 | 88 | ||
89 | /* Gcc optimizes away "strlen(x)" for constant x */ | ||
90 | #define ADDBUF(buffer, string) \ | ||
91 | do { memcpy(buffer, string, strlen(string)); \ | ||
92 | buffer += strlen(string); } while (0) | ||
93 | |||
94 | static inline void task_name(struct seq_file *m, struct task_struct *p) | 89 | static inline void task_name(struct seq_file *m, struct task_struct *p) |
95 | { | 90 | { |
96 | int i; | 91 | int i; |
@@ -261,7 +256,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
261 | sigemptyset(&ignored); | 256 | sigemptyset(&ignored); |
262 | sigemptyset(&caught); | 257 | sigemptyset(&caught); |
263 | 258 | ||
264 | rcu_read_lock(); | ||
265 | if (lock_task_sighand(p, &flags)) { | 259 | if (lock_task_sighand(p, &flags)) { |
266 | pending = p->pending.signal; | 260 | pending = p->pending.signal; |
267 | shpending = p->signal->shared_pending.signal; | 261 | shpending = p->signal->shared_pending.signal; |
@@ -272,7 +266,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
272 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; | 266 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; |
273 | unlock_task_sighand(p, &flags); | 267 | unlock_task_sighand(p, &flags); |
274 | } | 268 | } |
275 | rcu_read_unlock(); | ||
276 | 269 | ||
277 | seq_printf(m, "Threads:\t%d\n", num_threads); | 270 | seq_printf(m, "Threads:\t%d\n", num_threads); |
278 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); | 271 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); |
@@ -337,65 +330,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
337 | return 0; | 330 | return 0; |
338 | } | 331 | } |
339 | 332 | ||
340 | /* | ||
341 | * Use precise platform statistics if available: | ||
342 | */ | ||
343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
344 | static cputime_t task_utime(struct task_struct *p) | ||
345 | { | ||
346 | return p->utime; | ||
347 | } | ||
348 | |||
349 | static cputime_t task_stime(struct task_struct *p) | ||
350 | { | ||
351 | return p->stime; | ||
352 | } | ||
353 | #else | ||
354 | static cputime_t task_utime(struct task_struct *p) | ||
355 | { | ||
356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
357 | total = utime + cputime_to_clock_t(p->stime); | ||
358 | u64 temp; | ||
359 | |||
360 | /* | ||
361 | * Use CFS's precise accounting: | ||
362 | */ | ||
363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
364 | |||
365 | if (total) { | ||
366 | temp *= utime; | ||
367 | do_div(temp, total); | ||
368 | } | ||
369 | utime = (clock_t)temp; | ||
370 | |||
371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
372 | return p->prev_utime; | ||
373 | } | ||
374 | |||
375 | static cputime_t task_stime(struct task_struct *p) | ||
376 | { | ||
377 | clock_t stime; | ||
378 | |||
379 | /* | ||
380 | * Use CFS's precise accounting. (we subtract utime from | ||
381 | * the total, to make sure the total observed by userspace | ||
382 | * grows monotonically - apps rely on that): | ||
383 | */ | ||
384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
385 | cputime_to_clock_t(task_utime(p)); | ||
386 | |||
387 | if (stime >= 0) | ||
388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
389 | |||
390 | return p->prev_stime; | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static cputime_t task_gtime(struct task_struct *p) | ||
395 | { | ||
396 | return p->gtime; | ||
397 | } | ||
398 | |||
399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 333 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
400 | struct pid *pid, struct task_struct *task, int whole) | 334 | struct pid *pid, struct task_struct *task, int whole) |
401 | { | 335 | { |
@@ -454,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
454 | 388 | ||
455 | /* add up live thread stats at the group level */ | 389 | /* add up live thread stats at the group level */ |
456 | if (whole) { | 390 | if (whole) { |
391 | struct task_cputime cputime; | ||
457 | struct task_struct *t = task; | 392 | struct task_struct *t = task; |
458 | do { | 393 | do { |
459 | min_flt += t->min_flt; | 394 | min_flt += t->min_flt; |
460 | maj_flt += t->maj_flt; | 395 | maj_flt += t->maj_flt; |
461 | utime = cputime_add(utime, task_utime(t)); | ||
462 | stime = cputime_add(stime, task_stime(t)); | ||
463 | gtime = cputime_add(gtime, task_gtime(t)); | 396 | gtime = cputime_add(gtime, task_gtime(t)); |
464 | t = next_thread(t); | 397 | t = next_thread(t); |
465 | } while (t != task); | 398 | } while (t != task); |
466 | 399 | ||
467 | min_flt += sig->min_flt; | 400 | min_flt += sig->min_flt; |
468 | maj_flt += sig->maj_flt; | 401 | maj_flt += sig->maj_flt; |
469 | utime = cputime_add(utime, sig->utime); | 402 | thread_group_cputime(task, &cputime); |
470 | stime = cputime_add(stime, sig->stime); | 403 | utime = cputime.utime; |
404 | stime = cputime.stime; | ||
471 | gtime = cputime_add(gtime, sig->gtime); | 405 | gtime = cputime_add(gtime, sig->gtime); |
472 | } | 406 | } |
473 | 407 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index a28840b11b89..b5918ae8ca79 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -148,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | |||
148 | return count; | 148 | return count; |
149 | } | 149 | } |
150 | 150 | ||
151 | int maps_protect; | ||
152 | EXPORT_SYMBOL(maps_protect); | ||
153 | |||
154 | static struct fs_struct *get_fs_struct(struct task_struct *task) | 151 | static struct fs_struct *get_fs_struct(struct task_struct *task) |
155 | { | 152 | { |
156 | struct fs_struct *fs; | 153 | struct fs_struct *fs; |
@@ -164,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
164 | 161 | ||
165 | static int get_nr_threads(struct task_struct *tsk) | 162 | static int get_nr_threads(struct task_struct *tsk) |
166 | { | 163 | { |
167 | /* Must be called with the rcu_read_lock held */ | ||
168 | unsigned long flags; | 164 | unsigned long flags; |
169 | int count = 0; | 165 | int count = 0; |
170 | 166 | ||
@@ -471,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) | |||
471 | 467 | ||
472 | struct rlimit rlim[RLIM_NLIMITS]; | 468 | struct rlimit rlim[RLIM_NLIMITS]; |
473 | 469 | ||
474 | rcu_read_lock(); | 470 | if (!lock_task_sighand(task, &flags)) |
475 | if (!lock_task_sighand(task,&flags)) { | ||
476 | rcu_read_unlock(); | ||
477 | return 0; | 471 | return 0; |
478 | } | ||
479 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); | 472 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); |
480 | unlock_task_sighand(task, &flags); | 473 | unlock_task_sighand(task, &flags); |
481 | rcu_read_unlock(); | ||
482 | 474 | ||
483 | /* | 475 | /* |
484 | * print the file header | 476 | * print the file header |
@@ -2443,6 +2435,13 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
2443 | } | 2435 | } |
2444 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2436 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
2445 | 2437 | ||
2438 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | ||
2439 | struct pid *pid, struct task_struct *task) | ||
2440 | { | ||
2441 | seq_printf(m, "%08x\n", task->personality); | ||
2442 | return 0; | ||
2443 | } | ||
2444 | |||
2446 | /* | 2445 | /* |
2447 | * Thread groups | 2446 | * Thread groups |
2448 | */ | 2447 | */ |
@@ -2459,6 +2458,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2459 | REG("environ", S_IRUSR, environ), | 2458 | REG("environ", S_IRUSR, environ), |
2460 | INF("auxv", S_IRUSR, pid_auxv), | 2459 | INF("auxv", S_IRUSR, pid_auxv), |
2461 | ONE("status", S_IRUGO, pid_status), | 2460 | ONE("status", S_IRUGO, pid_status), |
2461 | ONE("personality", S_IRUSR, pid_personality), | ||
2462 | INF("limits", S_IRUSR, pid_limits), | 2462 | INF("limits", S_IRUSR, pid_limits), |
2463 | #ifdef CONFIG_SCHED_DEBUG | 2463 | #ifdef CONFIG_SCHED_DEBUG |
2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
@@ -2794,6 +2794,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2794 | REG("environ", S_IRUSR, environ), | 2794 | REG("environ", S_IRUSR, environ), |
2795 | INF("auxv", S_IRUSR, pid_auxv), | 2795 | INF("auxv", S_IRUSR, pid_auxv), |
2796 | ONE("status", S_IRUGO, pid_status), | 2796 | ONE("status", S_IRUGO, pid_status), |
2797 | ONE("personality", S_IRUSR, pid_personality), | ||
2797 | INF("limits", S_IRUSR, pid_limits), | 2798 | INF("limits", S_IRUSR, pid_limits), |
2798 | #ifdef CONFIG_SCHED_DEBUG | 2799 | #ifdef CONFIG_SCHED_DEBUG |
2799 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2800 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
@@ -3088,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
3088 | generic_fillattr(inode, stat); | 3089 | generic_fillattr(inode, stat); |
3089 | 3090 | ||
3090 | if (p) { | 3091 | if (p) { |
3091 | rcu_read_lock(); | ||
3092 | stat->nlink += get_nr_threads(p); | 3092 | stat->nlink += get_nr_threads(p); |
3093 | rcu_read_unlock(); | ||
3094 | put_task_struct(p); | 3093 | put_task_struct(p); |
3095 | } | 3094 | } |
3096 | 3095 | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -330,6 +330,7 @@ retry: | |||
330 | spin_lock(&proc_inum_lock); | 330 | spin_lock(&proc_inum_lock); |
331 | ida_remove(&proc_inum_ida, i); | 331 | ida_remove(&proc_inum_ida, i); |
332 | spin_unlock(&proc_inum_lock); | 332 | spin_unlock(&proc_inum_lock); |
333 | return 0; | ||
333 | } | 334 | } |
334 | return PROC_DYNAMIC_FIRST + i; | 335 | return PROC_DYNAMIC_FIRST + i; |
335 | } | 336 | } |
@@ -546,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
546 | 547 | ||
547 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
548 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
549 | printk(KERN_WARNING "proc_dir_entry '%s' already " | 550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
550 | "registered\n", dp->name); | 551 | dir->name, dp->name); |
551 | dump_stack(); | 552 | dump_stack(); |
552 | break; | 553 | break; |
553 | } | 554 | } |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8bb03f056c28..c6b4fa7e3b49 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -342,7 +342,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
342 | if (!pde->proc_fops) { | 342 | if (!pde->proc_fops) { |
343 | spin_unlock(&pde->pde_unload_lock); | 343 | spin_unlock(&pde->pde_unload_lock); |
344 | kfree(pdeo); | 344 | kfree(pdeo); |
345 | return rv; | 345 | return -EINVAL; |
346 | } | 346 | } |
347 | pde->pde_users++; | 347 | pde->pde_users++; |
348 | open = pde->proc_fops->open; | 348 | open = pde->proc_fops->open; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 442202314d53..3bfb7b8747b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -45,8 +45,6 @@ do { \ | |||
45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); | 45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | extern int maps_protect; | ||
49 | |||
50 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 48 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, |
51 | struct pid *pid, struct task_struct *task); | 49 | struct pid *pid, struct task_struct *task); |
52 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 50 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..59ea42e1ef03 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/quicklist.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
@@ -44,7 +45,6 @@ | |||
44 | #include <linux/blkdev.h> | 45 | #include <linux/blkdev.h> |
45 | #include <linux/hugetlb.h> | 46 | #include <linux/hugetlb.h> |
46 | #include <linux/jiffies.h> | 47 | #include <linux/jiffies.h> |
47 | #include <linux/sysrq.h> | ||
48 | #include <linux/vmalloc.h> | 48 | #include <linux/vmalloc.h> |
49 | #include <linux/crash_dump.h> | 49 | #include <linux/crash_dump.h> |
50 | #include <linux/pid_namespace.h> | 50 | #include <linux/pid_namespace.h> |
@@ -67,7 +67,6 @@ | |||
67 | extern int get_hardware_list(char *); | 67 | extern int get_hardware_list(char *); |
68 | extern int get_stram_list(char *); | 68 | extern int get_stram_list(char *); |
69 | extern int get_exec_domain_list(char *); | 69 | extern int get_exec_domain_list(char *); |
70 | extern int get_dma_list(char *); | ||
71 | 70 | ||
72 | static int proc_calc_metrics(char *page, char **start, off_t off, | 71 | static int proc_calc_metrics(char *page, char **start, off_t off, |
73 | int count, int *eof, int len) | 72 | int count, int *eof, int len) |
@@ -182,6 +181,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
182 | "SReclaimable: %8lu kB\n" | 181 | "SReclaimable: %8lu kB\n" |
183 | "SUnreclaim: %8lu kB\n" | 182 | "SUnreclaim: %8lu kB\n" |
184 | "PageTables: %8lu kB\n" | 183 | "PageTables: %8lu kB\n" |
184 | #ifdef CONFIG_QUICKLIST | ||
185 | "Quicklists: %8lu kB\n" | ||
186 | #endif | ||
185 | "NFS_Unstable: %8lu kB\n" | 187 | "NFS_Unstable: %8lu kB\n" |
186 | "Bounce: %8lu kB\n" | 188 | "Bounce: %8lu kB\n" |
187 | "WritebackTmp: %8lu kB\n" | 189 | "WritebackTmp: %8lu kB\n" |
@@ -214,6 +216,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
214 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 216 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
215 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 217 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
216 | K(global_page_state(NR_PAGETABLE)), | 218 | K(global_page_state(NR_PAGETABLE)), |
219 | #ifdef CONFIG_QUICKLIST | ||
220 | K(quicklist_total_size()), | ||
221 | #endif | ||
217 | K(global_page_state(NR_UNSTABLE_NFS)), | 222 | K(global_page_state(NR_UNSTABLE_NFS)), |
218 | K(global_page_state(NR_BOUNCE)), | 223 | K(global_page_state(NR_BOUNCE)), |
219 | K(global_page_state(NR_WRITEBACK_TEMP)), | 224 | K(global_page_state(NR_WRITEBACK_TEMP)), |
@@ -677,6 +682,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, | |||
677 | return proc_calc_metrics(page, start, off, count, eof, len); | 682 | return proc_calc_metrics(page, start, off, count, eof, len); |
678 | } | 683 | } |
679 | 684 | ||
685 | #ifdef CONFIG_FILE_LOCKING | ||
680 | static int locks_open(struct inode *inode, struct file *filp) | 686 | static int locks_open(struct inode *inode, struct file *filp) |
681 | { | 687 | { |
682 | return seq_open(filp, &locks_seq_operations); | 688 | return seq_open(filp, &locks_seq_operations); |
@@ -688,6 +694,7 @@ static const struct file_operations proc_locks_operations = { | |||
688 | .llseek = seq_lseek, | 694 | .llseek = seq_lseek, |
689 | .release = seq_release, | 695 | .release = seq_release, |
690 | }; | 696 | }; |
697 | #endif /* CONFIG_FILE_LOCKING */ | ||
691 | 698 | ||
692 | static int execdomains_read_proc(char *page, char **start, off_t off, | 699 | static int execdomains_read_proc(char *page, char **start, off_t off, |
693 | int count, int *eof, void *data) | 700 | int count, int *eof, void *data) |
@@ -696,28 +703,6 @@ static int execdomains_read_proc(char *page, char **start, off_t off, | |||
696 | return proc_calc_metrics(page, start, off, count, eof, len); | 703 | return proc_calc_metrics(page, start, off, count, eof, len); |
697 | } | 704 | } |
698 | 705 | ||
699 | #ifdef CONFIG_MAGIC_SYSRQ | ||
700 | /* | ||
701 | * writing 'C' to /proc/sysrq-trigger is like sysrq-C | ||
702 | */ | ||
703 | static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, | ||
704 | size_t count, loff_t *ppos) | ||
705 | { | ||
706 | if (count) { | ||
707 | char c; | ||
708 | |||
709 | if (get_user(c, buf)) | ||
710 | return -EFAULT; | ||
711 | __handle_sysrq(c, NULL, 0); | ||
712 | } | ||
713 | return count; | ||
714 | } | ||
715 | |||
716 | static const struct file_operations proc_sysrq_trigger_operations = { | ||
717 | .write = write_sysrq_trigger, | ||
718 | }; | ||
719 | #endif | ||
720 | |||
721 | #ifdef CONFIG_PROC_PAGE_MONITOR | 706 | #ifdef CONFIG_PROC_PAGE_MONITOR |
722 | #define KPMSIZE sizeof(u64) | 707 | #define KPMSIZE sizeof(u64) |
723 | #define KPMMASK (KPMSIZE - 1) | 708 | #define KPMMASK (KPMSIZE - 1) |
@@ -881,7 +866,9 @@ void __init proc_misc_init(void) | |||
881 | #ifdef CONFIG_PRINTK | 866 | #ifdef CONFIG_PRINTK |
882 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); | 867 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); |
883 | #endif | 868 | #endif |
869 | #ifdef CONFIG_FILE_LOCKING | ||
884 | proc_create("locks", 0, NULL, &proc_locks_operations); | 870 | proc_create("locks", 0, NULL, &proc_locks_operations); |
871 | #endif | ||
885 | proc_create("devices", 0, NULL, &proc_devinfo_operations); | 872 | proc_create("devices", 0, NULL, &proc_devinfo_operations); |
886 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); | 873 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); |
887 | #ifdef CONFIG_BLOCK | 874 | #ifdef CONFIG_BLOCK |
@@ -924,7 +911,4 @@ void __init proc_misc_init(void) | |||
924 | #ifdef CONFIG_PROC_VMCORE | 911 | #ifdef CONFIG_PROC_VMCORE |
925 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); | 912 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); |
926 | #endif | 913 | #endif |
927 | #ifdef CONFIG_MAGIC_SYSRQ | ||
928 | proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); | ||
929 | #endif | ||
930 | } | 914 | } |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f9a8b892718f..945a81043ba2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -66,7 +66,7 @@ static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | |||
66 | return NULL; | 66 | return NULL; |
67 | } | 67 | } |
68 | 68 | ||
69 | struct ctl_table_header *grab_header(struct inode *inode) | 69 | static struct ctl_table_header *grab_header(struct inode *inode) |
70 | { | 70 | { |
71 | if (PROC_I(inode)->sysctl) | 71 | if (PROC_I(inode)->sysctl) |
72 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 72 | return sysctl_head_grab(PROC_I(inode)->sysctl); |
@@ -395,10 +395,10 @@ static struct dentry_operations proc_sys_dentry_operations = { | |||
395 | .d_compare = proc_sys_compare, | 395 | .d_compare = proc_sys_compare, |
396 | }; | 396 | }; |
397 | 397 | ||
398 | static struct proc_dir_entry *proc_sys_root; | ||
399 | |||
400 | int proc_sys_init(void) | 398 | int proc_sys_init(void) |
401 | { | 399 | { |
400 | struct proc_dir_entry *proc_sys_root; | ||
401 | |||
402 | proc_sys_root = proc_mkdir("sys", NULL); | 402 | proc_sys_root = proc_mkdir("sys", NULL); |
403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 73d1891ee625..4806830ea2a1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -210,9 +210,6 @@ static int show_map(struct seq_file *m, void *v) | |||
210 | dev_t dev = 0; | 210 | dev_t dev = 0; |
211 | int len; | 211 | int len; |
212 | 212 | ||
213 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
214 | return -EACCES; | ||
215 | |||
216 | if (file) { | 213 | if (file) { |
217 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 214 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
218 | dev = inode->i_sb->s_dev; | 215 | dev = inode->i_sb->s_dev; |
@@ -742,22 +739,11 @@ const struct file_operations proc_pagemap_operations = { | |||
742 | #ifdef CONFIG_NUMA | 739 | #ifdef CONFIG_NUMA |
743 | extern int show_numa_map(struct seq_file *m, void *v); | 740 | extern int show_numa_map(struct seq_file *m, void *v); |
744 | 741 | ||
745 | static int show_numa_map_checked(struct seq_file *m, void *v) | ||
746 | { | ||
747 | struct proc_maps_private *priv = m->private; | ||
748 | struct task_struct *task = priv->task; | ||
749 | |||
750 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
751 | return -EACCES; | ||
752 | |||
753 | return show_numa_map(m, v); | ||
754 | } | ||
755 | |||
756 | static const struct seq_operations proc_pid_numa_maps_op = { | 742 | static const struct seq_operations proc_pid_numa_maps_op = { |
757 | .start = m_start, | 743 | .start = m_start, |
758 | .next = m_next, | 744 | .next = m_next, |
759 | .stop = m_stop, | 745 | .stop = m_stop, |
760 | .show = show_numa_map_checked | 746 | .show = show_numa_map, |
761 | }; | 747 | }; |
762 | 748 | ||
763 | static int numa_maps_open(struct inode *inode, struct file *file) | 749 | static int numa_maps_open(struct inode *inode, struct file *file) |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 5d84e7121df8..219bd79ea894 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, | |||
110 | static int show_map(struct seq_file *m, void *_vml) | 110 | static int show_map(struct seq_file *m, void *_vml) |
111 | { | 111 | { |
112 | struct vm_list_struct *vml = _vml; | 112 | struct vm_list_struct *vml = _vml; |
113 | struct proc_maps_private *priv = m->private; | ||
114 | struct task_struct *task = priv->task; | ||
115 | |||
116 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
117 | return -EACCES; | ||
118 | 113 | ||
119 | return nommu_vma_show(m, vml->vma); | 114 | return nommu_vma_show(m, vml->vma); |
120 | } | 115 | } |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9ac0f5e064e0..841368b87a29 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -165,14 +165,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
165 | return acc; | 165 | return acc; |
166 | } | 166 | } |
167 | 167 | ||
168 | static int open_vmcore(struct inode *inode, struct file *filp) | ||
169 | { | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | const struct file_operations proc_vmcore_operations = { | 168 | const struct file_operations proc_vmcore_operations = { |
174 | .read = read_vmcore, | 169 | .read = read_vmcore, |
175 | .open = open_vmcore, | ||
176 | }; | 170 | }; |
177 | 171 | ||
178 | static struct vmcore* __init get_new_element(void) | 172 | static struct vmcore* __init get_new_element(void) |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { | |||
58 | * size 0 on the assumption that it's going to be used for an mmap of shared | 58 | * size 0 on the assumption that it's going to be used for an mmap of shared |
59 | * memory | 59 | * memory |
60 | */ | 60 | */ |
61 | static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | 61 | int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) |
62 | { | 62 | { |
63 | struct pagevec lru_pvec; | 63 | struct pagevec lru_pvec; |
64 | unsigned long npages, xpages, loop, limit; | 64 | unsigned long npages, xpages, loop, limit; |
diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset | |||
80 | if (buf->result) | 80 | if (buf->result) |
81 | return -EINVAL; | 81 | return -EINVAL; |
82 | d_ino = ino; | 82 | d_ino = ino; |
83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
84 | buf->result = -EOVERFLOW; | ||
84 | return -EOVERFLOW; | 85 | return -EOVERFLOW; |
86 | } | ||
85 | buf->result++; | 87 | buf->result++; |
86 | dirent = buf->dirent; | 88 | dirent = buf->dirent; |
87 | if (!access_ok(VERIFY_WRITE, dirent, | 89 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | |||
155 | if (reclen > buf->count) | 157 | if (reclen > buf->count) |
156 | return -EINVAL; | 158 | return -EINVAL; |
157 | d_ino = ino; | 159 | d_ino = ino; |
158 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 160 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
161 | buf->error = -EOVERFLOW; | ||
159 | return -EOVERFLOW; | 162 | return -EOVERFLOW; |
163 | } | ||
160 | dirent = buf->previous; | 164 | dirent = buf->previous; |
161 | if (dirent) { | 165 | if (dirent) { |
162 | if (__put_user(offset, &dirent->d_off)) | 166 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index b9dbeeca7049..37173fa07d15 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
@@ -8,8 +8,6 @@ | |||
8 | 8 | ||
9 | /* proc info support a la one created by Sizif@Botik.RU for PGC */ | 9 | /* proc info support a la one created by Sizif@Botik.RU for PGC */ |
10 | 10 | ||
11 | /* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */ | ||
12 | |||
13 | #include <linux/module.h> | 11 | #include <linux/module.h> |
14 | #include <linux/time.h> | 12 | #include <linux/time.h> |
15 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
@@ -621,7 +619,6 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, | |||
621 | #endif | 619 | #endif |
622 | 620 | ||
623 | /* | 621 | /* |
624 | * $Log: procfs.c,v $ | ||
625 | * Revision 1.1.8.2 2001/07/15 17:08:42 god | 622 | * Revision 1.1.8.2 2001/07/15 17:08:42 god |
626 | * . use get_super() in procfs.c | 623 | * . use get_super() in procfs.c |
627 | * . remove remove_save_link() from reiserfs_do_truncate() | 624 | * . remove remove_save_link() from reiserfs_do_truncate() |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index bb3cb5b7cdb2..ad92461cbfc3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, | |||
155 | xadir = open_xa_dir(inode, flags); | 155 | xadir = open_xa_dir(inode, flags); |
156 | if (IS_ERR(xadir)) { | 156 | if (IS_ERR(xadir)) { |
157 | return ERR_CAST(xadir); | 157 | return ERR_CAST(xadir); |
158 | } else if (xadir && !xadir->d_inode) { | 158 | } else if (!xadir->d_inode) { |
159 | dput(xadir); | 159 | dput(xadir); |
160 | return ERR_PTR(-ENODATA); | 160 | return ERR_PTR(-ENODATA); |
161 | } | 161 | } |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 5d54205e486b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
108 | goto Done; | 108 | goto Done; |
109 | } | 109 | } |
110 | /* we need at least one record in buffer */ | 110 | /* we need at least one record in buffer */ |
111 | pos = m->index; | ||
112 | p = m->op->start(m, &pos); | ||
111 | while (1) { | 113 | while (1) { |
112 | pos = m->index; | ||
113 | p = m->op->start(m, &pos); | ||
114 | err = PTR_ERR(p); | 114 | err = PTR_ERR(p); |
115 | if (!p || IS_ERR(p)) | 115 | if (!p || IS_ERR(p)) |
116 | break; | 116 | break; |
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
119 | break; | 119 | break; |
120 | if (unlikely(err)) | 120 | if (unlikely(err)) |
121 | m->count = 0; | 121 | m->count = 0; |
122 | if (unlikely(!m->count)) { | ||
123 | p = m->op->next(m, p, &pos); | ||
124 | m->index = pos; | ||
125 | continue; | ||
126 | } | ||
122 | if (m->count < m->size) | 127 | if (m->count < m->size) |
123 | goto Fill; | 128 | goto Fill; |
124 | m->op->stop(m, p); | 129 | m->op->stop(m, p); |
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
128 | goto Enomem; | 133 | goto Enomem; |
129 | m->count = 0; | 134 | m->count = 0; |
130 | m->version = 0; | 135 | m->version = 0; |
136 | pos = m->index; | ||
137 | p = m->op->start(m, &pos); | ||
131 | } | 138 | } |
132 | m->op->stop(m, p); | 139 | m->op->stop(m, p); |
133 | m->count = 0; | 140 | m->count = 0; |
diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4bb09c..a1e701c27156 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 898 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
899 | return -EBADF; | 899 | return -EBADF; |
900 | 900 | ||
901 | if (unlikely(out->f_flags & O_APPEND)) | ||
902 | return -EINVAL; | ||
903 | |||
901 | ret = rw_verify_area(WRITE, out, ppos, len); | 904 | ret = rw_verify_area(WRITE, out, ppos, len); |
902 | if (unlikely(ret < 0)) | 905 | if (unlikely(ret < 0)) |
903 | return ret; | 906 | return ret; |
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 006fc64227dd..66f6e58a7e4b 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -61,6 +61,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
61 | int size = dentry->d_inode->i_size; | 61 | int size = dentry->d_inode->i_size; |
62 | loff_t offs = *off; | 62 | loff_t offs = *off; |
63 | int count = min_t(size_t, bytes, PAGE_SIZE); | 63 | int count = min_t(size_t, bytes, PAGE_SIZE); |
64 | char *temp; | ||
64 | 65 | ||
65 | if (size) { | 66 | if (size) { |
66 | if (offs > size) | 67 | if (offs > size) |
@@ -69,23 +70,33 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
69 | count = size - offs; | 70 | count = size - offs; |
70 | } | 71 | } |
71 | 72 | ||
73 | temp = kmalloc(count, GFP_KERNEL); | ||
74 | if (!temp) | ||
75 | return -ENOMEM; | ||
76 | |||
72 | mutex_lock(&bb->mutex); | 77 | mutex_lock(&bb->mutex); |
73 | 78 | ||
74 | count = fill_read(dentry, bb->buffer, offs, count); | 79 | count = fill_read(dentry, bb->buffer, offs, count); |
75 | if (count < 0) | 80 | if (count < 0) { |
76 | goto out_unlock; | 81 | mutex_unlock(&bb->mutex); |
82 | goto out_free; | ||
83 | } | ||
77 | 84 | ||
78 | if (copy_to_user(userbuf, bb->buffer, count)) { | 85 | memcpy(temp, bb->buffer, count); |
86 | |||
87 | mutex_unlock(&bb->mutex); | ||
88 | |||
89 | if (copy_to_user(userbuf, temp, count)) { | ||
79 | count = -EFAULT; | 90 | count = -EFAULT; |
80 | goto out_unlock; | 91 | goto out_free; |
81 | } | 92 | } |
82 | 93 | ||
83 | pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); | 94 | pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); |
84 | 95 | ||
85 | *off = offs + count; | 96 | *off = offs + count; |
86 | 97 | ||
87 | out_unlock: | 98 | out_free: |
88 | mutex_unlock(&bb->mutex); | 99 | kfree(temp); |
89 | return count; | 100 | return count; |
90 | } | 101 | } |
91 | 102 | ||
@@ -118,6 +129,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
118 | int size = dentry->d_inode->i_size; | 129 | int size = dentry->d_inode->i_size; |
119 | loff_t offs = *off; | 130 | loff_t offs = *off; |
120 | int count = min_t(size_t, bytes, PAGE_SIZE); | 131 | int count = min_t(size_t, bytes, PAGE_SIZE); |
132 | char *temp; | ||
121 | 133 | ||
122 | if (size) { | 134 | if (size) { |
123 | if (offs > size) | 135 | if (offs > size) |
@@ -126,19 +138,27 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
126 | count = size - offs; | 138 | count = size - offs; |
127 | } | 139 | } |
128 | 140 | ||
129 | mutex_lock(&bb->mutex); | 141 | temp = kmalloc(count, GFP_KERNEL); |
142 | if (!temp) | ||
143 | return -ENOMEM; | ||
130 | 144 | ||
131 | if (copy_from_user(bb->buffer, userbuf, count)) { | 145 | if (copy_from_user(temp, userbuf, count)) { |
132 | count = -EFAULT; | 146 | count = -EFAULT; |
133 | goto out_unlock; | 147 | goto out_free; |
134 | } | 148 | } |
135 | 149 | ||
150 | mutex_lock(&bb->mutex); | ||
151 | |||
152 | memcpy(bb->buffer, temp, count); | ||
153 | |||
136 | count = flush_write(dentry, bb->buffer, offs, count); | 154 | count = flush_write(dentry, bb->buffer, offs, count); |
155 | mutex_unlock(&bb->mutex); | ||
156 | |||
137 | if (count > 0) | 157 | if (count > 0) |
138 | *off = offs + count; | 158 | *off = offs + count; |
139 | 159 | ||
140 | out_unlock: | 160 | out_free: |
141 | mutex_unlock(&bb->mutex); | 161 | kfree(temp); |
142 | return count; | 162 | return count; |
143 | } | 163 | } |
144 | 164 | ||
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aedaeba82ae5..3a05a596e3b4 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -370,17 +370,17 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, | |||
370 | memset(acxt, 0, sizeof(*acxt)); | 370 | memset(acxt, 0, sizeof(*acxt)); |
371 | acxt->parent_sd = parent_sd; | 371 | acxt->parent_sd = parent_sd; |
372 | 372 | ||
373 | /* Lookup parent inode. inode initialization and I_NEW | 373 | /* Lookup parent inode. inode initialization is protected by |
374 | * clearing are protected by sysfs_mutex. By grabbing it and | 374 | * sysfs_mutex, so inode existence can be determined by |
375 | * looking up with _nowait variant, inode state can be | 375 | * looking up inode while holding sysfs_mutex. |
376 | * determined reliably. | ||
377 | */ | 376 | */ |
378 | mutex_lock(&sysfs_mutex); | 377 | mutex_lock(&sysfs_mutex); |
379 | 378 | ||
380 | inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, | 379 | inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, |
381 | parent_sd); | 380 | parent_sd); |
381 | if (inode) { | ||
382 | WARN_ON(inode->i_state & I_NEW); | ||
382 | 383 | ||
383 | if (inode && !(inode->i_state & I_NEW)) { | ||
384 | /* parent inode available */ | 384 | /* parent inode available */ |
385 | acxt->parent_inode = inode; | 385 | acxt->parent_inode = inode; |
386 | 386 | ||
@@ -393,8 +393,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, | |||
393 | mutex_lock(&inode->i_mutex); | 393 | mutex_lock(&inode->i_mutex); |
394 | mutex_lock(&sysfs_mutex); | 394 | mutex_lock(&sysfs_mutex); |
395 | } | 395 | } |
396 | } else | 396 | } |
397 | iput(inode); | ||
398 | } | 397 | } |
399 | 398 | ||
400 | /** | 399 | /** |
@@ -636,6 +635,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | |||
636 | 635 | ||
637 | return sd; | 636 | return sd; |
638 | } | 637 | } |
638 | EXPORT_SYMBOL_GPL(sysfs_get_dirent); | ||
639 | 639 | ||
640 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | 640 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, |
641 | const char *name, struct sysfs_dirent **p_sd) | 641 | const char *name, struct sysfs_dirent **p_sd) |
@@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | |||
829 | if (!new_dentry) | 829 | if (!new_dentry) |
830 | goto out_unlock; | 830 | goto out_unlock; |
831 | 831 | ||
832 | /* rename kobject and sysfs_dirent */ | 832 | /* rename sysfs_dirent */ |
833 | error = -ENOMEM; | 833 | error = -ENOMEM; |
834 | new_name = dup_name = kstrdup(new_name, GFP_KERNEL); | 834 | new_name = dup_name = kstrdup(new_name, GFP_KERNEL); |
835 | if (!new_name) | 835 | if (!new_name) |
836 | goto out_unlock; | 836 | goto out_unlock; |
837 | 837 | ||
838 | error = kobject_set_name(kobj, "%s", new_name); | ||
839 | if (error) | ||
840 | goto out_unlock; | ||
841 | |||
842 | dup_name = sd->s_name; | 838 | dup_name = sd->s_name; |
843 | sd->s_name = new_name; | 839 | sd->s_name = new_name; |
844 | 840 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index c9e4e5091da1..1f4a3f877262 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -19,10 +19,18 @@ | |||
19 | #include <linux/poll.h> | 19 | #include <linux/poll.h> |
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
22 | #include <linux/limits.h> | ||
22 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
23 | 24 | ||
24 | #include "sysfs.h" | 25 | #include "sysfs.h" |
25 | 26 | ||
27 | /* used in crash dumps to help with debugging */ | ||
28 | static char last_sysfs_file[PATH_MAX]; | ||
29 | void sysfs_printk_last_file(void) | ||
30 | { | ||
31 | printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); | ||
32 | } | ||
33 | |||
26 | /* | 34 | /* |
27 | * There's one sysfs_buffer for each open file and one | 35 | * There's one sysfs_buffer for each open file and one |
28 | * sysfs_open_dirent for each sysfs_dirent with one or more open | 36 | * sysfs_open_dirent for each sysfs_dirent with one or more open |
@@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
328 | struct sysfs_buffer *buffer; | 336 | struct sysfs_buffer *buffer; |
329 | struct sysfs_ops *ops; | 337 | struct sysfs_ops *ops; |
330 | int error = -EACCES; | 338 | int error = -EACCES; |
339 | char *p; | ||
340 | |||
341 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); | ||
342 | if (p) | ||
343 | memmove(last_sysfs_file, p, strlen(p) + 1); | ||
331 | 344 | ||
332 | /* need attr_sd for attr and ops, its parent for kobj */ | 345 | /* need attr_sd for attr and ops, its parent for kobj */ |
333 | if (!sysfs_get_active_two(attr_sd)) | 346 | if (!sysfs_get_active_two(attr_sd)) |
@@ -440,7 +453,23 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) | |||
440 | return POLLERR|POLLPRI; | 453 | return POLLERR|POLLPRI; |
441 | } | 454 | } |
442 | 455 | ||
443 | void sysfs_notify(struct kobject *k, char *dir, char *attr) | 456 | void sysfs_notify_dirent(struct sysfs_dirent *sd) |
457 | { | ||
458 | struct sysfs_open_dirent *od; | ||
459 | |||
460 | spin_lock(&sysfs_open_dirent_lock); | ||
461 | |||
462 | od = sd->s_attr.open; | ||
463 | if (od) { | ||
464 | atomic_inc(&od->event); | ||
465 | wake_up_interruptible(&od->poll); | ||
466 | } | ||
467 | |||
468 | spin_unlock(&sysfs_open_dirent_lock); | ||
469 | } | ||
470 | EXPORT_SYMBOL_GPL(sysfs_notify_dirent); | ||
471 | |||
472 | void sysfs_notify(struct kobject *k, const char *dir, const char *attr) | ||
444 | { | 473 | { |
445 | struct sysfs_dirent *sd = k->sd; | 474 | struct sysfs_dirent *sd = k->sd; |
446 | 475 | ||
@@ -450,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr) | |||
450 | sd = sysfs_find_dirent(sd, dir); | 479 | sd = sysfs_find_dirent(sd, dir); |
451 | if (sd && attr) | 480 | if (sd && attr) |
452 | sd = sysfs_find_dirent(sd, attr); | 481 | sd = sysfs_find_dirent(sd, attr); |
453 | if (sd) { | 482 | if (sd) |
454 | struct sysfs_open_dirent *od; | 483 | sysfs_notify_dirent(sd); |
455 | |||
456 | spin_lock(&sysfs_open_dirent_lock); | ||
457 | |||
458 | od = sd->s_attr.open; | ||
459 | if (od) { | ||
460 | atomic_inc(&od->event); | ||
461 | wake_up_interruptible(&od->poll); | ||
462 | } | ||
463 | |||
464 | spin_unlock(&sysfs_open_dirent_lock); | ||
465 | } | ||
466 | 484 | ||
467 | mutex_unlock(&sysfs_mutex); | 485 | mutex_unlock(&sysfs_mutex); |
468 | } | 486 | } |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 14f0023984d7..ab343e371d64 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
19 | #include <linux/module.h> | ||
19 | 20 | ||
20 | #include "sysfs.h" | 21 | #include "sysfs.h" |
21 | 22 | ||
@@ -115,3 +116,17 @@ out_err: | |||
115 | sysfs_dir_cachep = NULL; | 116 | sysfs_dir_cachep = NULL; |
116 | goto out; | 117 | goto out; |
117 | } | 118 | } |
119 | |||
120 | #undef sysfs_get | ||
121 | struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | ||
122 | { | ||
123 | return __sysfs_get(sd); | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(sysfs_get); | ||
126 | |||
127 | #undef sysfs_put | ||
128 | void sysfs_put(struct sysfs_dirent *sd) | ||
129 | { | ||
130 | __sysfs_put(sd); | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(sysfs_put); | ||
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a5db496f71c7..93c6d6b27c4d 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, | |||
124 | struct sysfs_dirent **p_sd); | 124 | struct sysfs_dirent **p_sd); |
125 | void sysfs_remove_subdir(struct sysfs_dirent *sd); | 125 | void sysfs_remove_subdir(struct sysfs_dirent *sd); |
126 | 126 | ||
127 | static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | 127 | static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) |
128 | { | 128 | { |
129 | if (sd) { | 129 | if (sd) { |
130 | WARN_ON(!atomic_read(&sd->s_count)); | 130 | WARN_ON(!atomic_read(&sd->s_count)); |
@@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | |||
132 | } | 132 | } |
133 | return sd; | 133 | return sd; |
134 | } | 134 | } |
135 | #define sysfs_get(sd) __sysfs_get(sd) | ||
135 | 136 | ||
136 | static inline void sysfs_put(struct sysfs_dirent *sd) | 137 | static inline void __sysfs_put(struct sysfs_dirent *sd) |
137 | { | 138 | { |
138 | if (sd && atomic_dec_and_test(&sd->s_count)) | 139 | if (sd && atomic_dec_and_test(&sd->s_count)) |
139 | release_sysfs_dirent(sd); | 140 | release_sysfs_dirent(sd); |
140 | } | 141 | } |
142 | #define sysfs_put(sd) __sysfs_put(sd) | ||
141 | 143 | ||
142 | /* | 144 | /* |
143 | * inode.c | 145 | * inode.c |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
714 | } | 702 | } |
715 | 703 | ||
716 | /** | 704 | /** |
717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space application tend to expect that if the file-system | ||
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexind nodes as well. This introduces additional | ||
714 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
715 | * above expectetion. | ||
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because effective UBIFS fanout is twice | ||
736 | * as less than maximum fanout, we assume that each data node | ||
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
738 | * Note, the multiplier 3 is because UBIFS reseves thrice as more space | ||
739 | * for the index. | ||
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
719 | * | 753 | * |
720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alighment, wastage at the end of eraseblocks, etc), it cannot report real | ||
758 | * amount of free flash space it has (well, because not all dirty space is | ||
759 | * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
760 | * it would bread user expectetion about what free space is. Users seem to | ||
761 | * accustomed to assume that if the file-system reports N bytes of free space, | ||
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
721 | */ | 765 | */ |
722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
723 | { | 767 | { |
724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
726 | 770 | ||
727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
730 | 774 | ||
731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
733 | else | 777 | * space is zero. |
734 | rsvd_idx_lebs = 0; | 778 | */ |
735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
737 | - c->lst.taken_empty_lebs) { | ||
738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
740 | } | 782 | } |
741 | 783 | ||
742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
746 | 806 | ||
747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b9cb77473758..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); |
539 | for (i = 0; i < n; i++) | 539 | for (i = 0; i < n; i++) |
540 | printk(KERN_DEBUG "\t ino %llu\n", | 540 | printk(KERN_DEBUG "\t ino %llu\n", |
541 | le64_to_cpu(orph->inos[i])); | 541 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
542 | break; | 542 | break; |
543 | } | 543 | } |
544 | default: | 544 | default: |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
426 | 426 | ||
427 | while (1) { | 427 | while (1) { |
428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", | 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", |
429 | dent->name, le64_to_cpu(dent->inum), | 429 | dent->name, (unsigned long long)le64_to_cpu(dent->inum), |
430 | key_hash_flash(c, &dent->key)); | 430 | key_hash_flash(c, &dent->key)); |
431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); | 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); |
432 | 432 | ||
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
587 | if (err) { | 587 | if (err) { |
588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
589 | return err; | 589 | return err; |
590 | err = 0; | ||
591 | budgeted = 0; | 590 | budgeted = 0; |
592 | } | 591 | } |
593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must to set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must to set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
317 | lp = idx_lp; | 315 | lp = idx_lp; |
318 | 316 | ||
319 | if (lp) { | 317 | if (lp) { |
320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
321 | goto found; | 319 | goto found; |
322 | } | 320 | } |
323 | 321 | ||
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
509 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
510 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
511 | c->lst.taken_empty_lebs; | 509 | c->lst.taken_empty_lebs; |
512 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
513 | if (rsvd_idx_lebs < lebs) | 510 | if (rsvd_idx_lebs < lebs) |
514 | /* | 511 | /* |
515 | * OK to allocate an empty LEB, but we still don't want to go | 512 | * OK to allocate an empty LEB, but we still don't want to go |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
334 | 334 | ||
335 | err = move_nodes(c, sleb); | 335 | err = move_nodes(c, sleb); |
336 | if (err) | 336 | if (err) |
337 | goto out; | 337 | goto out_inc_seq; |
338 | 338 | ||
339 | err = gc_sync_wbufs(c); | 339 | err = gc_sync_wbufs(c); |
340 | if (err) | 340 | if (err) |
341 | goto out; | 341 | goto out_inc_seq; |
342 | 342 | ||
343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); | 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); |
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out_inc_seq; |
346 | |||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
346 | 352 | ||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
363 | out: | 369 | out: |
364 | ubifs_scan_destroy(sleb); | 370 | ubifs_scan_destroy(sleb); |
365 | return err; | 371 | return err; |
372 | |||
373 | out_inc_seq: | ||
374 | /* We may have moved at least some nodes so allow for races with TNC */ | ||
375 | c->gced_lnum = lnum; | ||
376 | smp_wmb(); | ||
377 | c->gc_seq += 1; | ||
378 | smp_wmb(); | ||
379 | goto out; | ||
366 | } | 380 | } |
367 | 381 | ||
368 | /** | 382 | /** |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | /** | 286 | /** |
287 | * ubifs_reported_space - calculate reported free space. | ||
288 | * @c: the UBIFS file-system description object | ||
289 | * @free: amount of free space | ||
290 | * | ||
291 | * This function calculates amount of free space which will be reported to | ||
292 | * user-space. User-space application tend to expect that if the file-system | ||
293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
295 | * node and it has to write indexind nodes as well. This introduces additional | ||
296 | * overhead, and UBIFS it has to report sligtly less free space to meet the | ||
297 | * above expectetion. | ||
298 | * | ||
299 | * This function assumes free space is made up of uncompressed data nodes and | ||
300 | * full index nodes (one per data node, doubled because we always allow enough | ||
301 | * space to write the index twice). | ||
302 | * | ||
303 | * Note, the calculation is pessimistic, which means that most of the time | ||
304 | * UBIFS reports less space than it actually has. | ||
305 | */ | ||
306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
307 | uint64_t free) | ||
308 | { | ||
309 | int divisor, factor; | ||
310 | |||
311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
313 | do_div(free, divisor); | ||
314 | |||
315 | return free * factor; | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
320 | * @inode: inode | 288 | * @inode: inode |
321 | */ | 289 | */ |
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 294 | } |
327 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function look up and reads node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..9a9220333b3b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
370 | { | 370 | { |
371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
372 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
373 | 374 | ||
374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
377 | 378 | ||
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
390 | return 0; | 392 | return 0; |
391 | } | 393 | } |
392 | 394 | ||
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
532 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
533 | return 0; | 541 | return 0; |
534 | } | 542 | } |
535 | 543 | ||
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
649 | * | 657 | * |
650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
652 | * | ||
653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
654 | */ | 660 | */ |
655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
659 | 665 | ||
@@ -842,7 +848,7 @@ enum { | |||
842 | Opt_err, | 848 | Opt_err, |
843 | }; | 849 | }; |
844 | 850 | ||
845 | static match_table_t tokens = { | 851 | static const match_table_t tokens = { |
846 | {Opt_fast_unmount, "fast_unmount"}, | 852 | {Opt_fast_unmount, "fast_unmount"}, |
847 | {Opt_norm_unmount, "norm_unmount"}, | 853 | {Opt_norm_unmount, "norm_unmount"}, |
848 | {Opt_err, NULL}, | 854 | {Opt_err, NULL}, |
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1018 | goto out_dereg; | 1024 | goto out_dereg; |
1019 | } | 1025 | } |
1020 | 1026 | ||
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | ||
1021 | if (!mounted_read_only) { | 1028 | if (!mounted_read_only) { |
1022 | err = alloc_wbufs(c); | 1029 | err = alloc_wbufs(c); |
1023 | if (err) | 1030 | if (err) |
1024 | goto out_cbuf; | 1031 | goto out_cbuf; |
1025 | 1032 | ||
1026 | /* Create background thread */ | 1033 | /* Create background thread */ |
1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
1028 | c->vi.vol_id); | ||
1029 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
1030 | if (!c->bgt) | 1035 | if (!c->bgt) |
1031 | c->bgt = ERR_PTR(-EINVAL); | 1036 | c->bgt = ERR_PTR(-EINVAL); |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function look up and reads node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1036 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1227 | int dead_wm; | 1234 | int dead_wm; |
1228 | int dark_wm; | 1235 | int dark_wm; |
1229 | int block_cnt; | 1236 | int block_cnt; |
@@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1264 | void *sbuf; |
1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1260 | 1269 | ||
1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1441 | 1451 | ||
1442 | /* find.c */ | 1452 | /* find.c */ |
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { | |||
211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
212 | .fsync = udf_fsync_file, | 212 | .fsync = udf_fsync_file, |
213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
214 | .llseek = generic_file_llseek, | ||
214 | }; | 215 | }; |
215 | 216 | ||
216 | const struct inode_operations udf_file_inode_operations = { | 217 | const struct inode_operations udf_file_inode_operations = { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
77 | 77 | ||
78 | iinfo = UDF_I(inode); | 78 | iinfo = UDF_I(inode); |
79 | iinfo->i_unique = 0; | 79 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
80 | iinfo->i_lenExtents = 0; | 80 | iinfo->i_efe = 1; |
81 | iinfo->i_next_alloc_block = 0; | 81 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) |
82 | iinfo->i_next_alloc_goal = 0; | 82 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; |
83 | iinfo->i_strat4096 = 0; | 83 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - |
84 | sizeof(struct extendedFileEntry), | ||
85 | GFP_KERNEL); | ||
86 | } else { | ||
87 | iinfo->i_efe = 0; | ||
88 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
89 | sizeof(struct fileEntry), | ||
90 | GFP_KERNEL); | ||
91 | } | ||
92 | if (!iinfo->i_ext.i_data) { | ||
93 | iput(inode); | ||
94 | *err = -ENOMEM; | ||
95 | return NULL; | ||
96 | } | ||
84 | 97 | ||
85 | block = udf_new_block(dir->i_sb, NULL, | 98 | block = udf_new_block(dir->i_sb, NULL, |
86 | dinfo->i_location.partitionReferenceNum, | 99 | dinfo->i_location.partitionReferenceNum, |
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
111 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 124 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
112 | mark_buffer_dirty(sbi->s_lvid_bh); | 125 | mark_buffer_dirty(sbi->s_lvid_bh); |
113 | } | 126 | } |
127 | mutex_unlock(&sbi->s_alloc_mutex); | ||
114 | inode->i_mode = mode; | 128 | inode->i_mode = mode; |
115 | inode->i_uid = current->fsuid; | 129 | inode->i_uid = current->fsuid; |
116 | if (dir->i_mode & S_ISGID) { | 130 | if (dir->i_mode & S_ISGID) { |
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
129 | iinfo->i_lenEAttr = 0; | 143 | iinfo->i_lenEAttr = 0; |
130 | iinfo->i_lenAlloc = 0; | 144 | iinfo->i_lenAlloc = 0; |
131 | iinfo->i_use = 0; | 145 | iinfo->i_use = 0; |
132 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | ||
133 | iinfo->i_efe = 1; | ||
134 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) | ||
135 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; | ||
136 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
137 | sizeof(struct extendedFileEntry), | ||
138 | GFP_KERNEL); | ||
139 | } else { | ||
140 | iinfo->i_efe = 0; | ||
141 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
142 | sizeof(struct fileEntry), | ||
143 | GFP_KERNEL); | ||
144 | } | ||
145 | if (!iinfo->i_ext.i_data) { | ||
146 | iput(inode); | ||
147 | *err = -ENOMEM; | ||
148 | mutex_unlock(&sbi->s_alloc_mutex); | ||
149 | return NULL; | ||
150 | } | ||
151 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
152 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 147 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
153 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
158 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 153 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
159 | insert_inode_hash(inode); | 154 | insert_inode_hash(inode); |
160 | mark_inode_dirty(inode); | 155 | mark_inode_dirty(inode); |
161 | mutex_unlock(&sbi->s_alloc_mutex); | ||
162 | 156 | ||
163 | if (DQUOT_ALLOC_INODE(inode)) { | 157 | if (DQUOT_ALLOC_INODE(inode)) { |
164 | DQUOT_DROP(inode); | 158 | DQUOT_DROP(inode); |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 5698bbf83bbf..e25e7010627b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -369,7 +369,7 @@ enum { | |||
369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore | 369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore |
370 | }; | 370 | }; |
371 | 371 | ||
372 | static match_table_t tokens = { | 372 | static const match_table_t tokens = { |
373 | {Opt_novrs, "novrs"}, | 373 | {Opt_novrs, "novrs"}, |
374 | {Opt_nostrict, "nostrict"}, | 374 | {Opt_nostrict, "nostrict"}, |
375 | {Opt_bs, "bs=%u"}, | 375 | {Opt_bs, "bs=%u"}, |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3141969b456d..e65212dfb60e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -309,7 +309,7 @@ enum { | |||
309 | Opt_err | 309 | Opt_err |
310 | }; | 310 | }; |
311 | 311 | ||
312 | static match_table_t tokens = { | 312 | static const match_table_t tokens = { |
313 | {Opt_type_old, "ufstype=old"}, | 313 | {Opt_type_old, "ufstype=old"}, |
314 | {Opt_type_sunx86, "ufstype=sunx86"}, | 314 | {Opt_type_sunx86, "ufstype=sunx86"}, |
315 | {Opt_type_sun, "ufstype=sun"}, | 315 | {Opt_type_sun, "ufstype=sun"}, |
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1233 | { | 1233 | { |
1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); | 1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); |
1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; | 1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; |
1236 | struct match_token *tp = tokens; | 1236 | const struct match_token *tp = tokens; |
1237 | 1237 | ||
1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) | 1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) |
1239 | ++tp; | 1239 | ++tp; |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f42f80a3b1fa..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1338,6 +1338,10 @@ __xfs_get_blocks( | |||
1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1340 | size = bh_result->b_size; | 1340 | size = bh_result->b_size; |
1341 | |||
1342 | if (!create && direct && offset >= i_size_read(inode)) | ||
1343 | return 0; | ||
1344 | |||
1341 | error = xfs_iomap(XFS_I(inode), offset, size, | 1345 | error = xfs_iomap(XFS_I(inode), offset, size, |
1342 | create ? flags : BMAPI_READ, &iomap, &niomap); | 1346 | create ? flags : BMAPI_READ, &iomap, &niomap); |
1343 | if (error) | 1347 | if (error) |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 986061ae1b9b..36d5fcd3f593 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work( | |||
1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the |
1002 | * ordered flag and reissue them. Because we can't tell the higher | 1002 | * ordered flag and reissue them. Because we can't tell the higher |
1003 | * layers directly that they should not issue ordered I/O anymore, they | 1003 | * layers directly that they should not issue ordered I/O anymore, they |
1004 | * need to check if the ordered flag was cleared during I/O completion. | 1004 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
1005 | */ | 1005 | */ |
1006 | if ((bp->b_error == EOPNOTSUPP) && | 1006 | if ((bp->b_error == EOPNOTSUPP) && |
1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
1009 | bp->b_flags &= ~XBF_ORDERED; | 1009 | bp->b_flags &= ~XBF_ORDERED; |
1010 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
1010 | xfs_buf_iorequest(bp); | 1011 | xfs_buf_iorequest(bp); |
1011 | } else if (bp->b_iodone) | 1012 | } else if (bp->b_iodone) |
1012 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index fe0109956656..456519a088c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -85,6 +85,14 @@ typedef enum { | |||
85 | * modifications being lost. | 85 | * modifications being lost. |
86 | */ | 86 | */ |
87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
88 | |||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | _XFS_BARRIER_FAILED = (1 << 23), | ||
88 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
89 | 97 | ||
90 | typedef enum { | 98 | typedef enum { |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { | |||
475 | const struct file_operations xfs_dir_file_operations = { | 475 | const struct file_operations xfs_dir_file_operations = { |
476 | .read = generic_read_dir, | 476 | .read = generic_read_dir, |
477 | .readdir = xfs_file_readdir, | 477 | .readdir = xfs_file_readdir, |
478 | .llseek = generic_file_llseek, | ||
478 | .unlocked_ioctl = xfs_file_ioctl, | 479 | .unlocked_ioctl = xfs_file_ioctl, |
479 | #ifdef CONFIG_COMPAT | 480 | #ifdef CONFIG_COMPAT |
480 | .compat_ioctl = xfs_file_compat_ioctl, | 481 | .compat_ioctl = xfs_file_compat_ioctl, |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 91bcd979242c..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -355,7 +355,7 @@ xfs_vn_ci_lookup( | |||
355 | /* else case-insensitive match... */ | 355 | /* else case-insensitive match... */ |
356 | dname.name = ci_name.name; | 356 | dname.name = ci_name.name; |
357 | dname.len = ci_name.len; | 357 | dname.len = ci_name.len; |
358 | dentry = d_add_ci(VFS_I(ip), dentry, &dname); | 358 | dentry = d_add_ci(dentry, VFS_I(ip), &dname); |
359 | kmem_free(ci_name.name); | 359 | kmem_free(ci_name.name); |
360 | return dentry; | 360 | return dentry; |
361 | } | 361 | } |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 73c65f19e549..e39013619b26 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -158,7 +158,7 @@ enum { | |||
158 | Opt_barrier, Opt_nobarrier, Opt_err | 158 | Opt_barrier, Opt_nobarrier, Opt_err |
159 | }; | 159 | }; |
160 | 160 | ||
161 | static match_table_t tokens = { | 161 | static const match_table_t tokens = { |
162 | {Opt_barrier, "barrier"}, | 162 | {Opt_barrier, "barrier"}, |
163 | {Opt_nobarrier, "nobarrier"}, | 163 | {Opt_nobarrier, "nobarrier"}, |
164 | {Opt_err, NULL} | 164 | {Opt_err, NULL} |
@@ -1302,9 +1302,29 @@ xfs_fs_remount( | |||
1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1303 | break; | 1303 | break; |
1304 | default: | 1304 | default: |
1305 | /* | ||
1306 | * Logically we would return an error here to prevent | ||
1307 | * users from believing they might have changed | ||
1308 | * mount options using remount which can't be changed. | ||
1309 | * | ||
1310 | * But unfortunately mount(8) adds all options from | ||
1311 | * mtab and fstab to the mount arguments in some cases | ||
1312 | * so we can't blindly reject options, but have to | ||
1313 | * check for each specified option if it actually | ||
1314 | * differs from the currently set option and only | ||
1315 | * reject it if that's the case. | ||
1316 | * | ||
1317 | * Until that is implemented we return success for | ||
1318 | * every remount request, and silently ignore all | ||
1319 | * options that we can't actually change. | ||
1320 | */ | ||
1321 | #if 0 | ||
1305 | printk(KERN_INFO | 1322 | printk(KERN_INFO |
1306 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
1307 | return -EINVAL; | 1324 | return -EINVAL; |
1325 | #else | ||
1326 | break; | ||
1327 | #endif | ||
1308 | } | 1328 | } |
1309 | } | 1329 | } |
1310 | 1330 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 608c30c3f76b..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -732,6 +732,7 @@ xfs_buf_item_init( | |||
732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
734 | bip->bli_buf = bp; | 734 | bip->bli_buf = bp; |
735 | xfs_buf_hold(bp); | ||
735 | bip->bli_format.blf_type = XFS_LI_BUF; | 736 | bip->bli_format.blf_type = XFS_LI_BUF; |
736 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 737 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
737 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 738 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
@@ -867,6 +868,21 @@ xfs_buf_item_dirty( | |||
867 | return (bip->bli_flags & XFS_BLI_DIRTY); | 868 | return (bip->bli_flags & XFS_BLI_DIRTY); |
868 | } | 869 | } |
869 | 870 | ||
871 | STATIC void | ||
872 | xfs_buf_item_free( | ||
873 | xfs_buf_log_item_t *bip) | ||
874 | { | ||
875 | #ifdef XFS_TRANS_DEBUG | ||
876 | kmem_free(bip->bli_orig); | ||
877 | kmem_free(bip->bli_logged); | ||
878 | #endif /* XFS_TRANS_DEBUG */ | ||
879 | |||
880 | #ifdef XFS_BLI_TRACE | ||
881 | ktrace_free(bip->bli_trace); | ||
882 | #endif | ||
883 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
884 | } | ||
885 | |||
870 | /* | 886 | /* |
871 | * This is called when the buf log item is no longer needed. It should | 887 | * This is called when the buf log item is no longer needed. It should |
872 | * free the buf log item associated with the given buffer and clear | 888 | * free the buf log item associated with the given buffer and clear |
@@ -887,18 +903,8 @@ xfs_buf_item_relse( | |||
887 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 903 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
888 | XFS_BUF_CLR_IODONE_FUNC(bp); | 904 | XFS_BUF_CLR_IODONE_FUNC(bp); |
889 | } | 905 | } |
890 | 906 | xfs_buf_rele(bp); | |
891 | #ifdef XFS_TRANS_DEBUG | 907 | xfs_buf_item_free(bip); |
892 | kmem_free(bip->bli_orig); | ||
893 | bip->bli_orig = NULL; | ||
894 | kmem_free(bip->bli_logged); | ||
895 | bip->bli_logged = NULL; | ||
896 | #endif /* XFS_TRANS_DEBUG */ | ||
897 | |||
898 | #ifdef XFS_BLI_TRACE | ||
899 | ktrace_free(bip->bli_trace); | ||
900 | #endif | ||
901 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
902 | } | 908 | } |
903 | 909 | ||
904 | 910 | ||
@@ -1120,6 +1126,7 @@ xfs_buf_iodone( | |||
1120 | 1126 | ||
1121 | ASSERT(bip->bli_buf == bp); | 1127 | ASSERT(bip->bli_buf == bp); |
1122 | 1128 | ||
1129 | xfs_buf_rele(bp); | ||
1123 | mp = bip->bli_item.li_mountp; | 1130 | mp = bip->bli_item.li_mountp; |
1124 | 1131 | ||
1125 | /* | 1132 | /* |
@@ -1136,18 +1143,7 @@ xfs_buf_iodone( | |||
1136 | * xfs_trans_delete_ail() drops the AIL lock. | 1143 | * xfs_trans_delete_ail() drops the AIL lock. |
1137 | */ | 1144 | */ |
1138 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); | 1145 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); |
1139 | 1146 | xfs_buf_item_free(bip); | |
1140 | #ifdef XFS_TRANS_DEBUG | ||
1141 | kmem_free(bip->bli_orig); | ||
1142 | bip->bli_orig = NULL; | ||
1143 | kmem_free(bip->bli_logged); | ||
1144 | bip->bli_logged = NULL; | ||
1145 | #endif /* XFS_TRANS_DEBUG */ | ||
1146 | |||
1147 | #ifdef XFS_BLI_TRACE | ||
1148 | ktrace_free(bip->bli_trace); | ||
1149 | #endif | ||
1150 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
1151 | } | 1147 | } |
1152 | 1148 | ||
1153 | #if defined(XFS_BLI_TRACE) | 1149 | #if defined(XFS_BLI_TRACE) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 760f4c5b5160..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -149,7 +149,14 @@ xfs_swap_extents( | |||
149 | 149 | ||
150 | sbp = &sxp->sx_stat; | 150 | sbp = &sxp->sx_stat; |
151 | 151 | ||
152 | xfs_lock_two_inodes(ip, tip, lock_flags); | 152 | /* |
153 | * we have to do two separate lock calls here to keep lockdep | ||
154 | * happy. If we try to get all the locks in one call, lock will | ||
155 | * report false positives when we drop the ILOCK and regain them | ||
156 | * below. | ||
157 | */ | ||
158 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
159 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
153 | locked = 1; | 160 | locked = 1; |
154 | 161 | ||
155 | /* Verify that both files have the same format */ | 162 | /* Verify that both files have the same format */ |
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #ifndef __XFS_DMAPI_H__ | 18 | #ifndef __XFS_DMAPI_H__ |
19 | #define __XFS_DMAPI_H__ | 19 | #define __XFS_DMAPI_H__ |
20 | 20 | ||
21 | #include <linux/version.h> | ||
22 | /* Values used to define the on-disk version of dm_attrname_t. All | 21 | /* Values used to define the on-disk version of dm_attrname_t. All |
23 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". | 22 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". |
24 | * | 23 | * |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00e80df9dd9d..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct( | |||
4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
4119 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); |
4120 | 4120 | ||
4121 | xfs_iext_irec_compact_full(ifp); | 4121 | xfs_iext_irec_compact_pages(ifp); |
4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
4123 | 4123 | ||
4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove( | |||
4449 | * compaction policy is as follows: | 4449 | * compaction policy is as follows: |
4450 | * | 4450 | * |
4451 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) |
4452 | * Full Compaction: Extents occupy less than 10% of allocated space | 4452 | * Partial Compaction: Extents occupy less than 50% of allocated space |
4453 | * Partial Compaction: Extents occupy > 10% and < 50% of allocated space | ||
4454 | * No Compaction: Extents occupy at least 50% of allocated space | 4453 | * No Compaction: Extents occupy at least 50% of allocated space |
4455 | */ | 4454 | */ |
4456 | void | 4455 | void |
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact( | |||
4471 | xfs_iext_direct_to_inline(ifp, nextents); | 4470 | xfs_iext_direct_to_inline(ifp, nextents); |
4472 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4471 | } else if (nextents <= XFS_LINEAR_EXTS) { |
4473 | xfs_iext_indirect_to_direct(ifp); | 4472 | xfs_iext_indirect_to_direct(ifp); |
4474 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { | ||
4475 | xfs_iext_irec_compact_full(ifp); | ||
4476 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4473 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
4477 | xfs_iext_irec_compact_pages(ifp); | 4474 | xfs_iext_irec_compact_pages(ifp); |
4478 | } | 4475 | } |
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages( | |||
4496 | erp_next = erp + 1; | 4493 | erp_next = erp + 1; |
4497 | if (erp_next->er_extcount <= | 4494 | if (erp_next->er_extcount <= |
4498 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4495 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
4499 | memmove(&erp->er_extbuf[erp->er_extcount], | 4496 | memcpy(&erp->er_extbuf[erp->er_extcount], |
4500 | erp_next->er_extbuf, erp_next->er_extcount * | 4497 | erp_next->er_extbuf, erp_next->er_extcount * |
4501 | sizeof(xfs_bmbt_rec_t)); | 4498 | sizeof(xfs_bmbt_rec_t)); |
4502 | erp->er_extcount += erp_next->er_extcount; | 4499 | erp->er_extcount += erp_next->er_extcount; |
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages( | |||
4516 | } | 4513 | } |
4517 | 4514 | ||
4518 | /* | 4515 | /* |
4519 | * Fully compact the extent records managed by the indirection array. | ||
4520 | */ | ||
4521 | void | ||
4522 | xfs_iext_irec_compact_full( | ||
4523 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
4524 | { | ||
4525 | xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ | ||
4526 | xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ | ||
4527 | int erp_idx = 0; /* extent irec index */ | ||
4528 | int ext_avail; /* empty entries in ex list */ | ||
4529 | int ext_diff; /* number of exts to add */ | ||
4530 | int nlists; /* number of irec's (ex lists) */ | ||
4531 | |||
4532 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
4533 | |||
4534 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4535 | erp = ifp->if_u1.if_ext_irec; | ||
4536 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4537 | erp_next = erp + 1; | ||
4538 | ep_next = erp_next->er_extbuf; | ||
4539 | |||
4540 | while (erp_idx < nlists - 1) { | ||
4541 | /* | ||
4542 | * Check how many extent records are available in this irec. | ||
4543 | * If there is none skip the whole exercise. | ||
4544 | */ | ||
4545 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
4546 | if (ext_avail) { | ||
4547 | |||
4548 | /* | ||
4549 | * Copy over as many as possible extent records into | ||
4550 | * the previous page. | ||
4551 | */ | ||
4552 | ext_diff = MIN(ext_avail, erp_next->er_extcount); | ||
4553 | memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
4554 | erp->er_extcount += ext_diff; | ||
4555 | erp_next->er_extcount -= ext_diff; | ||
4556 | |||
4557 | /* | ||
4558 | * If the next irec is empty now we can simply | ||
4559 | * remove it. | ||
4560 | */ | ||
4561 | if (erp_next->er_extcount == 0) { | ||
4562 | /* | ||
4563 | * Free page before removing extent record | ||
4564 | * so er_extoffs don't get modified in | ||
4565 | * xfs_iext_irec_remove. | ||
4566 | */ | ||
4567 | kmem_free(erp_next->er_extbuf); | ||
4568 | erp_next->er_extbuf = NULL; | ||
4569 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
4570 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4571 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
4572 | |||
4573 | /* | ||
4574 | * If the next irec is not empty move up the content | ||
4575 | * that has not been copied to the previous page to | ||
4576 | * the beggining of this one. | ||
4577 | */ | ||
4578 | } else { | ||
4579 | memmove(erp_next->er_extbuf, &ep_next[ext_diff], | ||
4580 | erp_next->er_extcount * | ||
4581 | sizeof(xfs_bmbt_rec_t)); | ||
4582 | ep_next = erp_next->er_extbuf; | ||
4583 | memset(&ep_next[erp_next->er_extcount], 0, | ||
4584 | (XFS_LINEAR_EXTS - | ||
4585 | erp_next->er_extcount) * | ||
4586 | sizeof(xfs_bmbt_rec_t)); | ||
4587 | } | ||
4588 | } | ||
4589 | |||
4590 | if (erp->er_extcount == XFS_LINEAR_EXTS) { | ||
4591 | erp_idx++; | ||
4592 | if (erp_idx < nlists) | ||
4593 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
4594 | else | ||
4595 | break; | ||
4596 | } | ||
4597 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
4598 | erp_next = erp + 1; | ||
4599 | ep_next = erp_next->er_extbuf; | ||
4600 | } | ||
4601 | } | ||
4602 | |||
4603 | /* | ||
4604 | * This is called to update the er_extoff field in the indirection | 4516 | * This is called to update the er_extoff field in the indirection |
4605 | * array when extents have been added or removed from one of the | 4517 | * array when extents have been added or removed from one of the |
4606 | * extent lists. erp_idx contains the irec index to begin updating | 4518 | * extent lists. erp_idx contains the irec index to begin updating |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccba14eb9dbe..0b02c6443551 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | |||
124 | STATIC int xlog_iclogs_empty(xlog_t *log); | 124 | STATIC int xlog_iclogs_empty(xlog_t *log); |
125 | 125 | ||
126 | #if defined(XFS_LOG_TRACE) | 126 | #if defined(XFS_LOG_TRACE) |
127 | |||
128 | #define XLOG_TRACE_LOGGRANT_SIZE 2048 | ||
129 | #define XLOG_TRACE_ICLOG_SIZE 256 | ||
130 | |||
131 | void | ||
132 | xlog_trace_loggrant_alloc(xlog_t *log) | ||
133 | { | ||
134 | log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | xlog_trace_loggrant_dealloc(xlog_t *log) | ||
139 | { | ||
140 | ktrace_free(log->l_grant_trace); | ||
141 | } | ||
142 | |||
127 | void | 143 | void |
128 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 144 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
129 | { | 145 | { |
130 | unsigned long cnts; | 146 | unsigned long cnts; |
131 | 147 | ||
132 | if (!log->l_grant_trace) { | ||
133 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
134 | if (!log->l_grant_trace) | ||
135 | return; | ||
136 | } | ||
137 | /* ticket counts are 1 byte each */ | 148 | /* ticket counts are 1 byte each */ |
138 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | 149 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; |
139 | 150 | ||
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
157 | } | 168 | } |
158 | 169 | ||
159 | void | 170 | void |
171 | xlog_trace_iclog_alloc(xlog_in_core_t *iclog) | ||
172 | { | ||
173 | iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); | ||
174 | } | ||
175 | |||
176 | void | ||
177 | xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) | ||
178 | { | ||
179 | ktrace_free(iclog->ic_trace); | ||
180 | } | ||
181 | |||
182 | void | ||
160 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | 183 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) |
161 | { | 184 | { |
162 | if (!iclog->ic_trace) | ||
163 | iclog->ic_trace = ktrace_alloc(256, KM_NOFS); | ||
164 | ktrace_enter(iclog->ic_trace, | 185 | ktrace_enter(iclog->ic_trace, |
165 | (void *)((unsigned long)state), | 186 | (void *)((unsigned long)state), |
166 | (void *)((unsigned long)current_pid()), | 187 | (void *)((unsigned long)current_pid()), |
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | |||
170 | (void *)NULL, (void *)NULL); | 191 | (void *)NULL, (void *)NULL); |
171 | } | 192 | } |
172 | #else | 193 | #else |
194 | |||
195 | #define xlog_trace_loggrant_alloc(log) | ||
196 | #define xlog_trace_loggrant_dealloc(log) | ||
173 | #define xlog_trace_loggrant(log,tic,string) | 197 | #define xlog_trace_loggrant(log,tic,string) |
198 | |||
199 | #define xlog_trace_iclog_alloc(iclog) | ||
200 | #define xlog_trace_iclog_dealloc(iclog) | ||
174 | #define xlog_trace_iclog(iclog,state) | 201 | #define xlog_trace_iclog(iclog,state) |
202 | |||
175 | #endif /* XFS_LOG_TRACE */ | 203 | #endif /* XFS_LOG_TRACE */ |
176 | 204 | ||
177 | 205 | ||
@@ -1005,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp) | |||
1005 | l = iclog->ic_log; | 1033 | l = iclog->ic_log; |
1006 | 1034 | ||
1007 | /* | 1035 | /* |
1008 | * If the ordered flag has been removed by a lower | 1036 | * If the _XFS_BARRIER_FAILED flag was set by a lower |
1009 | * layer, it means the underlyin device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
1010 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
1011 | */ | 1039 | */ |
1012 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { | 1040 | if (bp->b_flags & _XFS_BARRIER_FAILED) { |
1041 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
1013 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1042 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
1014 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1043 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
1015 | "xlog_iodone: Barriers are no longer supported" | 1044 | "xlog_iodone: Barriers are no longer supported" |
@@ -1231,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1231 | spin_lock_init(&log->l_grant_lock); | 1260 | spin_lock_init(&log->l_grant_lock); |
1232 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | 1261 | sv_init(&log->l_flush_wait, 0, "flush_wait"); |
1233 | 1262 | ||
1263 | xlog_trace_loggrant_alloc(log); | ||
1234 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1264 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1235 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1265 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
1236 | 1266 | ||
@@ -1285,6 +1315,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1285 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1315 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); |
1286 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1316 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); |
1287 | 1317 | ||
1318 | xlog_trace_iclog_alloc(iclog); | ||
1319 | |||
1288 | iclogp = &iclog->ic_next; | 1320 | iclogp = &iclog->ic_next; |
1289 | } | 1321 | } |
1290 | *iclogp = log->l_iclog; /* complete ring */ | 1322 | *iclogp = log->l_iclog; /* complete ring */ |
@@ -1565,11 +1597,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1565 | sv_destroy(&iclog->ic_force_wait); | 1597 | sv_destroy(&iclog->ic_force_wait); |
1566 | sv_destroy(&iclog->ic_write_wait); | 1598 | sv_destroy(&iclog->ic_write_wait); |
1567 | xfs_buf_free(iclog->ic_bp); | 1599 | xfs_buf_free(iclog->ic_bp); |
1568 | #ifdef XFS_LOG_TRACE | 1600 | xlog_trace_iclog_dealloc(iclog); |
1569 | if (iclog->ic_trace != NULL) { | ||
1570 | ktrace_free(iclog->ic_trace); | ||
1571 | } | ||
1572 | #endif | ||
1573 | next_iclog = iclog->ic_next; | 1601 | next_iclog = iclog->ic_next; |
1574 | kmem_free(iclog); | 1602 | kmem_free(iclog); |
1575 | iclog = next_iclog; | 1603 | iclog = next_iclog; |
@@ -1578,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log) | |||
1578 | spinlock_destroy(&log->l_grant_lock); | 1606 | spinlock_destroy(&log->l_grant_lock); |
1579 | 1607 | ||
1580 | xfs_buf_free(log->l_xbuf); | 1608 | xfs_buf_free(log->l_xbuf); |
1581 | #ifdef XFS_LOG_TRACE | 1609 | xlog_trace_loggrant_dealloc(log); |
1582 | if (log->l_trace != NULL) { | ||
1583 | ktrace_free(log->l_trace); | ||
1584 | } | ||
1585 | if (log->l_grant_trace != NULL) { | ||
1586 | ktrace_free(log->l_grant_trace); | ||
1587 | } | ||
1588 | #endif | ||
1589 | log->l_mp->m_log = NULL; | 1610 | log->l_mp->m_log = NULL; |
1590 | kmem_free(log); | 1611 | kmem_free(log); |
1591 | } /* xlog_dealloc_log */ | 1612 | } /* xlog_dealloc_log */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c8a5b22ee3e3..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -448,7 +448,6 @@ typedef struct log { | |||
448 | int l_grant_write_bytes; | 448 | int l_grant_write_bytes; |
449 | 449 | ||
450 | #ifdef XFS_LOG_TRACE | 450 | #ifdef XFS_LOG_TRACE |
451 | struct ktrace *l_trace; | ||
452 | struct ktrace *l_grant_trace; | 451 | struct ktrace *l_grant_trace; |
453 | #endif | 452 | #endif |
454 | 453 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index aa238c8fbd7a..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1838,6 +1838,12 @@ again: | |||
1838 | #endif | 1838 | #endif |
1839 | } | 1839 | } |
1840 | 1840 | ||
1841 | /* | ||
1842 | * xfs_lock_two_inodes() can only be used to lock one type of lock | ||
1843 | * at a time - the iolock or the ilock, but not both at once. If | ||
1844 | * we lock both at once, lockdep will report false positives saying | ||
1845 | * we have violated locking orders. | ||
1846 | */ | ||
1841 | void | 1847 | void |
1842 | xfs_lock_two_inodes( | 1848 | xfs_lock_two_inodes( |
1843 | xfs_inode_t *ip0, | 1849 | xfs_inode_t *ip0, |
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( | |||
1848 | int attempts = 0; | 1854 | int attempts = 0; |
1849 | xfs_log_item_t *lp; | 1855 | xfs_log_item_t *lp; |
1850 | 1856 | ||
1857 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | ||
1858 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | ||
1851 | ASSERT(ip0->i_ino != ip1->i_ino); | 1859 | ASSERT(ip0->i_ino != ip1->i_ino); |
1852 | 1860 | ||
1853 | if (ip0->i_ino > ip1->i_ino) { | 1861 | if (ip0->i_ino > ip1->i_ino) { |
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ | |||
3152 | /* | 3160 | /* |
3153 | * Zero file bytes between startoff and endoff inclusive. | 3161 | * Zero file bytes between startoff and endoff inclusive. |
3154 | * The iolock is held exclusive and no blocks are buffered. | 3162 | * The iolock is held exclusive and no blocks are buffered. |
3163 | * | ||
3164 | * This function is used by xfs_free_file_space() to zero | ||
3165 | * partial blocks when the range to free is not block aligned. | ||
3166 | * When unreserving space with boundaries that are not block | ||
3167 | * aligned we round up the start and round down the end | ||
3168 | * boundaries and then use this function to zero the parts of | ||
3169 | * the blocks that got dropped during the rounding. | ||
3155 | */ | 3170 | */ |
3156 | STATIC int | 3171 | STATIC int |
3157 | xfs_zero_remaining_bytes( | 3172 | xfs_zero_remaining_bytes( |
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( | |||
3168 | int nimap; | 3183 | int nimap; |
3169 | int error = 0; | 3184 | int error = 0; |
3170 | 3185 | ||
3186 | /* | ||
3187 | * Avoid doing I/O beyond eof - it's not necessary | ||
3188 | * since nothing can read beyond eof. The space will | ||
3189 | * be zeroed when the file is extended anyway. | ||
3190 | */ | ||
3191 | if (startoff >= ip->i_size) | ||
3192 | return 0; | ||
3193 | |||
3194 | if (endoff > ip->i_size) | ||
3195 | endoff = ip->i_size; | ||
3196 | |||
3171 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 3197 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, |
3172 | XFS_IS_REALTIME_INODE(ip) ? | 3198 | XFS_IS_REALTIME_INODE(ip) ? |
3173 | mp->m_rtdev_targp : mp->m_ddev_targp); | 3199 | mp->m_rtdev_targp : mp->m_ddev_targp); |