diff options
Diffstat (limited to 'fs')
269 files changed, 14497 insertions, 5547 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 635f3e286ad8..219ec06a8c7e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -236,6 +236,7 @@ config JBD_DEBUG | |||
236 | 236 | ||
237 | config JBD2 | 237 | config JBD2 |
238 | tristate | 238 | tristate |
239 | select CRC32 | ||
239 | help | 240 | help |
240 | This is a generic journaling layer for block devices that support | 241 | This is a generic journaling layer for block devices that support |
241 | both 32-bit and 64-bit block numbers. It is currently used by | 242 | both 32-bit and 64-bit block numbers. It is currently used by |
@@ -440,14 +441,8 @@ config OCFS2_FS | |||
440 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools | 441 | Tools web page: http://oss.oracle.com/projects/ocfs2-tools |
441 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ | 442 | OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ |
442 | 443 | ||
443 | Note: Features which OCFS2 does not support yet: | 444 | For more information on OCFS2, see the file |
444 | - extended attributes | 445 | <file:Documentation/filesystems/ocfs2.txt>. |
445 | - quotas | ||
446 | - cluster aware flock | ||
447 | - Directory change notification (F_NOTIFY) | ||
448 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | ||
449 | - POSIX ACLs | ||
450 | - readpages / writepages (not user visible) | ||
451 | 446 | ||
452 | config OCFS2_DEBUG_MASKLOG | 447 | config OCFS2_DEBUG_MASKLOG |
453 | bool "OCFS2 logging support" | 448 | bool "OCFS2 logging support" |
@@ -1028,8 +1023,8 @@ config HUGETLB_PAGE | |||
1028 | def_bool HUGETLBFS | 1023 | def_bool HUGETLBFS |
1029 | 1024 | ||
1030 | config CONFIGFS_FS | 1025 | config CONFIGFS_FS |
1031 | tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" | 1026 | tristate "Userspace-driven configuration filesystem" |
1032 | depends on SYSFS && EXPERIMENTAL | 1027 | depends on SYSFS |
1033 | help | 1028 | help |
1034 | configfs is a ram-based filesystem that provides the converse | 1029 | configfs is a ram-based filesystem that provides the converse |
1035 | of sysfs's functionality. Where sysfs is a filesystem-based | 1030 | of sysfs's functionality. Where sysfs is a filesystem-based |
@@ -1112,8 +1107,8 @@ config HFS_FS | |||
1112 | help | 1107 | help |
1113 | If you say Y here, you will be able to mount Macintosh-formatted | 1108 | If you say Y here, you will be able to mount Macintosh-formatted |
1114 | floppy disks and hard drive partitions with full read-write access. | 1109 | floppy disks and hard drive partitions with full read-write access. |
1115 | Please read <file:fs/hfs/HFS.txt> to learn about the available mount | 1110 | Please read <file:Documentation/filesystems/hfs.txt> to learn about |
1116 | options. | 1111 | the available mount options. |
1117 | 1112 | ||
1118 | To compile this file system support as a module, choose M here: the | 1113 | To compile this file system support as a module, choose M here: the |
1119 | module will be called hfs. | 1114 | module will be called hfs. |
@@ -1305,7 +1300,7 @@ config JFFS2_COMPRESSION_OPTIONS | |||
1305 | help | 1300 | help |
1306 | Enabling this option allows you to explicitly choose which | 1301 | Enabling this option allows you to explicitly choose which |
1307 | compression modules, if any, are enabled in JFFS2. Removing | 1302 | compression modules, if any, are enabled in JFFS2. Removing |
1308 | compressors and mean you cannot read existing file systems, | 1303 | compressors can mean you cannot read existing file systems, |
1309 | and enabling experimental compressors can mean that you | 1304 | and enabling experimental compressors can mean that you |
1310 | write a file system which cannot be read by a standard kernel. | 1305 | write a file system which cannot be read by a standard kernel. |
1311 | 1306 | ||
@@ -1905,13 +1900,15 @@ config CIFS | |||
1905 | file servers such as Windows 2000 (including Windows 2003, NT 4 | 1900 | file servers such as Windows 2000 (including Windows 2003, NT 4 |
1906 | and Windows XP) as well by Samba (which provides excellent CIFS | 1901 | and Windows XP) as well by Samba (which provides excellent CIFS |
1907 | server support for Linux and many other operating systems). Limited | 1902 | server support for Linux and many other operating systems). Limited |
1908 | support for OS/2 and Windows ME and similar servers is provided as well. | 1903 | support for OS/2 and Windows ME and similar servers is provided as |
1909 | 1904 | well. | |
1910 | The intent of the cifs module is to provide an advanced | 1905 | |
1911 | network file system client for mounting to CIFS compliant servers, | 1906 | The cifs module provides an advanced network file system |
1912 | including support for dfs (hierarchical name space), secure per-user | 1907 | client for mounting to CIFS compliant servers. It includes |
1913 | session establishment, safe distributed caching (oplock), optional | 1908 | support for DFS (hierarchical name space), secure per-user |
1914 | packet signing, Unicode and other internationalization improvements. | 1909 | session establishment via Kerberos or NTLM or NTLMv2, |
1910 | safe distributed caching (oplock), optional packet | ||
1911 | signing, Unicode and other internationalization improvements. | ||
1915 | If you need to mount to Samba or Windows from this machine, say Y. | 1912 | If you need to mount to Samba or Windows from this machine, say Y. |
1916 | 1913 | ||
1917 | config CIFS_STATS | 1914 | config CIFS_STATS |
@@ -1943,7 +1940,8 @@ config CIFS_WEAK_PW_HASH | |||
1943 | (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) | 1940 | (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) |
1944 | security mechanisms. These hash the password more securely | 1941 | security mechanisms. These hash the password more securely |
1945 | than the mechanisms used in the older LANMAN version of the | 1942 | than the mechanisms used in the older LANMAN version of the |
1946 | SMB protocol needed to establish sessions with old SMB servers. | 1943 | SMB protocol but LANMAN based authentication is needed to |
1944 | establish sessions with some old SMB servers. | ||
1947 | 1945 | ||
1948 | Enabling this option allows the cifs module to mount to older | 1946 | Enabling this option allows the cifs module to mount to older |
1949 | LANMAN based servers such as OS/2 and Windows 95, but such | 1947 | LANMAN based servers such as OS/2 and Windows 95, but such |
@@ -1951,8 +1949,8 @@ config CIFS_WEAK_PW_HASH | |||
1951 | security mechanisms if you are on a public network. Unless you | 1949 | security mechanisms if you are on a public network. Unless you |
1952 | have a need to access old SMB servers (and are on a private | 1950 | have a need to access old SMB servers (and are on a private |
1953 | network) you probably want to say N. Even if this support | 1951 | network) you probably want to say N. Even if this support |
1954 | is enabled in the kernel build, they will not be used | 1952 | is enabled in the kernel build, LANMAN authentication will not be |
1955 | automatically. At runtime LANMAN mounts are disabled but | 1953 | used automatically. At runtime LANMAN mounts are disabled but |
1956 | can be set to required (or optional) either in | 1954 | can be set to required (or optional) either in |
1957 | /proc/fs/cifs (see fs/cifs/README for more detail) or via an | 1955 | /proc/fs/cifs (see fs/cifs/README for more detail) or via an |
1958 | option on the mount command. This support is disabled by | 1956 | option on the mount command. This support is disabled by |
@@ -2018,12 +2016,22 @@ config CIFS_UPCALL | |||
2018 | depends on CIFS_EXPERIMENTAL | 2016 | depends on CIFS_EXPERIMENTAL |
2019 | depends on KEYS | 2017 | depends on KEYS |
2020 | help | 2018 | help |
2021 | Enables an upcall mechanism for CIFS which will be used to contact | 2019 | Enables an upcall mechanism for CIFS which accesses |
2022 | userspace helper utilities to provide SPNEGO packaged Kerberos | 2020 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) |
2023 | tickets which are needed to mount to certain secure servers | 2021 | Kerberos tickets which are needed to mount to certain secure servers |
2024 | (for which more secure Kerberos authentication is required). If | 2022 | (for which more secure Kerberos authentication is required). If |
2025 | unsure, say N. | 2023 | unsure, say N. |
2026 | 2024 | ||
2025 | config CIFS_DFS_UPCALL | ||
2026 | bool "DFS feature support (EXPERIMENTAL)" | ||
2027 | depends on CIFS_EXPERIMENTAL | ||
2028 | depends on KEYS | ||
2029 | help | ||
2030 | Enables an upcall mechanism for CIFS which contacts userspace | ||
2031 | helper utilities to provide server name resolution (host names to | ||
2032 | IP addresses) which is needed for implicit mounts of DFS junction | ||
2033 | points. If unsure, say N. | ||
2034 | |||
2027 | config NCP_FS | 2035 | config NCP_FS |
2028 | tristate "NCP file system support (to mount NetWare volumes)" | 2036 | tristate "NCP file system support (to mount NetWare volumes)" |
2029 | depends on IPX!=n || INET | 2037 | depends on IPX!=n || INET |
@@ -2130,4 +2138,3 @@ source "fs/nls/Kconfig" | |||
2130 | source "fs/dlm/Kconfig" | 2138 | source "fs/dlm/Kconfig" |
2131 | 2139 | ||
2132 | endmenu | 2140 | endmenu |
2133 | |||
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index d4fc6095466d..7c3d5f923da1 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -23,6 +23,10 @@ config BINFMT_ELF | |||
23 | ld.so (check the file <file:Documentation/Changes> for location and | 23 | ld.so (check the file <file:Documentation/Changes> for location and |
24 | latest version). | 24 | latest version). |
25 | 25 | ||
26 | config COMPAT_BINFMT_ELF | ||
27 | bool | ||
28 | depends on COMPAT && MMU | ||
29 | |||
26 | config BINFMT_ELF_FDPIC | 30 | config BINFMT_ELF_FDPIC |
27 | bool "Kernel support for FDPIC ELF binaries" | 31 | bool "Kernel support for FDPIC ELF binaries" |
28 | default y | 32 | default y |
diff --git a/fs/Makefile b/fs/Makefile index 500cf15cdb4b..1e7a11bd4da1 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -39,6 +39,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o | |||
39 | obj-y += binfmt_script.o | 39 | obj-y += binfmt_script.o |
40 | 40 | ||
41 | obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o | 41 | obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o |
42 | obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o | ||
42 | obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o | 43 | obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o |
43 | obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o | 44 | obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o |
44 | obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o | 45 | obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 33fe39ad4e03..0cc3597c1197 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -546,11 +546,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
546 | dentry->d_op = &afs_fs_dentry_operations; | 546 | dentry->d_op = &afs_fs_dentry_operations; |
547 | 547 | ||
548 | d_add(dentry, inode); | 548 | d_add(dentry, inode); |
549 | _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }", | 549 | _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", |
550 | fid.vnode, | 550 | fid.vnode, |
551 | fid.unique, | 551 | fid.unique, |
552 | dentry->d_inode->i_ino, | 552 | dentry->d_inode->i_ino, |
553 | dentry->d_inode->i_version); | 553 | (unsigned long long)dentry->d_inode->i_version); |
554 | 554 | ||
555 | return NULL; | 555 | return NULL; |
556 | } | 556 | } |
@@ -630,9 +630,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
630 | * been deleted and replaced, and the original vnode ID has | 630 | * been deleted and replaced, and the original vnode ID has |
631 | * been reused */ | 631 | * been reused */ |
632 | if (fid.unique != vnode->fid.unique) { | 632 | if (fid.unique != vnode->fid.unique) { |
633 | _debug("%s: file deleted (uq %u -> %u I:%lu)", | 633 | _debug("%s: file deleted (uq %u -> %u I:%llu)", |
634 | dentry->d_name.name, fid.unique, | 634 | dentry->d_name.name, fid.unique, |
635 | vnode->fid.unique, dentry->d_inode->i_version); | 635 | vnode->fid.unique, |
636 | (unsigned long long)dentry->d_inode->i_version); | ||
636 | spin_lock(&vnode->lock); | 637 | spin_lock(&vnode->lock); |
637 | set_bit(AFS_VNODE_DELETED, &vnode->flags); | 638 | set_bit(AFS_VNODE_DELETED, &vnode->flags); |
638 | spin_unlock(&vnode->lock); | 639 | spin_unlock(&vnode->lock); |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index d196840127c6..84750c8e9f95 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -301,7 +301,8 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
301 | 301 | ||
302 | inode = dentry->d_inode; | 302 | inode = dentry->d_inode; |
303 | 303 | ||
304 | _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version); | 304 | _enter("{ ino=%lu v=%llu }", inode->i_ino, |
305 | (unsigned long long)inode->i_version); | ||
305 | 306 | ||
306 | generic_fillattr(inode, stat); | 307 | generic_fillattr(inode, stat); |
307 | return 0; | 308 | return 0; |
@@ -397,7 +397,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) | |||
397 | * This prevents races between the aio code path referencing the | 397 | * This prevents races between the aio code path referencing the |
398 | * req (after submitting it) and aio_complete() freeing the req. | 398 | * req (after submitting it) and aio_complete() freeing the req. |
399 | */ | 399 | */ |
400 | static struct kiocb *FASTCALL(__aio_get_req(struct kioctx *ctx)); | 400 | static struct kiocb *__aio_get_req(struct kioctx *ctx); |
401 | static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) | 401 | static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) |
402 | { | 402 | { |
403 | struct kiocb *req = NULL; | 403 | struct kiocb *req = NULL; |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index e176d195e7e5..7596e1e94cde 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -319,7 +319,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
319 | current->mm->free_area_cache = current->mm->mmap_base; | 319 | current->mm->free_area_cache = current->mm->mmap_base; |
320 | current->mm->cached_hole_size = 0; | 320 | current->mm->cached_hole_size = 0; |
321 | 321 | ||
322 | current->mm->mmap = NULL; | ||
323 | compute_creds(bprm); | 322 | compute_creds(bprm); |
324 | current->flags &= ~PF_FORKNOEXEC; | 323 | current->flags &= ~PF_FORKNOEXEC; |
325 | #ifdef __sparc__ | 324 | #ifdef __sparc__ |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index ba8de7ca260b..18ed6dd906c1 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -45,7 +45,8 @@ | |||
45 | 45 | ||
46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); | 46 | static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); |
47 | static int load_elf_library(struct file *); | 47 | static int load_elf_library(struct file *); |
48 | static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); | 48 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, |
49 | int, int, unsigned long); | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * If we don't support core dumping, then supply a NULL so we | 52 | * If we don't support core dumping, then supply a NULL so we |
@@ -298,33 +299,70 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, | |||
298 | #ifndef elf_map | 299 | #ifndef elf_map |
299 | 300 | ||
300 | static unsigned long elf_map(struct file *filep, unsigned long addr, | 301 | static unsigned long elf_map(struct file *filep, unsigned long addr, |
301 | struct elf_phdr *eppnt, int prot, int type) | 302 | struct elf_phdr *eppnt, int prot, int type, |
303 | unsigned long total_size) | ||
302 | { | 304 | { |
303 | unsigned long map_addr; | 305 | unsigned long map_addr; |
304 | unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); | 306 | unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); |
307 | unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); | ||
308 | addr = ELF_PAGESTART(addr); | ||
309 | size = ELF_PAGEALIGN(size); | ||
305 | 310 | ||
306 | down_write(&current->mm->mmap_sem); | ||
307 | /* mmap() will return -EINVAL if given a zero size, but a | 311 | /* mmap() will return -EINVAL if given a zero size, but a |
308 | * segment with zero filesize is perfectly valid */ | 312 | * segment with zero filesize is perfectly valid */ |
309 | if (eppnt->p_filesz + pageoffset) | 313 | if (!size) |
310 | map_addr = do_mmap(filep, ELF_PAGESTART(addr), | 314 | return addr; |
311 | eppnt->p_filesz + pageoffset, prot, type, | 315 | |
312 | eppnt->p_offset - pageoffset); | 316 | down_write(&current->mm->mmap_sem); |
313 | else | 317 | /* |
314 | map_addr = ELF_PAGESTART(addr); | 318 | * total_size is the size of the ELF (interpreter) image. |
319 | * The _first_ mmap needs to know the full size, otherwise | ||
320 | * randomization might put this image into an overlapping | ||
321 | * position with the ELF binary image. (since size < total_size) | ||
322 | * So we first map the 'big' image - and unmap the remainder at | ||
323 | * the end. (which unmap is needed for ELF images with holes.) | ||
324 | */ | ||
325 | if (total_size) { | ||
326 | total_size = ELF_PAGEALIGN(total_size); | ||
327 | map_addr = do_mmap(filep, addr, total_size, prot, type, off); | ||
328 | if (!BAD_ADDR(map_addr)) | ||
329 | do_munmap(current->mm, map_addr+size, total_size-size); | ||
330 | } else | ||
331 | map_addr = do_mmap(filep, addr, size, prot, type, off); | ||
332 | |||
315 | up_write(&current->mm->mmap_sem); | 333 | up_write(&current->mm->mmap_sem); |
316 | return(map_addr); | 334 | return(map_addr); |
317 | } | 335 | } |
318 | 336 | ||
319 | #endif /* !elf_map */ | 337 | #endif /* !elf_map */ |
320 | 338 | ||
339 | static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr) | ||
340 | { | ||
341 | int i, first_idx = -1, last_idx = -1; | ||
342 | |||
343 | for (i = 0; i < nr; i++) { | ||
344 | if (cmds[i].p_type == PT_LOAD) { | ||
345 | last_idx = i; | ||
346 | if (first_idx == -1) | ||
347 | first_idx = i; | ||
348 | } | ||
349 | } | ||
350 | if (first_idx == -1) | ||
351 | return 0; | ||
352 | |||
353 | return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - | ||
354 | ELF_PAGESTART(cmds[first_idx].p_vaddr); | ||
355 | } | ||
356 | |||
357 | |||
321 | /* This is much more generalized than the library routine read function, | 358 | /* This is much more generalized than the library routine read function, |
322 | so we keep this separate. Technically the library read function | 359 | so we keep this separate. Technically the library read function |
323 | is only provided so that we can read a.out libraries that have | 360 | is only provided so that we can read a.out libraries that have |
324 | an ELF header */ | 361 | an ELF header */ |
325 | 362 | ||
326 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | 363 | static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, |
327 | struct file *interpreter, unsigned long *interp_load_addr) | 364 | struct file *interpreter, unsigned long *interp_map_addr, |
365 | unsigned long no_base) | ||
328 | { | 366 | { |
329 | struct elf_phdr *elf_phdata; | 367 | struct elf_phdr *elf_phdata; |
330 | struct elf_phdr *eppnt; | 368 | struct elf_phdr *eppnt; |
@@ -332,6 +370,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
332 | int load_addr_set = 0; | 370 | int load_addr_set = 0; |
333 | unsigned long last_bss = 0, elf_bss = 0; | 371 | unsigned long last_bss = 0, elf_bss = 0; |
334 | unsigned long error = ~0UL; | 372 | unsigned long error = ~0UL; |
373 | unsigned long total_size; | ||
335 | int retval, i, size; | 374 | int retval, i, size; |
336 | 375 | ||
337 | /* First of all, some simple consistency checks */ | 376 | /* First of all, some simple consistency checks */ |
@@ -370,6 +409,12 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
370 | goto out_close; | 409 | goto out_close; |
371 | } | 410 | } |
372 | 411 | ||
412 | total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum); | ||
413 | if (!total_size) { | ||
414 | error = -EINVAL; | ||
415 | goto out_close; | ||
416 | } | ||
417 | |||
373 | eppnt = elf_phdata; | 418 | eppnt = elf_phdata; |
374 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { | 419 | for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { |
375 | if (eppnt->p_type == PT_LOAD) { | 420 | if (eppnt->p_type == PT_LOAD) { |
@@ -387,9 +432,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
387 | vaddr = eppnt->p_vaddr; | 432 | vaddr = eppnt->p_vaddr; |
388 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) | 433 | if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) |
389 | elf_type |= MAP_FIXED; | 434 | elf_type |= MAP_FIXED; |
435 | else if (no_base && interp_elf_ex->e_type == ET_DYN) | ||
436 | load_addr = -vaddr; | ||
390 | 437 | ||
391 | map_addr = elf_map(interpreter, load_addr + vaddr, | 438 | map_addr = elf_map(interpreter, load_addr + vaddr, |
392 | eppnt, elf_prot, elf_type); | 439 | eppnt, elf_prot, elf_type, total_size); |
440 | total_size = 0; | ||
441 | if (!*interp_map_addr) | ||
442 | *interp_map_addr = map_addr; | ||
393 | error = map_addr; | 443 | error = map_addr; |
394 | if (BAD_ADDR(map_addr)) | 444 | if (BAD_ADDR(map_addr)) |
395 | goto out_close; | 445 | goto out_close; |
@@ -455,8 +505,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
455 | goto out_close; | 505 | goto out_close; |
456 | } | 506 | } |
457 | 507 | ||
458 | *interp_load_addr = load_addr; | 508 | error = load_addr; |
459 | error = ((unsigned long)interp_elf_ex->e_entry) + load_addr; | ||
460 | 509 | ||
461 | out_close: | 510 | out_close: |
462 | kfree(elf_phdata); | 511 | kfree(elf_phdata); |
@@ -546,14 +595,14 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
546 | int load_addr_set = 0; | 595 | int load_addr_set = 0; |
547 | char * elf_interpreter = NULL; | 596 | char * elf_interpreter = NULL; |
548 | unsigned int interpreter_type = INTERPRETER_NONE; | 597 | unsigned int interpreter_type = INTERPRETER_NONE; |
549 | unsigned char ibcs2_interpreter = 0; | ||
550 | unsigned long error; | 598 | unsigned long error; |
551 | struct elf_phdr *elf_ppnt, *elf_phdata; | 599 | struct elf_phdr *elf_ppnt, *elf_phdata; |
552 | unsigned long elf_bss, elf_brk; | 600 | unsigned long elf_bss, elf_brk; |
553 | int elf_exec_fileno; | 601 | int elf_exec_fileno; |
554 | int retval, i; | 602 | int retval, i; |
555 | unsigned int size; | 603 | unsigned int size; |
556 | unsigned long elf_entry, interp_load_addr = 0; | 604 | unsigned long elf_entry; |
605 | unsigned long interp_load_addr = 0; | ||
557 | unsigned long start_code, end_code, start_data, end_data; | 606 | unsigned long start_code, end_code, start_data, end_data; |
558 | unsigned long reloc_func_desc = 0; | 607 | unsigned long reloc_func_desc = 0; |
559 | char passed_fileno[6]; | 608 | char passed_fileno[6]; |
@@ -663,14 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
663 | if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') | 712 | if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') |
664 | goto out_free_interp; | 713 | goto out_free_interp; |
665 | 714 | ||
666 | /* If the program interpreter is one of these two, | ||
667 | * then assume an iBCS2 image. Otherwise assume | ||
668 | * a native linux image. | ||
669 | */ | ||
670 | if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || | ||
671 | strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) | ||
672 | ibcs2_interpreter = 1; | ||
673 | |||
674 | /* | 715 | /* |
675 | * The early SET_PERSONALITY here is so that the lookup | 716 | * The early SET_PERSONALITY here is so that the lookup |
676 | * for the interpreter happens in the namespace of the | 717 | * for the interpreter happens in the namespace of the |
@@ -690,7 +731,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
690 | * switch really is going to happen - do this in | 731 | * switch really is going to happen - do this in |
691 | * flush_thread(). - akpm | 732 | * flush_thread(). - akpm |
692 | */ | 733 | */ |
693 | SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); | 734 | SET_PERSONALITY(loc->elf_ex, 0); |
694 | 735 | ||
695 | interpreter = open_exec(elf_interpreter); | 736 | interpreter = open_exec(elf_interpreter); |
696 | retval = PTR_ERR(interpreter); | 737 | retval = PTR_ERR(interpreter); |
@@ -769,7 +810,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
769 | goto out_free_dentry; | 810 | goto out_free_dentry; |
770 | } else { | 811 | } else { |
771 | /* Executables without an interpreter also need a personality */ | 812 | /* Executables without an interpreter also need a personality */ |
772 | SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); | 813 | SET_PERSONALITY(loc->elf_ex, 0); |
773 | } | 814 | } |
774 | 815 | ||
775 | /* OK, we are done with that, now set up the arg stuff, | 816 | /* OK, we are done with that, now set up the arg stuff, |
@@ -803,7 +844,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
803 | 844 | ||
804 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages | 845 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages |
805 | may depend on the personality. */ | 846 | may depend on the personality. */ |
806 | SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); | 847 | SET_PERSONALITY(loc->elf_ex, 0); |
807 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) | 848 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) |
808 | current->personality |= READ_IMPLIES_EXEC; | 849 | current->personality |= READ_IMPLIES_EXEC; |
809 | 850 | ||
@@ -825,9 +866,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
825 | current->mm->start_stack = bprm->p; | 866 | current->mm->start_stack = bprm->p; |
826 | 867 | ||
827 | /* Now we do a little grungy work by mmaping the ELF image into | 868 | /* Now we do a little grungy work by mmaping the ELF image into |
828 | the correct location in memory. At this point, we assume that | 869 | the correct location in memory. */ |
829 | the image should be loaded at fixed address, not at a variable | ||
830 | address. */ | ||
831 | for(i = 0, elf_ppnt = elf_phdata; | 870 | for(i = 0, elf_ppnt = elf_phdata; |
832 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { | 871 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { |
833 | int elf_prot = 0, elf_flags; | 872 | int elf_prot = 0, elf_flags; |
@@ -881,11 +920,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
881 | * default mmap base, as well as whatever program they | 920 | * default mmap base, as well as whatever program they |
882 | * might try to exec. This is because the brk will | 921 | * might try to exec. This is because the brk will |
883 | * follow the loader, and is not movable. */ | 922 | * follow the loader, and is not movable. */ |
923 | #ifdef CONFIG_X86 | ||
924 | load_bias = 0; | ||
925 | #else | ||
884 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 926 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); |
927 | #endif | ||
885 | } | 928 | } |
886 | 929 | ||
887 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, | 930 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, |
888 | elf_prot, elf_flags); | 931 | elf_prot, elf_flags, 0); |
889 | if (BAD_ADDR(error)) { | 932 | if (BAD_ADDR(error)) { |
890 | send_sig(SIGKILL, current, 0); | 933 | send_sig(SIGKILL, current, 0); |
891 | retval = IS_ERR((void *)error) ? | 934 | retval = IS_ERR((void *)error) ? |
@@ -961,13 +1004,25 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
961 | } | 1004 | } |
962 | 1005 | ||
963 | if (elf_interpreter) { | 1006 | if (elf_interpreter) { |
964 | if (interpreter_type == INTERPRETER_AOUT) | 1007 | if (interpreter_type == INTERPRETER_AOUT) { |
965 | elf_entry = load_aout_interp(&loc->interp_ex, | 1008 | elf_entry = load_aout_interp(&loc->interp_ex, |
966 | interpreter); | 1009 | interpreter); |
967 | else | 1010 | } else { |
1011 | unsigned long uninitialized_var(interp_map_addr); | ||
1012 | |||
968 | elf_entry = load_elf_interp(&loc->interp_elf_ex, | 1013 | elf_entry = load_elf_interp(&loc->interp_elf_ex, |
969 | interpreter, | 1014 | interpreter, |
970 | &interp_load_addr); | 1015 | &interp_map_addr, |
1016 | load_bias); | ||
1017 | if (!IS_ERR((void *)elf_entry)) { | ||
1018 | /* | ||
1019 | * load_elf_interp() returns relocation | ||
1020 | * adjustment | ||
1021 | */ | ||
1022 | interp_load_addr = elf_entry; | ||
1023 | elf_entry += loc->interp_elf_ex.e_entry; | ||
1024 | } | ||
1025 | } | ||
971 | if (BAD_ADDR(elf_entry)) { | 1026 | if (BAD_ADDR(elf_entry)) { |
972 | force_sig(SIGSEGV, current); | 1027 | force_sig(SIGSEGV, current); |
973 | retval = IS_ERR((void *)elf_entry) ? | 1028 | retval = IS_ERR((void *)elf_entry) ? |
@@ -1021,6 +1076,12 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
1021 | current->mm->end_data = end_data; | 1076 | current->mm->end_data = end_data; |
1022 | current->mm->start_stack = bprm->p; | 1077 | current->mm->start_stack = bprm->p; |
1023 | 1078 | ||
1079 | #ifdef arch_randomize_brk | ||
1080 | if (current->flags & PF_RANDOMIZE) | ||
1081 | current->mm->brk = current->mm->start_brk = | ||
1082 | arch_randomize_brk(current->mm); | ||
1083 | #endif | ||
1084 | |||
1024 | if (current->personality & MMAP_PAGE_ZERO) { | 1085 | if (current->personality & MMAP_PAGE_ZERO) { |
1025 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, | 1086 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, |
1026 | and some applications "depend" upon this behavior. | 1087 | and some applications "depend" upon this behavior. |
@@ -1325,7 +1386,8 @@ static int writenote(struct memelfnote *men, struct file *file, | |||
1325 | if (!dump_seek(file, (off))) \ | 1386 | if (!dump_seek(file, (off))) \ |
1326 | goto end_coredump; | 1387 | goto end_coredump; |
1327 | 1388 | ||
1328 | static void fill_elf_header(struct elfhdr *elf, int segs) | 1389 | static void fill_elf_header(struct elfhdr *elf, int segs, |
1390 | u16 machine, u32 flags, u8 osabi) | ||
1329 | { | 1391 | { |
1330 | memcpy(elf->e_ident, ELFMAG, SELFMAG); | 1392 | memcpy(elf->e_ident, ELFMAG, SELFMAG); |
1331 | elf->e_ident[EI_CLASS] = ELF_CLASS; | 1393 | elf->e_ident[EI_CLASS] = ELF_CLASS; |
@@ -1335,12 +1397,12 @@ static void fill_elf_header(struct elfhdr *elf, int segs) | |||
1335 | memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); | 1397 | memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); |
1336 | 1398 | ||
1337 | elf->e_type = ET_CORE; | 1399 | elf->e_type = ET_CORE; |
1338 | elf->e_machine = ELF_ARCH; | 1400 | elf->e_machine = machine; |
1339 | elf->e_version = EV_CURRENT; | 1401 | elf->e_version = EV_CURRENT; |
1340 | elf->e_entry = 0; | 1402 | elf->e_entry = 0; |
1341 | elf->e_phoff = sizeof(struct elfhdr); | 1403 | elf->e_phoff = sizeof(struct elfhdr); |
1342 | elf->e_shoff = 0; | 1404 | elf->e_shoff = 0; |
1343 | elf->e_flags = ELF_CORE_EFLAGS; | 1405 | elf->e_flags = flags; |
1344 | elf->e_ehsize = sizeof(struct elfhdr); | 1406 | elf->e_ehsize = sizeof(struct elfhdr); |
1345 | elf->e_phentsize = sizeof(struct elf_phdr); | 1407 | elf->e_phentsize = sizeof(struct elf_phdr); |
1346 | elf->e_phnum = segs; | 1408 | elf->e_phnum = segs; |
@@ -1384,7 +1446,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1384 | prstatus->pr_sigpend = p->pending.signal.sig[0]; | 1446 | prstatus->pr_sigpend = p->pending.signal.sig[0]; |
1385 | prstatus->pr_sighold = p->blocked.sig[0]; | 1447 | prstatus->pr_sighold = p->blocked.sig[0]; |
1386 | prstatus->pr_pid = task_pid_vnr(p); | 1448 | prstatus->pr_pid = task_pid_vnr(p); |
1387 | prstatus->pr_ppid = task_pid_vnr(p->parent); | 1449 | prstatus->pr_ppid = task_pid_vnr(p->real_parent); |
1388 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1450 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1389 | prstatus->pr_sid = task_session_vnr(p); | 1451 | prstatus->pr_sid = task_session_vnr(p); |
1390 | if (thread_group_leader(p)) { | 1452 | if (thread_group_leader(p)) { |
@@ -1430,7 +1492,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, | |||
1430 | psinfo->pr_psargs[len] = 0; | 1492 | psinfo->pr_psargs[len] = 0; |
1431 | 1493 | ||
1432 | psinfo->pr_pid = task_pid_vnr(p); | 1494 | psinfo->pr_pid = task_pid_vnr(p); |
1433 | psinfo->pr_ppid = task_pid_vnr(p->parent); | 1495 | psinfo->pr_ppid = task_pid_vnr(p->real_parent); |
1434 | psinfo->pr_pgrp = task_pgrp_vnr(p); | 1496 | psinfo->pr_pgrp = task_pgrp_vnr(p); |
1435 | psinfo->pr_sid = task_session_vnr(p); | 1497 | psinfo->pr_sid = task_session_vnr(p); |
1436 | 1498 | ||
@@ -1447,6 +1509,238 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, | |||
1447 | return 0; | 1509 | return 0; |
1448 | } | 1510 | } |
1449 | 1511 | ||
1512 | static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) | ||
1513 | { | ||
1514 | elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv; | ||
1515 | int i = 0; | ||
1516 | do | ||
1517 | i += 2; | ||
1518 | while (auxv[i - 2] != AT_NULL); | ||
1519 | fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); | ||
1520 | } | ||
1521 | |||
1522 | #ifdef CORE_DUMP_USE_REGSET | ||
1523 | #include <linux/regset.h> | ||
1524 | |||
1525 | struct elf_thread_core_info { | ||
1526 | struct elf_thread_core_info *next; | ||
1527 | struct task_struct *task; | ||
1528 | struct elf_prstatus prstatus; | ||
1529 | struct memelfnote notes[0]; | ||
1530 | }; | ||
1531 | |||
1532 | struct elf_note_info { | ||
1533 | struct elf_thread_core_info *thread; | ||
1534 | struct memelfnote psinfo; | ||
1535 | struct memelfnote auxv; | ||
1536 | size_t size; | ||
1537 | int thread_notes; | ||
1538 | }; | ||
1539 | |||
1540 | static int fill_thread_core_info(struct elf_thread_core_info *t, | ||
1541 | const struct user_regset_view *view, | ||
1542 | long signr, size_t *total) | ||
1543 | { | ||
1544 | unsigned int i; | ||
1545 | |||
1546 | /* | ||
1547 | * NT_PRSTATUS is the one special case, because the regset data | ||
1548 | * goes into the pr_reg field inside the note contents, rather | ||
1549 | * than being the whole note contents. We fill the rest in here. | |
1550 | * We assume that regset 0 is NT_PRSTATUS. | ||
1551 | */ | ||
1552 | fill_prstatus(&t->prstatus, t->task, signr); | ||
1553 | (void) view->regsets[0].get(t->task, &view->regsets[0], | ||
1554 | 0, sizeof(t->prstatus.pr_reg), | ||
1555 | &t->prstatus.pr_reg, NULL); | ||
1556 | |||
1557 | fill_note(&t->notes[0], "CORE", NT_PRSTATUS, | ||
1558 | sizeof(t->prstatus), &t->prstatus); | ||
1559 | *total += notesize(&t->notes[0]); | ||
1560 | |||
1561 | /* | ||
1562 | * Each other regset might generate a note too. For each regset | ||
1563 | * that has no core_note_type or is inactive, we leave t->notes[i] | ||
1564 | * all zero and we'll know to skip writing it later. | ||
1565 | */ | ||
1566 | for (i = 1; i < view->n; ++i) { | ||
1567 | const struct user_regset *regset = &view->regsets[i]; | ||
1568 | if (regset->core_note_type && | ||
1569 | (!regset->active || regset->active(t->task, regset))) { | ||
1570 | int ret; | ||
1571 | size_t size = regset->n * regset->size; | ||
1572 | void *data = kmalloc(size, GFP_KERNEL); | ||
1573 | if (unlikely(!data)) | ||
1574 | return 0; | ||
1575 | ret = regset->get(t->task, regset, | ||
1576 | 0, size, data, NULL); | ||
1577 | if (unlikely(ret)) | ||
1578 | kfree(data); | ||
1579 | else { | ||
1580 | if (regset->core_note_type != NT_PRFPREG) | ||
1581 | fill_note(&t->notes[i], "LINUX", | ||
1582 | regset->core_note_type, | ||
1583 | size, data); | ||
1584 | else { | ||
1585 | t->prstatus.pr_fpvalid = 1; | ||
1586 | fill_note(&t->notes[i], "CORE", | ||
1587 | NT_PRFPREG, size, data); | ||
1588 | } | ||
1589 | *total += notesize(&t->notes[i]); | ||
1590 | } | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | return 1; | ||
1595 | } | ||
1596 | |||
1597 | static int fill_note_info(struct elfhdr *elf, int phdrs, | ||
1598 | struct elf_note_info *info, | ||
1599 | long signr, struct pt_regs *regs) | ||
1600 | { | ||
1601 | struct task_struct *dump_task = current; | ||
1602 | const struct user_regset_view *view = task_user_regset_view(dump_task); | ||
1603 | struct elf_thread_core_info *t; | ||
1604 | struct elf_prpsinfo *psinfo; | ||
1605 | struct task_struct *g, *p; | ||
1606 | unsigned int i; | ||
1607 | |||
1608 | info->size = 0; | ||
1609 | info->thread = NULL; | ||
1610 | |||
1611 | psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); | ||
1612 | fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); | ||
1613 | |||
1614 | if (psinfo == NULL) | ||
1615 | return 0; | ||
1616 | |||
1617 | /* | ||
1618 | * Figure out how many notes we're going to need for each thread. | ||
1619 | */ | ||
1620 | info->thread_notes = 0; | ||
1621 | for (i = 0; i < view->n; ++i) | ||
1622 | if (view->regsets[i].core_note_type != 0) | ||
1623 | ++info->thread_notes; | ||
1624 | |||
1625 | /* | ||
1626 | * Sanity check. We rely on regset 0 being in NT_PRSTATUS, | ||
1627 | * since it is our one special case. | ||
1628 | */ | ||
1629 | if (unlikely(info->thread_notes == 0) || | ||
1630 | unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) { | ||
1631 | WARN_ON(1); | ||
1632 | return 0; | ||
1633 | } | ||
1634 | |||
1635 | /* | ||
1636 | * Initialize the ELF file header. | ||
1637 | */ | ||
1638 | fill_elf_header(elf, phdrs, | ||
1639 | view->e_machine, view->e_flags, view->ei_osabi); | ||
1640 | |||
1641 | /* | ||
1642 | * Allocate a structure for each thread. | ||
1643 | */ | ||
1644 | rcu_read_lock(); | ||
1645 | do_each_thread(g, p) | ||
1646 | if (p->mm == dump_task->mm) { | ||
1647 | t = kzalloc(offsetof(struct elf_thread_core_info, | ||
1648 | notes[info->thread_notes]), | ||
1649 | GFP_ATOMIC); | ||
1650 | if (unlikely(!t)) { | ||
1651 | rcu_read_unlock(); | ||
1652 | return 0; | ||
1653 | } | ||
1654 | t->task = p; | ||
1655 | if (p == dump_task || !info->thread) { | ||
1656 | t->next = info->thread; | ||
1657 | info->thread = t; | ||
1658 | } else { | ||
1659 | /* | ||
1660 | * Make sure to keep the original task at | ||
1661 | * the head of the list. | ||
1662 | */ | ||
1663 | t->next = info->thread->next; | ||
1664 | info->thread->next = t; | ||
1665 | } | ||
1666 | } | ||
1667 | while_each_thread(g, p); | ||
1668 | rcu_read_unlock(); | ||
1669 | |||
1670 | /* | ||
1671 | * Now fill in each thread's information. | ||
1672 | */ | ||
1673 | for (t = info->thread; t != NULL; t = t->next) | ||
1674 | if (!fill_thread_core_info(t, view, signr, &info->size)) | ||
1675 | return 0; | ||
1676 | |||
1677 | /* | ||
1678 | * Fill in the two process-wide notes. | ||
1679 | */ | ||
1680 | fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); | ||
1681 | info->size += notesize(&info->psinfo); | ||
1682 | |||
1683 | fill_auxv_note(&info->auxv, current->mm); | ||
1684 | info->size += notesize(&info->auxv); | ||
1685 | |||
1686 | return 1; | ||
1687 | } | ||
1688 | |||
1689 | static size_t get_note_info_size(struct elf_note_info *info) | ||
1690 | { | ||
1691 | return info->size; | ||
1692 | } | ||
1693 | |||
1694 | /* | ||
1695 | * Write all the notes for each thread. When writing the first thread, the | ||
1696 | * process-wide notes are interleaved after the first thread-specific note. | ||
1697 | */ | ||
1698 | static int write_note_info(struct elf_note_info *info, | ||
1699 | struct file *file, loff_t *foffset) | ||
1700 | { | ||
1701 | bool first = 1; | ||
1702 | struct elf_thread_core_info *t = info->thread; | ||
1703 | |||
1704 | do { | ||
1705 | int i; | ||
1706 | |||
1707 | if (!writenote(&t->notes[0], file, foffset)) | ||
1708 | return 0; | ||
1709 | |||
1710 | if (first && !writenote(&info->psinfo, file, foffset)) | ||
1711 | return 0; | ||
1712 | if (first && !writenote(&info->auxv, file, foffset)) | ||
1713 | return 0; | ||
1714 | |||
1715 | for (i = 1; i < info->thread_notes; ++i) | ||
1716 | if (t->notes[i].data && | ||
1717 | !writenote(&t->notes[i], file, foffset)) | ||
1718 | return 0; | ||
1719 | |||
1720 | first = 0; | ||
1721 | t = t->next; | ||
1722 | } while (t); | ||
1723 | |||
1724 | return 1; | ||
1725 | } | ||
1726 | |||
1727 | static void free_note_info(struct elf_note_info *info) | ||
1728 | { | ||
1729 | struct elf_thread_core_info *threads = info->thread; | ||
1730 | while (threads) { | ||
1731 | unsigned int i; | ||
1732 | struct elf_thread_core_info *t = threads; | ||
1733 | threads = t->next; | ||
1734 | WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); | ||
1735 | for (i = 1; i < info->thread_notes; ++i) | ||
1736 | kfree(t->notes[i].data); | ||
1737 | kfree(t); | ||
1738 | } | ||
1739 | kfree(info->psinfo.data); | ||
1740 | } | ||
1741 | |||
1742 | #else | ||
1743 | |||
1450 | /* Here is the structure in which status of each thread is captured. */ | 1744 | /* Here is the structure in which status of each thread is captured. */ |
1451 | struct elf_thread_status | 1745 | struct elf_thread_status |
1452 | { | 1746 | { |
@@ -1499,6 +1793,176 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) | |||
1499 | return sz; | 1793 | return sz; |
1500 | } | 1794 | } |
1501 | 1795 | ||
1796 | struct elf_note_info { | ||
1797 | struct memelfnote *notes; | ||
1798 | struct elf_prstatus *prstatus; /* NT_PRSTATUS */ | ||
1799 | struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ | ||
1800 | struct list_head thread_list; | ||
1801 | elf_fpregset_t *fpu; | ||
1802 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1803 | elf_fpxregset_t *xfpu; | ||
1804 | #endif | ||
1805 | int thread_status_size; | ||
1806 | int numnote; | ||
1807 | }; | ||
1808 | |||
1809 | static int fill_note_info(struct elfhdr *elf, int phdrs, | ||
1810 | struct elf_note_info *info, | ||
1811 | long signr, struct pt_regs *regs) | ||
1812 | { | ||
1813 | #define NUM_NOTES 6 | ||
1814 | struct list_head *t; | ||
1815 | struct task_struct *g, *p; | ||
1816 | |||
1817 | info->notes = NULL; | ||
1818 | info->prstatus = NULL; | ||
1819 | info->psinfo = NULL; | ||
1820 | info->fpu = NULL; | ||
1821 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1822 | info->xfpu = NULL; | ||
1823 | #endif | ||
1824 | INIT_LIST_HEAD(&info->thread_list); | ||
1825 | |||
1826 | info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), | ||
1827 | GFP_KERNEL); | ||
1828 | if (!info->notes) | ||
1829 | return 0; | ||
1830 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | ||
1831 | if (!info->psinfo) | ||
1832 | return 0; | ||
1833 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | ||
1834 | if (!info->prstatus) | ||
1835 | return 0; | ||
1836 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | ||
1837 | if (!info->fpu) | ||
1838 | return 0; | ||
1839 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1840 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | ||
1841 | if (!info->xfpu) | ||
1842 | return 0; | ||
1843 | #endif | ||
1844 | |||
1845 | info->thread_status_size = 0; | ||
1846 | if (signr) { | ||
1847 | struct elf_thread_status *tmp; | ||
1848 | rcu_read_lock(); | ||
1849 | do_each_thread(g, p) | ||
1850 | if (current->mm == p->mm && current != p) { | ||
1851 | tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); | ||
1852 | if (!tmp) { | ||
1853 | rcu_read_unlock(); | ||
1854 | return 0; | ||
1855 | } | ||
1856 | tmp->thread = p; | ||
1857 | list_add(&tmp->list, &info->thread_list); | ||
1858 | } | ||
1859 | while_each_thread(g, p); | ||
1860 | rcu_read_unlock(); | ||
1861 | list_for_each(t, &info->thread_list) { | ||
1862 | struct elf_thread_status *tmp; | ||
1863 | int sz; | ||
1864 | |||
1865 | tmp = list_entry(t, struct elf_thread_status, list); | ||
1866 | sz = elf_dump_thread_status(signr, tmp); | ||
1867 | info->thread_status_size += sz; | ||
1868 | } | ||
1869 | } | ||
1870 | /* now collect the dump for the current */ | ||
1871 | memset(info->prstatus, 0, sizeof(*info->prstatus)); | ||
1872 | fill_prstatus(info->prstatus, current, signr); | ||
1873 | elf_core_copy_regs(&info->prstatus->pr_reg, regs); | ||
1874 | |||
1875 | /* Set up header */ | ||
1876 | fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); | ||
1877 | |||
1878 | /* | ||
1879 | * Set up the notes in similar form to SVR4 core dumps made | ||
1880 | * with info from their /proc. | ||
1881 | */ | ||
1882 | |||
1883 | fill_note(info->notes + 0, "CORE", NT_PRSTATUS, | ||
1884 | sizeof(*info->prstatus), info->prstatus); | ||
1885 | fill_psinfo(info->psinfo, current->group_leader, current->mm); | ||
1886 | fill_note(info->notes + 1, "CORE", NT_PRPSINFO, | ||
1887 | sizeof(*info->psinfo), info->psinfo); | ||
1888 | |||
1889 | info->numnote = 2; | ||
1890 | |||
1891 | fill_auxv_note(&info->notes[info->numnote++], current->mm); | ||
1892 | |||
1893 | /* Try to dump the FPU. */ | ||
1894 | info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, | ||
1895 | info->fpu); | ||
1896 | if (info->prstatus->pr_fpvalid) | ||
1897 | fill_note(info->notes + info->numnote++, | ||
1898 | "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); | ||
1899 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1900 | if (elf_core_copy_task_xfpregs(current, info->xfpu)) | ||
1901 | fill_note(info->notes + info->numnote++, | ||
1902 | "LINUX", ELF_CORE_XFPREG_TYPE, | ||
1903 | sizeof(*info->xfpu), info->xfpu); | ||
1904 | #endif | ||
1905 | |||
1906 | return 1; | ||
1907 | |||
1908 | #undef NUM_NOTES | ||
1909 | } | ||
1910 | |||
1911 | static size_t get_note_info_size(struct elf_note_info *info) | ||
1912 | { | ||
1913 | int sz = 0; | ||
1914 | int i; | ||
1915 | |||
1916 | for (i = 0; i < info->numnote; i++) | ||
1917 | sz += notesize(info->notes + i); | ||
1918 | |||
1919 | sz += info->thread_status_size; | ||
1920 | |||
1921 | return sz; | ||
1922 | } | ||
1923 | |||
1924 | static int write_note_info(struct elf_note_info *info, | ||
1925 | struct file *file, loff_t *foffset) | ||
1926 | { | ||
1927 | int i; | ||
1928 | struct list_head *t; | ||
1929 | |||
1930 | for (i = 0; i < info->numnote; i++) | ||
1931 | if (!writenote(info->notes + i, file, foffset)) | ||
1932 | return 0; | ||
1933 | |||
1934 | /* write out the thread status notes section */ | ||
1935 | list_for_each(t, &info->thread_list) { | ||
1936 | struct elf_thread_status *tmp = | ||
1937 | list_entry(t, struct elf_thread_status, list); | ||
1938 | |||
1939 | for (i = 0; i < tmp->num_notes; i++) | ||
1940 | if (!writenote(&tmp->notes[i], file, foffset)) | ||
1941 | return 0; | ||
1942 | } | ||
1943 | |||
1944 | return 1; | ||
1945 | } | ||
1946 | |||
1947 | static void free_note_info(struct elf_note_info *info) | ||
1948 | { | ||
1949 | while (!list_empty(&info->thread_list)) { | ||
1950 | struct list_head *tmp = info->thread_list.next; | ||
1951 | list_del(tmp); | ||
1952 | kfree(list_entry(tmp, struct elf_thread_status, list)); | ||
1953 | } | ||
1954 | |||
1955 | kfree(info->prstatus); | ||
1956 | kfree(info->psinfo); | ||
1957 | kfree(info->notes); | ||
1958 | kfree(info->fpu); | ||
1959 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1960 | kfree(info->xfpu); | ||
1961 | #endif | ||
1962 | } | ||
1963 | |||
1964 | #endif | ||
1965 | |||
1502 | static struct vm_area_struct *first_vma(struct task_struct *tsk, | 1966 | static struct vm_area_struct *first_vma(struct task_struct *tsk, |
1503 | struct vm_area_struct *gate_vma) | 1967 | struct vm_area_struct *gate_vma) |
1504 | { | 1968 | { |
@@ -1534,29 +1998,15 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, | |||
1534 | */ | 1998 | */ |
1535 | static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) | 1999 | static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) |
1536 | { | 2000 | { |
1537 | #define NUM_NOTES 6 | ||
1538 | int has_dumped = 0; | 2001 | int has_dumped = 0; |
1539 | mm_segment_t fs; | 2002 | mm_segment_t fs; |
1540 | int segs; | 2003 | int segs; |
1541 | size_t size = 0; | 2004 | size_t size = 0; |
1542 | int i; | ||
1543 | struct vm_area_struct *vma, *gate_vma; | 2005 | struct vm_area_struct *vma, *gate_vma; |
1544 | struct elfhdr *elf = NULL; | 2006 | struct elfhdr *elf = NULL; |
1545 | loff_t offset = 0, dataoff, foffset; | 2007 | loff_t offset = 0, dataoff, foffset; |
1546 | int numnote; | ||
1547 | struct memelfnote *notes = NULL; | ||
1548 | struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ | ||
1549 | struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ | ||
1550 | struct task_struct *g, *p; | ||
1551 | LIST_HEAD(thread_list); | ||
1552 | struct list_head *t; | ||
1553 | elf_fpregset_t *fpu = NULL; | ||
1554 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1555 | elf_fpxregset_t *xfpu = NULL; | ||
1556 | #endif | ||
1557 | int thread_status_size = 0; | ||
1558 | elf_addr_t *auxv; | ||
1559 | unsigned long mm_flags; | 2008 | unsigned long mm_flags; |
2009 | struct elf_note_info info; | ||
1560 | 2010 | ||
1561 | /* | 2011 | /* |
1562 | * We no longer stop all VM operations. | 2012 | * We no longer stop all VM operations. |
@@ -1574,52 +2024,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
1574 | elf = kmalloc(sizeof(*elf), GFP_KERNEL); | 2024 | elf = kmalloc(sizeof(*elf), GFP_KERNEL); |
1575 | if (!elf) | 2025 | if (!elf) |
1576 | goto cleanup; | 2026 | goto cleanup; |
1577 | prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL); | ||
1578 | if (!prstatus) | ||
1579 | goto cleanup; | ||
1580 | psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); | ||
1581 | if (!psinfo) | ||
1582 | goto cleanup; | ||
1583 | notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL); | ||
1584 | if (!notes) | ||
1585 | goto cleanup; | ||
1586 | fpu = kmalloc(sizeof(*fpu), GFP_KERNEL); | ||
1587 | if (!fpu) | ||
1588 | goto cleanup; | ||
1589 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1590 | xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL); | ||
1591 | if (!xfpu) | ||
1592 | goto cleanup; | ||
1593 | #endif | ||
1594 | |||
1595 | if (signr) { | ||
1596 | struct elf_thread_status *tmp; | ||
1597 | rcu_read_lock(); | ||
1598 | do_each_thread(g,p) | ||
1599 | if (current->mm == p->mm && current != p) { | ||
1600 | tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); | ||
1601 | if (!tmp) { | ||
1602 | rcu_read_unlock(); | ||
1603 | goto cleanup; | ||
1604 | } | ||
1605 | tmp->thread = p; | ||
1606 | list_add(&tmp->list, &thread_list); | ||
1607 | } | ||
1608 | while_each_thread(g,p); | ||
1609 | rcu_read_unlock(); | ||
1610 | list_for_each(t, &thread_list) { | ||
1611 | struct elf_thread_status *tmp; | ||
1612 | int sz; | ||
1613 | |||
1614 | tmp = list_entry(t, struct elf_thread_status, list); | ||
1615 | sz = elf_dump_thread_status(signr, tmp); | ||
1616 | thread_status_size += sz; | ||
1617 | } | ||
1618 | } | ||
1619 | /* now collect the dump for the current */ | ||
1620 | memset(prstatus, 0, sizeof(*prstatus)); | ||
1621 | fill_prstatus(prstatus, current, signr); | ||
1622 | elf_core_copy_regs(&prstatus->pr_reg, regs); | ||
1623 | 2027 | ||
1624 | segs = current->mm->map_count; | 2028 | segs = current->mm->map_count; |
1625 | #ifdef ELF_CORE_EXTRA_PHDRS | 2029 | #ifdef ELF_CORE_EXTRA_PHDRS |
@@ -1630,42 +2034,16 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
1630 | if (gate_vma != NULL) | 2034 | if (gate_vma != NULL) |
1631 | segs++; | 2035 | segs++; |
1632 | 2036 | ||
1633 | /* Set up header */ | ||
1634 | fill_elf_header(elf, segs + 1); /* including notes section */ | ||
1635 | |||
1636 | has_dumped = 1; | ||
1637 | current->flags |= PF_DUMPCORE; | ||
1638 | |||
1639 | /* | 2037 | /* |
1640 | * Set up the notes in similar form to SVR4 core dumps made | 2038 | * Collect all the non-memory information about the process for the |
1641 | * with info from their /proc. | 2039 | * notes. This also sets up the file header. |
1642 | */ | 2040 | */ |
2041 | if (!fill_note_info(elf, segs + 1, /* including notes section */ | ||
2042 | &info, signr, regs)) | ||
2043 | goto cleanup; | ||
1643 | 2044 | ||
1644 | fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); | 2045 | has_dumped = 1; |
1645 | fill_psinfo(psinfo, current->group_leader, current->mm); | 2046 | current->flags |= PF_DUMPCORE; |
1646 | fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); | ||
1647 | |||
1648 | numnote = 2; | ||
1649 | |||
1650 | auxv = (elf_addr_t *)current->mm->saved_auxv; | ||
1651 | |||
1652 | i = 0; | ||
1653 | do | ||
1654 | i += 2; | ||
1655 | while (auxv[i - 2] != AT_NULL); | ||
1656 | fill_note(&notes[numnote++], "CORE", NT_AUXV, | |
1657 | i * sizeof(elf_addr_t), auxv); | ||
1658 | |||
1659 | /* Try to dump the FPU. */ | ||
1660 | if ((prstatus->pr_fpvalid = | ||
1661 | elf_core_copy_task_fpregs(current, regs, fpu))) | ||
1662 | fill_note(notes + numnote++, | ||
1663 | "CORE", NT_PRFPREG, sizeof(*fpu), fpu); | ||
1664 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1665 | if (elf_core_copy_task_xfpregs(current, xfpu)) | ||
1666 | fill_note(notes + numnote++, | ||
1667 | "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu); | ||
1668 | #endif | ||
1669 | 2047 | ||
1670 | fs = get_fs(); | 2048 | fs = get_fs(); |
1671 | set_fs(KERNEL_DS); | 2049 | set_fs(KERNEL_DS); |
@@ -1678,12 +2056,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
1678 | /* Write notes phdr entry */ | 2056 | /* Write notes phdr entry */ |
1679 | { | 2057 | { |
1680 | struct elf_phdr phdr; | 2058 | struct elf_phdr phdr; |
1681 | int sz = 0; | 2059 | size_t sz = get_note_info_size(&info); |
1682 | |||
1683 | for (i = 0; i < numnote; i++) | ||
1684 | sz += notesize(notes + i); | ||
1685 | |||
1686 | sz += thread_status_size; | ||
1687 | 2060 | ||
1688 | sz += elf_coredump_extra_notes_size(); | 2061 | sz += elf_coredump_extra_notes_size(); |
1689 | 2062 | ||
@@ -1728,23 +2101,12 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
1728 | #endif | 2101 | #endif |
1729 | 2102 | ||
1730 | /* write out the notes section */ | 2103 | /* write out the notes section */ |
1731 | for (i = 0; i < numnote; i++) | 2104 | if (!write_note_info(&info, file, &foffset)) |
1732 | if (!writenote(notes + i, file, &foffset)) | 2105 | goto end_coredump; |
1733 | goto end_coredump; | ||
1734 | 2106 | ||
1735 | if (elf_coredump_extra_notes_write(file, &foffset)) | 2107 | if (elf_coredump_extra_notes_write(file, &foffset)) |
1736 | goto end_coredump; | 2108 | goto end_coredump; |
1737 | 2109 | ||
1738 | /* write out the thread status notes section */ | ||
1739 | list_for_each(t, &thread_list) { | ||
1740 | struct elf_thread_status *tmp = | ||
1741 | list_entry(t, struct elf_thread_status, list); | ||
1742 | |||
1743 | for (i = 0; i < tmp->num_notes; i++) | ||
1744 | if (!writenote(&tmp->notes[i], file, &foffset)) | ||
1745 | goto end_coredump; | ||
1746 | } | ||
1747 | |||
1748 | /* Align to page */ | 2110 | /* Align to page */ |
1749 | DUMP_SEEK(dataoff - foffset); | 2111 | DUMP_SEEK(dataoff - foffset); |
1750 | 2112 | ||
@@ -1795,22 +2157,9 @@ end_coredump: | |||
1795 | set_fs(fs); | 2157 | set_fs(fs); |
1796 | 2158 | ||
1797 | cleanup: | 2159 | cleanup: |
1798 | while (!list_empty(&thread_list)) { | ||
1799 | struct list_head *tmp = thread_list.next; | ||
1800 | list_del(tmp); | ||
1801 | kfree(list_entry(tmp, struct elf_thread_status, list)); | ||
1802 | } | ||
1803 | |||
1804 | kfree(elf); | 2160 | kfree(elf); |
1805 | kfree(prstatus); | 2161 | free_note_info(&info); |
1806 | kfree(psinfo); | ||
1807 | kfree(notes); | ||
1808 | kfree(fpu); | ||
1809 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1810 | kfree(xfpu); | ||
1811 | #endif | ||
1812 | return has_dumped; | 2162 | return has_dumped; |
1813 | #undef NUM_NOTES | ||
1814 | } | 2163 | } |
1815 | 2164 | ||
1816 | #endif /* USE_ELF_CORE_DUMP */ | 2165 | #endif /* USE_ELF_CORE_DUMP */ |
@@ -248,11 +248,13 @@ inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | |||
248 | */ | 248 | */ |
249 | void __bio_clone(struct bio *bio, struct bio *bio_src) | 249 | void __bio_clone(struct bio *bio, struct bio *bio_src) |
250 | { | 250 | { |
251 | struct request_queue *q = bdev_get_queue(bio_src->bi_bdev); | ||
252 | |||
253 | memcpy(bio->bi_io_vec, bio_src->bi_io_vec, | 251 | memcpy(bio->bi_io_vec, bio_src->bi_io_vec, |
254 | bio_src->bi_max_vecs * sizeof(struct bio_vec)); | 252 | bio_src->bi_max_vecs * sizeof(struct bio_vec)); |
255 | 253 | ||
254 | /* | ||
255 | * most users will be overriding ->bi_bdev with a new target, | ||
256 | * so we don't set nor calculate new physical/hw segment counts here | ||
257 | */ | ||
256 | bio->bi_sector = bio_src->bi_sector; | 258 | bio->bi_sector = bio_src->bi_sector; |
257 | bio->bi_bdev = bio_src->bi_bdev; | 259 | bio->bi_bdev = bio_src->bi_bdev; |
258 | bio->bi_flags |= 1 << BIO_CLONED; | 260 | bio->bi_flags |= 1 << BIO_CLONED; |
@@ -260,8 +262,6 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) | |||
260 | bio->bi_vcnt = bio_src->bi_vcnt; | 262 | bio->bi_vcnt = bio_src->bi_vcnt; |
261 | bio->bi_size = bio_src->bi_size; | 263 | bio->bi_size = bio_src->bi_size; |
262 | bio->bi_idx = bio_src->bi_idx; | 264 | bio->bi_idx = bio_src->bi_idx; |
263 | bio_phys_segments(q, bio); | ||
264 | bio_hw_segments(q, bio); | ||
265 | } | 265 | } |
266 | 266 | ||
267 | /** | 267 | /** |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 993f78c55221..e48a630ae266 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -738,9 +738,9 @@ EXPORT_SYMBOL(bd_release); | |||
738 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | 738 | static struct kobject *bdev_get_kobj(struct block_device *bdev) |
739 | { | 739 | { |
740 | if (bdev->bd_contains != bdev) | 740 | if (bdev->bd_contains != bdev) |
741 | return kobject_get(&bdev->bd_part->kobj); | 741 | return kobject_get(&bdev->bd_part->dev.kobj); |
742 | else | 742 | else |
743 | return kobject_get(&bdev->bd_disk->kobj); | 743 | return kobject_get(&bdev->bd_disk->dev.kobj); |
744 | } | 744 | } |
745 | 745 | ||
746 | static struct kobject *bdev_get_holder(struct block_device *bdev) | 746 | static struct kobject *bdev_get_holder(struct block_device *bdev) |
@@ -1176,7 +1176,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
1176 | ret = -ENXIO; | 1176 | ret = -ENXIO; |
1177 | goto out_first; | 1177 | goto out_first; |
1178 | } | 1178 | } |
1179 | kobject_get(&p->kobj); | 1179 | kobject_get(&p->dev.kobj); |
1180 | bdev->bd_part = p; | 1180 | bdev->bd_part = p; |
1181 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | 1181 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); |
1182 | } | 1182 | } |
@@ -1299,7 +1299,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1299 | module_put(owner); | 1299 | module_put(owner); |
1300 | 1300 | ||
1301 | if (bdev->bd_contains != bdev) { | 1301 | if (bdev->bd_contains != bdev) { |
1302 | kobject_put(&bdev->bd_part->kobj); | 1302 | kobject_put(&bdev->bd_part->dev.kobj); |
1303 | bdev->bd_part = NULL; | 1303 | bdev->bd_part = NULL; |
1304 | } | 1304 | } |
1305 | bdev->bd_disk = NULL; | 1305 | bdev->bd_disk = NULL; |
diff --git a/fs/buffer.c b/fs/buffer.c index 7249e014819e..456c9ab7705b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3213,6 +3213,50 @@ static int buffer_cpu_notify(struct notifier_block *self, | |||
3213 | return NOTIFY_OK; | 3213 | return NOTIFY_OK; |
3214 | } | 3214 | } |
3215 | 3215 | ||
3216 | /** | ||
3217 | * bh_uptodate_or_lock: Test whether the buffer is uptodate | ||
3218 | * @bh: struct buffer_head | ||
3219 | * | ||
3220 | * Return true if the buffer is up-to-date and false, | ||
3221 | * with the buffer locked, if not. | ||
3222 | */ | ||
3223 | int bh_uptodate_or_lock(struct buffer_head *bh) | ||
3224 | { | ||
3225 | if (!buffer_uptodate(bh)) { | ||
3226 | lock_buffer(bh); | ||
3227 | if (!buffer_uptodate(bh)) | ||
3228 | return 0; | ||
3229 | unlock_buffer(bh); | ||
3230 | } | ||
3231 | return 1; | ||
3232 | } | ||
3233 | EXPORT_SYMBOL(bh_uptodate_or_lock); | ||
3234 | |||
3235 | /** | ||
3236 | * bh_submit_read: Submit a locked buffer for reading | ||
3237 | * @bh: struct buffer_head | ||
3238 | * | ||
3239 | * Returns zero on success and -EIO on error. | ||
3240 | */ | ||
3241 | int bh_submit_read(struct buffer_head *bh) | ||
3242 | { | ||
3243 | BUG_ON(!buffer_locked(bh)); | ||
3244 | |||
3245 | if (buffer_uptodate(bh)) { | ||
3246 | unlock_buffer(bh); | ||
3247 | return 0; | ||
3248 | } | ||
3249 | |||
3250 | get_bh(bh); | ||
3251 | bh->b_end_io = end_buffer_read_sync; | ||
3252 | submit_bh(READ, bh); | ||
3253 | wait_on_buffer(bh); | ||
3254 | if (buffer_uptodate(bh)) | ||
3255 | return 0; | ||
3256 | return -EIO; | ||
3257 | } | ||
3258 | EXPORT_SYMBOL(bh_submit_read); | ||
3259 | |||
3216 | void __init buffer_init(void) | 3260 | void __init buffer_init(void) |
3217 | { | 3261 | { |
3218 | int nrpages; | 3262 | int nrpages; |
diff --git a/fs/char_dev.c b/fs/char_dev.c index c3bfa76765c4..2c7a8b5b4598 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -510,9 +510,8 @@ struct cdev *cdev_alloc(void) | |||
510 | { | 510 | { |
511 | struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); | 511 | struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); |
512 | if (p) { | 512 | if (p) { |
513 | p->kobj.ktype = &ktype_cdev_dynamic; | ||
514 | INIT_LIST_HEAD(&p->list); | 513 | INIT_LIST_HEAD(&p->list); |
515 | kobject_init(&p->kobj); | 514 | kobject_init(&p->kobj, &ktype_cdev_dynamic); |
516 | } | 515 | } |
517 | return p; | 516 | return p; |
518 | } | 517 | } |
@@ -529,8 +528,7 @@ void cdev_init(struct cdev *cdev, const struct file_operations *fops) | |||
529 | { | 528 | { |
530 | memset(cdev, 0, sizeof *cdev); | 529 | memset(cdev, 0, sizeof *cdev); |
531 | INIT_LIST_HEAD(&cdev->list); | 530 | INIT_LIST_HEAD(&cdev->list); |
532 | cdev->kobj.ktype = &ktype_cdev_default; | 531 | kobject_init(&cdev->kobj, &ktype_cdev_default); |
533 | kobject_init(&cdev->kobj); | ||
534 | cdev->ops = fops; | 532 | cdev->ops = fops; |
535 | } | 533 | } |
536 | 534 | ||
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index a609599287aa..edd248367b36 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -3,7 +3,10 @@ Version 1.52 | |||
3 | Fix oops on second mount to server when null auth is used. | 3 | Fix oops on second mount to server when null auth is used. |
4 | Enable experimental Kerberos support. Return writebehind errors on flush | 4 | Enable experimental Kerberos support. Return writebehind errors on flush |
5 | and sync so that events like out of disk space get reported properly on | 5 | and sync so that events like out of disk space get reported properly on |
6 | cached files. | 6 | cached files. Fix setxattr failure to certain Samba versions. Fix mount |
7 | of second share to disconnected server session (autoreconnect on this). | ||
8 | Add ability to modify cifs acls for handling chmod (when mounted with | ||
9 | cifsacl flag). | ||
7 | 10 | ||
8 | Version 1.51 | 11 | Version 1.51 |
9 | ------------ | 12 | ------------ |
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 45e42fb97c19..6ba43fb346fb 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile | |||
@@ -9,3 +9,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ | |||
9 | readdir.o ioctl.o sess.o export.o cifsacl.o | 9 | readdir.o ioctl.o sess.o export.o cifsacl.o |
10 | 10 | ||
11 | cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o | 11 | cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o |
12 | |||
13 | cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o | ||
diff --git a/fs/cifs/README b/fs/cifs/README index bf11329ac784..c623e2f9c5db 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -56,7 +56,8 @@ the CIFS VFS web site) copy it to the same directory in which mount.smbfs and | |||
56 | similar files reside (usually /sbin). Although the helper software is not | 56 | similar files reside (usually /sbin). Although the helper software is not |
57 | required, mount.cifs is recommended. Eventually the Samba 3.0 utility program | 57 | required, mount.cifs is recommended. Eventually the Samba 3.0 utility program |
58 | "net" may also be helpful since it may someday provide easier mount syntax for | 58 | "net" may also be helpful since it may someday provide easier mount syntax for |
59 | users who are used to Windows e.g. net use <mount point> <UNC name or cifs URL> | 59 | users who are used to Windows e.g. |
60 | net use <mount point> <UNC name or cifs URL> | ||
60 | Note that running the Winbind pam/nss module (logon service) on all of your | 61 | Note that running the Winbind pam/nss module (logon service) on all of your |
61 | Linux clients is useful in mapping Uids and Gids consistently across the | 62 | Linux clients is useful in mapping Uids and Gids consistently across the |
62 | domain to the proper network user. The mount.cifs mount helper can be | 63 | domain to the proper network user. The mount.cifs mount helper can be |
@@ -248,7 +249,7 @@ A partial list of the supported mount options follows: | |||
248 | the CIFS session. | 249 | the CIFS session. |
249 | password The user password. If the mount helper is | 250 | password The user password. If the mount helper is |
250 | installed, the user will be prompted for password | 251 | installed, the user will be prompted for password |
251 | if it is not supplied. | 252 | if not supplied. |
252 | ip The ip address of the target server | 253 | ip The ip address of the target server |
253 | unc The target server Universal Network Name (export) to | 254 | unc The target server Universal Network Name (export) to |
254 | mount. | 255 | mount. |
@@ -283,7 +284,7 @@ A partial list of the supported mount options follows: | |||
283 | can be enabled by specifying file_mode and dir_mode on | 284 | can be enabled by specifying file_mode and dir_mode on |
284 | the client. Note that the mount.cifs helper must be | 285 | the client. Note that the mount.cifs helper must be |
285 | at version 1.10 or higher to support specifying the uid | 286 | at version 1.10 or higher to support specifying the uid |
286 | (or gid) in non-numberic form. | 287 | (or gid) in non-numeric form. |
287 | gid Set the default gid for inodes (similar to above). | 288 | gid Set the default gid for inodes (similar to above). |
288 | file_mode If CIFS Unix extensions are not supported by the server | 289 | file_mode If CIFS Unix extensions are not supported by the server |
289 | this overrides the default mode for file inodes. | 290 | this overrides the default mode for file inodes. |
@@ -417,9 +418,10 @@ A partial list of the supported mount options follows: | |||
417 | acl Allow setfacl and getfacl to manage posix ACLs if server | 418 | acl Allow setfacl and getfacl to manage posix ACLs if server |
418 | supports them. (default) | 419 | supports them. (default) |
419 | noacl Do not allow setfacl and getfacl calls on this mount | 420 | noacl Do not allow setfacl and getfacl calls on this mount |
420 | user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended | 421 | user_xattr Allow getting and setting user xattrs (those attributes whose |
421 | attributes) to the server (default) e.g. via setfattr | 422 | name begins with "user." or "os2.") as OS/2 EAs (extended |
422 | and getfattr utilities. | 423 | attributes) to the server. This allows support of the |
424 | setfattr and getfattr utilities. (default) | ||
423 | nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs | 425 | nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs |
424 | mapchars Translate six of the seven reserved characters (not backslash) | 426 | mapchars Translate six of the seven reserved characters (not backslash) |
425 | *?<>|: | 427 | *?<>|: |
@@ -434,6 +436,7 @@ A partial list of the supported mount options follows: | |||
434 | nomapchars Do not translate any of these seven characters (default). | 436 | nomapchars Do not translate any of these seven characters (default). |
435 | nocase Request case insensitive path name matching (case | 437 | nocase Request case insensitive path name matching (case |
436 | sensitive is the default if the server supports it). | 438 | sensitive is the default if the server supports it). |
439 | (mount option "ignorecase" is identical to "nocase") | ||
437 | posixpaths If CIFS Unix extensions are supported, attempt to | 440 | posixpaths If CIFS Unix extensions are supported, attempt to |
438 | negotiate posix path name support which allows certain | 441 | negotiate posix path name support which allows certain |
439 | characters forbidden in typical CIFS filenames, without | 442 | characters forbidden in typical CIFS filenames, without |
@@ -485,6 +488,9 @@ A partial list of the supported mount options follows: | |||
485 | ntlmv2i Use NTLMv2 password hashing with packet signing | 488 | ntlmv2i Use NTLMv2 password hashing with packet signing |
486 | lanman (if configured in kernel config) use older | 489 | lanman (if configured in kernel config) use older |
487 | lanman hash | 490 | lanman hash |
491 | hard Retry file operations if server is not responding | ||
492 | soft Limit retries to unresponsive servers (usually only | ||
493 | one retry) before returning an error. (default) | ||
488 | 494 | ||
489 | The mount.cifs mount helper also accepts a few mount options before -o | 495 | The mount.cifs mount helper also accepts a few mount options before -o |
490 | including: | 496 | including: |
@@ -535,8 +541,8 @@ SecurityFlags Flags which control security negotiation and | |||
535 | must use NTLM 0x02002 | 541 | must use NTLM 0x02002 |
536 | may use NTLMv2 0x00004 | 542 | may use NTLMv2 0x00004 |
537 | must use NTLMv2 0x04004 | 543 | must use NTLMv2 0x04004 |
538 | may use Kerberos security (not implemented yet) 0x00008 | 544 | may use Kerberos security 0x00008 |
539 | must use Kerberos (not implemented yet) 0x08008 | 545 | must use Kerberos 0x08008 |
540 | may use lanman (weak) password hash 0x00010 | 546 | may use lanman (weak) password hash 0x00010 |
541 | must use lanman password hash 0x10010 | 547 | must use lanman password hash 0x10010 |
542 | may use plaintext passwords 0x00020 | 548 | may use plaintext passwords 0x00020 |
@@ -626,6 +632,6 @@ returned success. | |||
626 | 632 | ||
627 | Also note that "cat /proc/fs/cifs/DebugData" will display information about | 633 | Also note that "cat /proc/fs/cifs/DebugData" will display information about |
628 | the active sessions and the shares that are mounted. | 634 | the active sessions and the shares that are mounted. |
629 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is enabled | 635 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is |
630 | but requires a user space helper (from the Samba project). NTLM and NTLMv2 and | 636 | on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and |
631 | LANMAN support do not require this helpr. | 637 | LANMAN support do not require this helper. |
diff --git a/fs/cifs/TODO b/fs/cifs/TODO index a8852c200728..92c9feac440f 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO | |||
@@ -1,4 +1,4 @@ | |||
1 | Version 1.49 April 26, 2007 | 1 | Version 1.52 January 3, 2008 |
2 | 2 | ||
3 | A Partial List of Missing Features | 3 | A Partial List of Missing Features |
4 | ================================== | 4 | ================================== |
@@ -16,16 +16,14 @@ SecurityDescriptors | |||
16 | c) Better pam/winbind integration (e.g. to handle uid mapping | 16 | c) Better pam/winbind integration (e.g. to handle uid mapping |
17 | better) | 17 | better) |
18 | 18 | ||
19 | d) Verify that Kerberos signing works | 19 | d) Cleanup now unneeded SessSetup code in |
20 | |||
21 | e) Cleanup now unneeded SessSetup code in | ||
22 | fs/cifs/connect.c and add back in NTLMSSP code if any servers | 20 | fs/cifs/connect.c and add back in NTLMSSP code if any servers |
23 | need it | 21 | need it |
24 | 22 | ||
25 | f) MD5-HMAC signing SMB PDUs when SPNEGO style SessionSetup | 23 | e) ms-dfs and ms-dfs host name resolution cleanup |
26 | used (Kerberos or NTLMSSP). Signing alreadyimplemented for NTLM | 24 | |
27 | and raw NTLMSSP already. This is important when enabling | 25 | f) fix NTLMv2 signing when two mounts with different users to same |
28 | extended security and mounting to Windows 2003 Servers | 26 | server. |
29 | 27 | ||
30 | g) Directory entry caching relies on a 1 second timer, rather than | 28 | g) Directory entry caching relies on a 1 second timer, rather than |
31 | using FindNotify or equivalent. - (started) | 29 | using FindNotify or equivalent. - (started) |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c new file mode 100644 index 000000000000..413ee2349d1a --- /dev/null +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -0,0 +1,377 @@ | |||
1 | /* | ||
2 | * Contains the CIFS DFS referral mounting routines used for handling | ||
3 | * traversal via DFS junction point | ||
4 | * | ||
5 | * Copyright (c) 2007 Igor Mammedov | ||
6 | * Copyright (C) International Business Machines Corp., 2008 | ||
7 | * Author(s): Igor Mammedov (niallain@gmail.com) | ||
8 | * Steve French (sfrench@us.ibm.com) | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/dcache.h> | ||
16 | #include <linux/mount.h> | ||
17 | #include <linux/namei.h> | ||
18 | #include <linux/vfs.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include "cifsglob.h" | ||
21 | #include "cifsproto.h" | ||
22 | #include "cifsfs.h" | ||
23 | #include "dns_resolve.h" | ||
24 | #include "cifs_debug.h" | ||
25 | |||
26 | LIST_HEAD(cifs_dfs_automount_list); | ||
27 | |||
28 | /* | ||
29 | * DFS functions | ||
30 | */ | ||
31 | |||
32 | void dfs_shrink_umount_helper(struct vfsmount *vfsmnt) | ||
33 | { | ||
34 | mark_mounts_for_expiry(&cifs_dfs_automount_list); | ||
35 | mark_mounts_for_expiry(&cifs_dfs_automount_list); | ||
36 | shrink_submounts(vfsmnt, &cifs_dfs_automount_list); | ||
37 | } | ||
38 | |||
39 | /** | ||
40 | * cifs_get_share_name - extracts share name from UNC | ||
41 | * @node_name: pointer to UNC string | ||
42 | * | ||
43 | * Extracts sharename from full UNC. | ||
44 | * i.e. strips from UNC trailing path that is not part of share | ||
45 | * name and fixes up a missing '\' at the beginning of the DFS node referral | ||
46 | * if necessary. | ||
47 | * Returns pointer to share name on success or NULL on error. | ||
48 | * Caller is responsible for freeing returned string. | ||
49 | */ | ||
50 | static char *cifs_get_share_name(const char *node_name) | ||
51 | { | ||
52 | int len; | ||
53 | char *UNC; | ||
54 | char *pSep; | ||
55 | |||
56 | len = strlen(node_name); | ||
57 | UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, | ||
58 | GFP_KERNEL); | ||
59 | if (!UNC) | ||
60 | return NULL; | ||
61 | |||
62 | /* get share name and server name */ | ||
63 | if (node_name[1] != '\\') { | ||
64 | UNC[0] = '\\'; | ||
65 | strncpy(UNC+1, node_name, len); | ||
66 | len++; | ||
67 | UNC[len] = 0; | ||
68 | } else { | ||
69 | strncpy(UNC, node_name, len); | ||
70 | UNC[len] = 0; | ||
71 | } | ||
72 | |||
73 | /* find server name end */ | ||
74 | pSep = memchr(UNC+2, '\\', len-2); | ||
75 | if (!pSep) { | ||
76 | cERROR(1, ("%s: no server name end in node name: %s", | ||
77 | __FUNCTION__, node_name)); | ||
78 | kfree(UNC); | ||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | /* find sharename end */ | ||
83 | pSep++; | ||
84 | pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); | ||
85 | if (!pSep) { | ||
86 | cERROR(1, ("%s:2 cant find share name in node name: %s", | ||
87 | __FUNCTION__, node_name)); | ||
88 | kfree(UNC); | ||
89 | return NULL; | ||
90 | } | ||
91 | /* trim path up to sharename end | ||
92 | * * now we have share name in UNC */ | ||
93 | *pSep = 0; | ||
94 | |||
95 | return UNC; | ||
96 | } | ||
97 | |||
98 | |||
99 | /** | ||
100 | * compose_mount_options - creates mount options for referral | ||
101 | * @sb_mountdata: parent/root DFS mount options (template) | ||
102 | * @ref_unc: referral server UNC | ||
103 | * @devname: pointer for saving device name | ||
104 | * | ||
105 | * creates mount options for submount based on template options sb_mountdata | ||
106 | * and replacing unc,ip,prefixpath options with ones we've got from ref_unc. | ||
107 | * | ||
108 | * Returns: pointer to new mount options or ERR_PTR. | ||
109 | * Caller is responsible for freeing returned value if it is not error. | ||
110 | */ | ||
111 | static char *compose_mount_options(const char *sb_mountdata, | ||
112 | const char *ref_unc, | ||
113 | char **devname) | ||
114 | { | ||
115 | int rc; | ||
116 | char *mountdata; | ||
117 | int md_len; | ||
118 | char *tkn_e; | ||
119 | char *srvIP = NULL; | ||
120 | char sep = ','; | ||
121 | int off, noff; | ||
122 | |||
123 | if (sb_mountdata == NULL) | ||
124 | return ERR_PTR(-EINVAL); | ||
125 | |||
126 | *devname = cifs_get_share_name(ref_unc); | ||
127 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); | ||
128 | if (rc != 0) { | ||
129 | cERROR(1, ("%s: Failed to resolve server part of %s to IP", | ||
130 | __FUNCTION__, *devname)); | ||
131 | mountdata = ERR_PTR(rc); | ||
132 | goto compose_mount_options_out; | ||
133 | } | ||
134 | md_len = strlen(sb_mountdata) + strlen(srvIP) + strlen(ref_unc) + 3; | ||
135 | mountdata = kzalloc(md_len+1, GFP_KERNEL); | ||
136 | if (mountdata == NULL) { | ||
137 | mountdata = ERR_PTR(-ENOMEM); | ||
138 | goto compose_mount_options_out; | ||
139 | } | ||
140 | |||
141 | /* copy all options except of unc,ip,prefixpath */ | ||
142 | off = 0; | ||
143 | if (strncmp(sb_mountdata, "sep=", 4) == 0) { | ||
144 | sep = sb_mountdata[4]; | ||
145 | strncpy(mountdata, sb_mountdata, 5); | ||
146 | off += 5; | ||
147 | } | ||
148 | while ((tkn_e = strchr(sb_mountdata+off, sep))) { | ||
149 | noff = (tkn_e - (sb_mountdata+off)) + 1; | ||
150 | if (strnicmp(sb_mountdata+off, "unc=", 4) == 0) { | ||
151 | off += noff; | ||
152 | continue; | ||
153 | } | ||
154 | if (strnicmp(sb_mountdata+off, "ip=", 3) == 0) { | ||
155 | off += noff; | ||
156 | continue; | ||
157 | } | ||
158 | if (strnicmp(sb_mountdata+off, "prefixpath=", 3) == 0) { | ||
159 | off += noff; | ||
160 | continue; | ||
161 | } | ||
162 | strncat(mountdata, sb_mountdata+off, noff); | ||
163 | off += noff; | ||
164 | } | ||
165 | strcat(mountdata, sb_mountdata+off); | ||
166 | mountdata[md_len] = '\0'; | ||
167 | |||
168 | /* copy new IP and ref share name */ | ||
169 | strcat(mountdata, ",ip="); | ||
170 | strcat(mountdata, srvIP); | ||
171 | strcat(mountdata, ",unc="); | ||
172 | strcat(mountdata, *devname); | ||
173 | |||
174 | /* find & copy prefixpath */ | ||
175 | tkn_e = strchr(ref_unc+2, '\\'); | ||
176 | if (tkn_e) { | ||
177 | tkn_e = strchr(tkn_e+1, '\\'); | ||
178 | if (tkn_e) { | ||
179 | strcat(mountdata, ",prefixpath="); | ||
180 | strcat(mountdata, tkn_e); | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /*cFYI(1,("%s: parent mountdata: %s", __FUNCTION__,sb_mountdata));*/ | ||
185 | /*cFYI(1, ("%s: submount mountdata: %s", __FUNCTION__, mountdata ));*/ | ||
186 | |||
187 | compose_mount_options_out: | ||
188 | kfree(srvIP); | ||
189 | return mountdata; | ||
190 | } | ||
191 | |||
192 | |||
193 | static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, | ||
194 | struct dentry *dentry, char *ref_unc) | ||
195 | { | ||
196 | struct cifs_sb_info *cifs_sb; | ||
197 | struct vfsmount *mnt; | ||
198 | char *mountdata; | ||
199 | char *devname = NULL; | ||
200 | |||
201 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); | ||
202 | mountdata = compose_mount_options(cifs_sb->mountdata, | ||
203 | ref_unc, &devname); | ||
204 | |||
205 | if (IS_ERR(mountdata)) | ||
206 | return (struct vfsmount *)mountdata; | ||
207 | |||
208 | mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata); | ||
209 | kfree(mountdata); | ||
210 | kfree(devname); | ||
211 | return mnt; | ||
212 | |||
213 | } | ||
214 | |||
215 | static char *build_full_dfs_path_from_dentry(struct dentry *dentry) | ||
216 | { | ||
217 | char *full_path = NULL; | ||
218 | char *search_path; | ||
219 | char *tmp_path; | ||
220 | size_t l_max_len; | ||
221 | struct cifs_sb_info *cifs_sb; | ||
222 | |||
223 | if (dentry->d_inode == NULL) | ||
224 | return NULL; | ||
225 | |||
226 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); | ||
227 | |||
228 | if (cifs_sb->tcon == NULL) | ||
229 | return NULL; | ||
230 | |||
231 | search_path = build_path_from_dentry(dentry); | ||
232 | if (search_path == NULL) | ||
233 | return NULL; | ||
234 | |||
235 | if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { | ||
236 | /* we should use full path name to correct working with DFS */ | ||
237 | l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + | ||
238 | strnlen(search_path, MAX_PATHCONF) + 1; | ||
239 | tmp_path = kmalloc(l_max_len, GFP_KERNEL); | ||
240 | if (tmp_path == NULL) { | ||
241 | kfree(search_path); | ||
242 | return NULL; | ||
243 | } | ||
244 | strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); | ||
245 | strcat(tmp_path, search_path); | ||
246 | tmp_path[l_max_len-1] = 0; | ||
247 | full_path = tmp_path; | ||
248 | kfree(search_path); | ||
249 | } else { | ||
250 | full_path = search_path; | ||
251 | } | ||
252 | return full_path; | ||
253 | } | ||
254 | |||
255 | static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, | ||
256 | struct list_head *mntlist) | ||
257 | { | ||
258 | /* stolen from afs code */ | ||
259 | int err; | ||
260 | |||
261 | mntget(newmnt); | ||
262 | err = do_add_mount(newmnt, nd, nd->mnt->mnt_flags, mntlist); | ||
263 | switch (err) { | ||
264 | case 0: | ||
265 | dput(nd->dentry); | ||
266 | mntput(nd->mnt); | ||
267 | nd->mnt = newmnt; | ||
268 | nd->dentry = dget(newmnt->mnt_root); | ||
269 | break; | ||
270 | case -EBUSY: | ||
271 | /* someone else made a mount here whilst we were busy */ | ||
272 | while (d_mountpoint(nd->dentry) && | ||
273 | follow_down(&nd->mnt, &nd->dentry)) | ||
274 | ; | ||
275 | err = 0; | ||
276 | default: | ||
277 | mntput(newmnt); | ||
278 | break; | ||
279 | } | ||
280 | return err; | ||
281 | } | ||
282 | |||
283 | static void dump_referral(const struct dfs_info3_param *ref) | ||
284 | { | ||
285 | cFYI(1, ("DFS: ref path: %s", ref->path_name)); | ||
286 | cFYI(1, ("DFS: node path: %s", ref->node_name)); | ||
287 | cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type)); | ||
288 | cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag, | ||
289 | ref->PathConsumed)); | ||
290 | } | ||
291 | |||
292 | |||
293 | static void* | ||
294 | cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | ||
295 | { | ||
296 | struct dfs_info3_param *referrals = NULL; | ||
297 | unsigned int num_referrals = 0; | ||
298 | struct cifs_sb_info *cifs_sb; | ||
299 | struct cifsSesInfo *ses; | ||
300 | char *full_path = NULL; | ||
301 | int xid, i; | ||
302 | int rc = 0; | ||
303 | struct vfsmount *mnt = ERR_PTR(-ENOENT); | ||
304 | |||
305 | cFYI(1, ("in %s", __FUNCTION__)); | ||
306 | BUG_ON(IS_ROOT(dentry)); | ||
307 | |||
308 | xid = GetXid(); | ||
309 | |||
310 | dput(nd->dentry); | ||
311 | nd->dentry = dget(dentry); | ||
312 | |||
313 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); | ||
314 | ses = cifs_sb->tcon->ses; | ||
315 | |||
316 | if (!ses) { | ||
317 | rc = -EINVAL; | ||
318 | goto out_err; | ||
319 | } | ||
320 | |||
321 | full_path = build_full_dfs_path_from_dentry(dentry); | ||
322 | if (full_path == NULL) { | ||
323 | rc = -ENOMEM; | ||
324 | goto out_err; | ||
325 | } | ||
326 | |||
327 | rc = get_dfs_path(xid, ses , full_path, cifs_sb->local_nls, | ||
328 | &num_referrals, &referrals, | ||
329 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
330 | |||
331 | for (i = 0; i < num_referrals; i++) { | ||
332 | dump_referral(referrals+i); | ||
333 | /* connect to a storage node */ | ||
334 | if (referrals[i].flags & DFSREF_STORAGE_SERVER) { | ||
335 | int len; | ||
336 | len = strlen(referrals[i].node_name); | ||
337 | if (len < 2) { | ||
338 | cERROR(1, ("%s: Net Address path too short: %s", | ||
339 | __FUNCTION__, referrals[i].node_name)); | ||
340 | rc = -EINVAL; | ||
341 | goto out_err; | ||
342 | } | ||
343 | mnt = cifs_dfs_do_refmount(nd->mnt, nd->dentry, | ||
344 | referrals[i].node_name); | ||
345 | cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", | ||
346 | __FUNCTION__, | ||
347 | referrals[i].node_name, mnt)); | ||
348 | |||
349 | /* complete mount procedure if we acquired a submount */ | ||
350 | if (!IS_ERR(mnt)) | ||
351 | break; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | /* we need it cause for() above could exit without valid submount */ | ||
356 | rc = PTR_ERR(mnt); | ||
357 | if (IS_ERR(mnt)) | ||
358 | goto out_err; | ||
359 | |||
360 | nd->mnt->mnt_flags |= MNT_SHRINKABLE; | ||
361 | rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); | ||
362 | |||
363 | out: | ||
364 | FreeXid(xid); | ||
365 | free_dfs_info_array(referrals, num_referrals); | ||
366 | kfree(full_path); | ||
367 | cFYI(1, ("leaving %s" , __FUNCTION__)); | ||
368 | return ERR_PTR(rc); | ||
369 | out_err: | ||
370 | path_release(nd); | ||
371 | goto out; | ||
372 | } | ||
373 | |||
374 | struct inode_operations cifs_dfs_referral_inode_operations = { | ||
375 | .follow_link = cifs_dfs_follow_mountpoint, | ||
376 | }; | ||
377 | |||
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 34af556cdd8d..8ad2330ba061 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -43,6 +43,9 @@ struct cifs_sb_info { | |||
43 | mode_t mnt_dir_mode; | 43 | mode_t mnt_dir_mode; |
44 | int mnt_cifs_flags; | 44 | int mnt_cifs_flags; |
45 | int prepathlen; | 45 | int prepathlen; |
46 | char *prepath; | 46 | char *prepath; /* relative path under the share to mount to */ |
47 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
48 | char *mountdata; /* mount options received at mount time */ | ||
49 | #endif | ||
47 | }; | 50 | }; |
48 | #endif /* _CIFS_FS_SB_H */ | 51 | #endif /* _CIFS_FS_SB_H */ |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 1529d2b12e9c..d543accc10dd 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -122,11 +122,13 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
122 | cFYI(1, ("key description = %s", description)); | 122 | cFYI(1, ("key description = %s", description)); |
123 | spnego_key = request_key(&cifs_spnego_key_type, description, ""); | 123 | spnego_key = request_key(&cifs_spnego_key_type, description, ""); |
124 | 124 | ||
125 | #ifdef CONFIG_CIFS_DEBUG2 | ||
125 | if (cifsFYI && !IS_ERR(spnego_key)) { | 126 | if (cifsFYI && !IS_ERR(spnego_key)) { |
126 | struct cifs_spnego_msg *msg = spnego_key->payload.data; | 127 | struct cifs_spnego_msg *msg = spnego_key->payload.data; |
127 | cifs_dump_mem("SPNEGO reply blob:", msg->data, | 128 | cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024, |
128 | msg->secblob_len + msg->sesskey_len); | 129 | msg->secblob_len + msg->sesskey_len)); |
129 | } | 130 | } |
131 | #endif /* CONFIG_CIFS_DEBUG2 */ | ||
130 | 132 | ||
131 | out: | 133 | out: |
132 | kfree(description); | 134 | kfree(description); |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index c312adcba4fc..a7035bd18e4e 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -129,6 +129,54 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) | |||
129 | return (1); /* sids compare/match */ | 129 | return (1); /* sids compare/match */ |
130 | } | 130 | } |
131 | 131 | ||
132 | |||
133 | /* copy ntsd, owner sid, and group sid from a security descriptor to another */ | ||
134 | static void copy_sec_desc(const struct cifs_ntsd *pntsd, | ||
135 | struct cifs_ntsd *pnntsd, __u32 sidsoffset) | ||
136 | { | ||
137 | int i; | ||
138 | |||
139 | struct cifs_sid *owner_sid_ptr, *group_sid_ptr; | ||
140 | struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; | ||
141 | |||
142 | /* copy security descriptor control portion */ | ||
143 | pnntsd->revision = pntsd->revision; | ||
144 | pnntsd->type = pntsd->type; | ||
145 | pnntsd->dacloffset = cpu_to_le32(sizeof(struct cifs_ntsd)); | ||
146 | pnntsd->sacloffset = 0; | ||
147 | pnntsd->osidoffset = cpu_to_le32(sidsoffset); | ||
148 | pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid)); | ||
149 | |||
150 | /* copy owner sid */ | ||
151 | owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + | ||
152 | le32_to_cpu(pntsd->osidoffset)); | ||
153 | nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); | ||
154 | |||
155 | nowner_sid_ptr->revision = owner_sid_ptr->revision; | ||
156 | nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth; | ||
157 | for (i = 0; i < 6; i++) | ||
158 | nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i]; | ||
159 | for (i = 0; i < 5; i++) | ||
160 | nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i]; | ||
161 | |||
162 | /* copy group sid */ | ||
163 | group_sid_ptr = (struct cifs_sid *)((char *)pntsd + | ||
164 | le32_to_cpu(pntsd->gsidoffset)); | ||
165 | ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + | ||
166 | sizeof(struct cifs_sid)); | ||
167 | |||
168 | ngroup_sid_ptr->revision = group_sid_ptr->revision; | ||
169 | ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth; | ||
170 | for (i = 0; i < 6; i++) | ||
171 | ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; | ||
172 | for (i = 0; i < 5; i++) | ||
173 | ngroup_sid_ptr->sub_auth[i] = | ||
174 | cpu_to_le32(group_sid_ptr->sub_auth[i]); | ||
175 | |||
176 | return; | ||
177 | } | ||
178 | |||
179 | |||
132 | /* | 180 | /* |
133 | change posix mode to reflect permissions | 181 | change posix mode to reflect permissions |
134 | pmode is the existing mode (we only want to overwrite part of this | 182 | pmode is the existing mode (we only want to overwrite part of this |
@@ -220,6 +268,33 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, | |||
220 | return; | 268 | return; |
221 | } | 269 | } |
222 | 270 | ||
271 | static __le16 fill_ace_for_sid(struct cifs_ace *pntace, | ||
272 | const struct cifs_sid *psid, __u64 nmode, umode_t bits) | ||
273 | { | ||
274 | int i; | ||
275 | __u16 size = 0; | ||
276 | __u32 access_req = 0; | ||
277 | |||
278 | pntace->type = ACCESS_ALLOWED; | ||
279 | pntace->flags = 0x0; | ||
280 | mode_to_access_flags(nmode, bits, &access_req); | ||
281 | if (!access_req) | ||
282 | access_req = SET_MINIMUM_RIGHTS; | ||
283 | pntace->access_req = cpu_to_le32(access_req); | ||
284 | |||
285 | pntace->sid.revision = psid->revision; | ||
286 | pntace->sid.num_subauth = psid->num_subauth; | ||
287 | for (i = 0; i < 6; i++) | ||
288 | pntace->sid.authority[i] = psid->authority[i]; | ||
289 | for (i = 0; i < psid->num_subauth; i++) | ||
290 | pntace->sid.sub_auth[i] = psid->sub_auth[i]; | ||
291 | |||
292 | size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4); | ||
293 | pntace->size = cpu_to_le16(size); | ||
294 | |||
295 | return (size); | ||
296 | } | ||
297 | |||
223 | 298 | ||
224 | #ifdef CONFIG_CIFS_DEBUG2 | 299 | #ifdef CONFIG_CIFS_DEBUG2 |
225 | static void dump_ace(struct cifs_ace *pace, char *end_of_acl) | 300 | static void dump_ace(struct cifs_ace *pace, char *end_of_acl) |
@@ -243,7 +318,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) | |||
243 | int i; | 318 | int i; |
244 | cFYI(1, ("ACE revision %d num_auth %d type %d flags %d size %d", | 319 | cFYI(1, ("ACE revision %d num_auth %d type %d flags %d size %d", |
245 | pace->sid.revision, pace->sid.num_subauth, pace->type, | 320 | pace->sid.revision, pace->sid.num_subauth, pace->type, |
246 | pace->flags, pace->size)); | 321 | pace->flags, le16_to_cpu(pace->size))); |
247 | for (i = 0; i < num_subauth; ++i) { | 322 | for (i = 0; i < num_subauth; ++i) { |
248 | cFYI(1, ("ACE sub_auth[%d]: 0x%x", i, | 323 | cFYI(1, ("ACE sub_auth[%d]: 0x%x", i, |
249 | le32_to_cpu(pace->sid.sub_auth[i]))); | 324 | le32_to_cpu(pace->sid.sub_auth[i]))); |
@@ -346,6 +421,28 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, | |||
346 | } | 421 | } |
347 | 422 | ||
348 | 423 | ||
424 | static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid, | ||
425 | struct cifs_sid *pgrpsid, __u64 nmode) | ||
426 | { | ||
427 | __le16 size = 0; | ||
428 | struct cifs_acl *pnndacl; | ||
429 | |||
430 | pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl)); | ||
431 | |||
432 | size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size), | ||
433 | pownersid, nmode, S_IRWXU); | ||
434 | size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), | ||
435 | pgrpsid, nmode, S_IRWXG); | ||
436 | size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size), | ||
437 | &sid_everyone, nmode, S_IRWXO); | ||
438 | |||
439 | pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); | ||
440 | pndacl->num_aces = 3; | ||
441 | |||
442 | return (0); | ||
443 | } | ||
444 | |||
445 | |||
349 | static int parse_sid(struct cifs_sid *psid, char *end_of_acl) | 446 | static int parse_sid(struct cifs_sid *psid, char *end_of_acl) |
350 | { | 447 | { |
351 | /* BB need to add parm so we can store the SID BB */ | 448 | /* BB need to add parm so we can store the SID BB */ |
@@ -432,6 +529,46 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, | |||
432 | } | 529 | } |
433 | 530 | ||
434 | 531 | ||
532 | /* Convert permission bits from mode to equivalent CIFS ACL */ | ||
533 | static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, | ||
534 | int acl_len, struct inode *inode, __u64 nmode) | ||
535 | { | ||
536 | int rc = 0; | ||
537 | __u32 dacloffset; | ||
538 | __u32 ndacloffset; | ||
539 | __u32 sidsoffset; | ||
540 | struct cifs_sid *owner_sid_ptr, *group_sid_ptr; | ||
541 | struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */ | ||
542 | struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ | ||
543 | |||
544 | if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL)) | ||
545 | return (-EIO); | ||
546 | |||
547 | owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + | ||
548 | le32_to_cpu(pntsd->osidoffset)); | ||
549 | group_sid_ptr = (struct cifs_sid *)((char *)pntsd + | ||
550 | le32_to_cpu(pntsd->gsidoffset)); | ||
551 | |||
552 | dacloffset = le32_to_cpu(pntsd->dacloffset); | ||
553 | dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); | ||
554 | |||
555 | ndacloffset = sizeof(struct cifs_ntsd); | ||
556 | ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset); | ||
557 | ndacl_ptr->revision = dacl_ptr->revision; | ||
558 | ndacl_ptr->size = 0; | ||
559 | ndacl_ptr->num_aces = 0; | ||
560 | |||
561 | rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr, nmode); | ||
562 | |||
563 | sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size); | ||
564 | |||
565 | /* copy security descriptor control portion and owner and group sid */ | ||
566 | copy_sec_desc(pntsd, pnntsd, sidsoffset); | ||
567 | |||
568 | return (rc); | ||
569 | } | ||
570 | |||
571 | |||
435 | /* Retrieve an ACL from the server */ | 572 | /* Retrieve an ACL from the server */ |
436 | static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, | 573 | static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, |
437 | const char *path) | 574 | const char *path) |
@@ -487,6 +624,64 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, | |||
487 | return pntsd; | 624 | return pntsd; |
488 | } | 625 | } |
489 | 626 | ||
627 | /* Set an ACL on the server */ | ||
628 | static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | ||
629 | struct inode *inode, const char *path) | ||
630 | { | ||
631 | struct cifsFileInfo *open_file; | ||
632 | int unlock_file = FALSE; | ||
633 | int xid; | ||
634 | int rc = -EIO; | ||
635 | __u16 fid; | ||
636 | struct super_block *sb; | ||
637 | struct cifs_sb_info *cifs_sb; | ||
638 | |||
639 | #ifdef CONFIG_CIFS_DEBUG2 | ||
640 | cFYI(1, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); | ||
641 | #endif | ||
642 | |||
643 | if (!inode) | ||
644 | return (rc); | ||
645 | |||
646 | sb = inode->i_sb; | ||
647 | if (sb == NULL) | ||
648 | return (rc); | ||
649 | |||
650 | cifs_sb = CIFS_SB(sb); | ||
651 | xid = GetXid(); | ||
652 | |||
653 | open_file = find_readable_file(CIFS_I(inode)); | ||
654 | if (open_file) { | ||
655 | unlock_file = TRUE; | ||
656 | fid = open_file->netfid; | ||
657 | } else { | ||
658 | int oplock = FALSE; | ||
659 | /* open file */ | ||
660 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, | ||
661 | WRITE_DAC, 0, &fid, &oplock, NULL, | ||
662 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
663 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
664 | if (rc != 0) { | ||
665 | cERROR(1, ("Unable to open file to set ACL")); | ||
666 | FreeXid(xid); | ||
667 | return (rc); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); | ||
672 | #ifdef CONFIG_CIFS_DEBUG2 | ||
673 | cFYI(1, ("SetCIFSACL rc = %d", rc)); | ||
674 | #endif | ||
675 | if (unlock_file == TRUE) | ||
676 | atomic_dec(&open_file->wrtPending); | ||
677 | else | ||
678 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | ||
679 | |||
680 | FreeXid(xid); | ||
681 | |||
682 | return (rc); | ||
683 | } | ||
684 | |||
490 | /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ | 685 | /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ |
491 | void acl_to_uid_mode(struct inode *inode, const char *path) | 686 | void acl_to_uid_mode(struct inode *inode, const char *path) |
492 | { | 687 | { |
@@ -510,24 +705,53 @@ void acl_to_uid_mode(struct inode *inode, const char *path) | |||
510 | } | 705 | } |
511 | 706 | ||
512 | /* Convert mode bits to an ACL so we can update the ACL on the server */ | 707 | /* Convert mode bits to an ACL so we can update the ACL on the server */ |
513 | int mode_to_acl(struct inode *inode, const char *path) | 708 | int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) |
514 | { | 709 | { |
515 | int rc = 0; | 710 | int rc = 0; |
516 | __u32 acllen = 0; | 711 | __u32 acllen = 0; |
517 | struct cifs_ntsd *pntsd = NULL; | 712 | struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ |
713 | struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ | ||
518 | 714 | ||
715 | #ifdef CONFIG_CIFS_DEBUG2 | ||
519 | cFYI(1, ("set ACL from mode for %s", path)); | 716 | cFYI(1, ("set ACL from mode for %s", path)); |
717 | #endif | ||
520 | 718 | ||
521 | /* Get the security descriptor */ | 719 | /* Get the security descriptor */ |
522 | pntsd = get_cifs_acl(&acllen, inode, path); | 720 | pntsd = get_cifs_acl(&acllen, inode, path); |
523 | 721 | ||
524 | /* Add/Modify the three ACEs for owner, group, everyone | 722 | /* Add three ACEs for owner, group, everyone getting rid of |
525 | while retaining the other ACEs */ | 723 | other ACEs as chmod disables ACEs and set the security descriptor */ |
526 | 724 | ||
527 | /* Set the security descriptor */ | 725 | if (pntsd) { |
726 | /* allocate memory for the smb header, | ||
727 | set security descriptor request security descriptor | ||
728 | parameters, and secuirty descriptor itself */ | ||
528 | 729 | ||
730 | pnntsd = kmalloc(acllen, GFP_KERNEL); | ||
731 | if (!pnntsd) { | ||
732 | cERROR(1, ("Unable to allocate security descriptor")); | ||
733 | kfree(pntsd); | ||
734 | return (-ENOMEM); | ||
735 | } | ||
529 | 736 | ||
530 | kfree(pntsd); | 737 | rc = build_sec_desc(pntsd, pnntsd, acllen, inode, nmode); |
531 | return rc; | 738 | |
739 | #ifdef CONFIG_CIFS_DEBUG2 | ||
740 | cFYI(1, ("build_sec_desc rc: %d", rc)); | ||
741 | #endif | ||
742 | |||
743 | if (!rc) { | ||
744 | /* Set the security descriptor */ | ||
745 | rc = set_cifs_acl(pnntsd, acllen, inode, path); | ||
746 | #ifdef CONFIG_CIFS_DEBUG2 | ||
747 | cFYI(1, ("set_cifs_acl rc: %d", rc)); | ||
748 | #endif | ||
749 | } | ||
750 | |||
751 | kfree(pnntsd); | ||
752 | kfree(pntsd); | ||
753 | } | ||
754 | |||
755 | return (rc); | ||
532 | } | 756 | } |
533 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 757 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 093beaa3900d..e9f4ec701092 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "cifs_fs_sb.h" | 44 | #include "cifs_fs_sb.h" |
45 | #include <linux/mm.h> | 45 | #include <linux/mm.h> |
46 | #include <linux/key-type.h> | 46 | #include <linux/key-type.h> |
47 | #include "dns_resolve.h" | ||
47 | #include "cifs_spnego.h" | 48 | #include "cifs_spnego.h" |
48 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ | 49 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ |
49 | 50 | ||
@@ -96,6 +97,9 @@ cifs_read_super(struct super_block *sb, void *data, | |||
96 | { | 97 | { |
97 | struct inode *inode; | 98 | struct inode *inode; |
98 | struct cifs_sb_info *cifs_sb; | 99 | struct cifs_sb_info *cifs_sb; |
100 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
101 | int len; | ||
102 | #endif | ||
99 | int rc = 0; | 103 | int rc = 0; |
100 | 104 | ||
101 | /* BB should we make this contingent on mount parm? */ | 105 | /* BB should we make this contingent on mount parm? */ |
@@ -105,6 +109,25 @@ cifs_read_super(struct super_block *sb, void *data, | |||
105 | if (cifs_sb == NULL) | 109 | if (cifs_sb == NULL) |
106 | return -ENOMEM; | 110 | return -ENOMEM; |
107 | 111 | ||
112 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
113 | /* copy mount params to sb for use in submounts */ | ||
114 | /* BB: should we move this after the mount so we | ||
115 | * do not have to do the copy on failed mounts? | ||
116 | * BB: May be it is better to do simple copy before | ||
117 | * complex operation (mount), and in case of fail | ||
118 | * just exit instead of doing mount and attempting | ||
119 | * undo it if this copy fails?*/ | ||
120 | len = strlen(data); | ||
121 | cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); | ||
122 | if (cifs_sb->mountdata == NULL) { | ||
123 | kfree(sb->s_fs_info); | ||
124 | sb->s_fs_info = NULL; | ||
125 | return -ENOMEM; | ||
126 | } | ||
127 | strncpy(cifs_sb->mountdata, data, len + 1); | ||
128 | cifs_sb->mountdata[len] = '\0'; | ||
129 | #endif | ||
130 | |||
108 | rc = cifs_mount(sb, cifs_sb, data, devname); | 131 | rc = cifs_mount(sb, cifs_sb, data, devname); |
109 | 132 | ||
110 | if (rc) { | 133 | if (rc) { |
@@ -154,6 +177,12 @@ out_no_root: | |||
154 | 177 | ||
155 | out_mount_failed: | 178 | out_mount_failed: |
156 | if (cifs_sb) { | 179 | if (cifs_sb) { |
180 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
181 | if (cifs_sb->mountdata) { | ||
182 | kfree(cifs_sb->mountdata); | ||
183 | cifs_sb->mountdata = NULL; | ||
184 | } | ||
185 | #endif | ||
157 | if (cifs_sb->local_nls) | 186 | if (cifs_sb->local_nls) |
158 | unload_nls(cifs_sb->local_nls); | 187 | unload_nls(cifs_sb->local_nls); |
159 | kfree(cifs_sb); | 188 | kfree(cifs_sb); |
@@ -177,6 +206,13 @@ cifs_put_super(struct super_block *sb) | |||
177 | if (rc) { | 206 | if (rc) { |
178 | cERROR(1, ("cifs_umount failed with return code %d", rc)); | 207 | cERROR(1, ("cifs_umount failed with return code %d", rc)); |
179 | } | 208 | } |
209 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
210 | if (cifs_sb->mountdata) { | ||
211 | kfree(cifs_sb->mountdata); | ||
212 | cifs_sb->mountdata = NULL; | ||
213 | } | ||
214 | #endif | ||
215 | |||
180 | unload_nls(cifs_sb->local_nls); | 216 | unload_nls(cifs_sb->local_nls); |
181 | kfree(cifs_sb); | 217 | kfree(cifs_sb); |
182 | return; | 218 | return; |
@@ -435,6 +471,10 @@ static void cifs_umount_begin(struct vfsmount *vfsmnt, int flags) | |||
435 | struct cifs_sb_info *cifs_sb; | 471 | struct cifs_sb_info *cifs_sb; |
436 | struct cifsTconInfo *tcon; | 472 | struct cifsTconInfo *tcon; |
437 | 473 | ||
474 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
475 | dfs_shrink_umount_helper(vfsmnt); | ||
476 | #endif /* CONFIG CIFS_DFS_UPCALL */ | ||
477 | |||
438 | if (!(flags & MNT_FORCE)) | 478 | if (!(flags & MNT_FORCE)) |
439 | return; | 479 | return; |
440 | cifs_sb = CIFS_SB(vfsmnt->mnt_sb); | 480 | cifs_sb = CIFS_SB(vfsmnt->mnt_sb); |
@@ -552,7 +592,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
552 | return remote_llseek(file, offset, origin); | 592 | return remote_llseek(file, offset, origin); |
553 | } | 593 | } |
554 | 594 | ||
555 | static struct file_system_type cifs_fs_type = { | 595 | struct file_system_type cifs_fs_type = { |
556 | .owner = THIS_MODULE, | 596 | .owner = THIS_MODULE, |
557 | .name = "cifs", | 597 | .name = "cifs", |
558 | .get_sb = cifs_get_sb, | 598 | .get_sb = cifs_get_sb, |
@@ -1015,11 +1055,16 @@ init_cifs(void) | |||
1015 | if (rc) | 1055 | if (rc) |
1016 | goto out_unregister_filesystem; | 1056 | goto out_unregister_filesystem; |
1017 | #endif | 1057 | #endif |
1058 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
1059 | rc = register_key_type(&key_type_dns_resolver); | ||
1060 | if (rc) | ||
1061 | goto out_unregister_key_type; | ||
1062 | #endif | ||
1018 | oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); | 1063 | oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); |
1019 | if (IS_ERR(oplockThread)) { | 1064 | if (IS_ERR(oplockThread)) { |
1020 | rc = PTR_ERR(oplockThread); | 1065 | rc = PTR_ERR(oplockThread); |
1021 | cERROR(1, ("error %d create oplock thread", rc)); | 1066 | cERROR(1, ("error %d create oplock thread", rc)); |
1022 | goto out_unregister_key_type; | 1067 | goto out_unregister_dfs_key_type; |
1023 | } | 1068 | } |
1024 | 1069 | ||
1025 | dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); | 1070 | dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); |
@@ -1033,7 +1078,11 @@ init_cifs(void) | |||
1033 | 1078 | ||
1034 | out_stop_oplock_thread: | 1079 | out_stop_oplock_thread: |
1035 | kthread_stop(oplockThread); | 1080 | kthread_stop(oplockThread); |
1081 | out_unregister_dfs_key_type: | ||
1082 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
1083 | unregister_key_type(&key_type_dns_resolver); | ||
1036 | out_unregister_key_type: | 1084 | out_unregister_key_type: |
1085 | #endif | ||
1037 | #ifdef CONFIG_CIFS_UPCALL | 1086 | #ifdef CONFIG_CIFS_UPCALL |
1038 | unregister_key_type(&cifs_spnego_key_type); | 1087 | unregister_key_type(&cifs_spnego_key_type); |
1039 | out_unregister_filesystem: | 1088 | out_unregister_filesystem: |
@@ -1059,6 +1108,9 @@ exit_cifs(void) | |||
1059 | #ifdef CONFIG_PROC_FS | 1108 | #ifdef CONFIG_PROC_FS |
1060 | cifs_proc_clean(); | 1109 | cifs_proc_clean(); |
1061 | #endif | 1110 | #endif |
1111 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
1112 | unregister_key_type(&key_type_dns_resolver); | ||
1113 | #endif | ||
1062 | #ifdef CONFIG_CIFS_UPCALL | 1114 | #ifdef CONFIG_CIFS_UPCALL |
1063 | unregister_key_type(&cifs_spnego_key_type); | 1115 | unregister_key_type(&cifs_spnego_key_type); |
1064 | #endif | 1116 | #endif |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2a21dc66f0de..195b14de5567 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -32,6 +32,7 @@ | |||
32 | #define TRUE 1 | 32 | #define TRUE 1 |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | extern struct file_system_type cifs_fs_type; | ||
35 | extern const struct address_space_operations cifs_addr_ops; | 36 | extern const struct address_space_operations cifs_addr_ops; |
36 | extern const struct address_space_operations cifs_addr_ops_smallbuf; | 37 | extern const struct address_space_operations cifs_addr_ops_smallbuf; |
37 | 38 | ||
@@ -60,6 +61,10 @@ extern int cifs_setattr(struct dentry *, struct iattr *); | |||
60 | 61 | ||
61 | extern const struct inode_operations cifs_file_inode_ops; | 62 | extern const struct inode_operations cifs_file_inode_ops; |
62 | extern const struct inode_operations cifs_symlink_inode_ops; | 63 | extern const struct inode_operations cifs_symlink_inode_ops; |
64 | extern struct list_head cifs_dfs_automount_list; | ||
65 | extern struct inode_operations cifs_dfs_referral_inode_operations; | ||
66 | |||
67 | |||
63 | 68 | ||
64 | /* Functions related to files and directories */ | 69 | /* Functions related to files and directories */ |
65 | extern const struct file_operations cifs_file_ops; | 70 | extern const struct file_operations cifs_file_ops; |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1fde2197ad76..5d32d8ddc82e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/cifsglob.h | 2 | * fs/cifs/cifsglob.h |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2002,2007 | 4 | * Copyright (C) International Business Machines Corp., 2002,2008 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * Jeremy Allison (jra@samba.org) | 6 | * Jeremy Allison (jra@samba.org) |
7 | * | 7 | * |
@@ -70,14 +70,6 @@ | |||
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * This information is kept on every Server we know about. | ||
74 | * | ||
75 | * Some things to note: | ||
76 | * | ||
77 | */ | ||
78 | #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) | ||
79 | |||
80 | /* | ||
81 | * CIFS vfs client Status information (based on what we know.) | 73 | * CIFS vfs client Status information (based on what we know.) |
82 | */ | 74 | */ |
83 | 75 | ||
@@ -460,6 +452,37 @@ struct dir_notify_req { | |||
460 | struct file *pfile; | 452 | struct file *pfile; |
461 | }; | 453 | }; |
462 | 454 | ||
455 | struct dfs_info3_param { | ||
456 | int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/ | ||
457 | int PathConsumed; | ||
458 | int server_type; | ||
459 | int ref_flag; | ||
460 | char *path_name; | ||
461 | char *node_name; | ||
462 | }; | ||
463 | |||
464 | static inline void free_dfs_info_param(struct dfs_info3_param *param) | ||
465 | { | ||
466 | if (param) { | ||
467 | kfree(param->path_name); | ||
468 | kfree(param->node_name); | ||
469 | kfree(param); | ||
470 | } | ||
471 | } | ||
472 | |||
473 | static inline void free_dfs_info_array(struct dfs_info3_param *param, | ||
474 | int number_of_items) | ||
475 | { | ||
476 | int i; | ||
477 | if ((number_of_items == 0) || (param == NULL)) | ||
478 | return; | ||
479 | for (i = 0; i < number_of_items; i++) { | ||
480 | kfree(param[i].path_name); | ||
481 | kfree(param[i].node_name); | ||
482 | } | ||
483 | kfree(param); | ||
484 | } | ||
485 | |||
463 | #define MID_FREE 0 | 486 | #define MID_FREE 0 |
464 | #define MID_REQUEST_ALLOCATED 1 | 487 | #define MID_REQUEST_ALLOCATED 1 |
465 | #define MID_REQUEST_SUBMITTED 2 | 488 | #define MID_REQUEST_SUBMITTED 2 |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index dbe6b846f37f..47f79504f57b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -237,6 +237,9 @@ | |||
237 | | DELETE | READ_CONTROL | WRITE_DAC \ | 237 | | DELETE | READ_CONTROL | WRITE_DAC \ |
238 | | WRITE_OWNER | SYNCHRONIZE) | 238 | | WRITE_OWNER | SYNCHRONIZE) |
239 | 239 | ||
240 | #define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \ | ||
241 | | READ_CONTROL | SYNCHRONIZE) | ||
242 | |||
240 | 243 | ||
241 | /* | 244 | /* |
242 | * Invalid readdir handle | 245 | * Invalid readdir handle |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 8350eec49663..2f09f565a3d9 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/cifsproto.h | 2 | * fs/cifs/cifsproto.h |
3 | * | 3 | * |
4 | * Copyright (c) International Business Machines Corp., 2002,2007 | 4 | * Copyright (c) International Business Machines Corp., 2002,2008 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
@@ -97,11 +97,14 @@ extern int cifs_get_inode_info_unix(struct inode **pinode, | |||
97 | const unsigned char *search_path, | 97 | const unsigned char *search_path, |
98 | struct super_block *sb, int xid); | 98 | struct super_block *sb, int xid); |
99 | extern void acl_to_uid_mode(struct inode *inode, const char *search_path); | 99 | extern void acl_to_uid_mode(struct inode *inode, const char *search_path); |
100 | extern int mode_to_acl(struct inode *inode, const char *path); | 100 | extern int mode_to_acl(struct inode *inode, const char *path, __u64); |
101 | 101 | ||
102 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, | 102 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, |
103 | const char *); | 103 | const char *); |
104 | extern int cifs_umount(struct super_block *, struct cifs_sb_info *); | 104 | extern int cifs_umount(struct super_block *, struct cifs_sb_info *); |
105 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
106 | extern void dfs_shrink_umount_helper(struct vfsmount *vfsmnt); | ||
107 | #endif | ||
105 | void cifs_proc_init(void); | 108 | void cifs_proc_init(void); |
106 | void cifs_proc_clean(void); | 109 | void cifs_proc_clean(void); |
107 | 110 | ||
@@ -153,7 +156,7 @@ extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, | |||
153 | const char *old_path, | 156 | const char *old_path, |
154 | const struct nls_table *nls_codepage, | 157 | const struct nls_table *nls_codepage, |
155 | unsigned int *pnum_referrals, | 158 | unsigned int *pnum_referrals, |
156 | unsigned char **preferrals, | 159 | struct dfs_info3_param **preferrals, |
157 | int remap); | 160 | int remap); |
158 | extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, | 161 | extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, |
159 | struct super_block *sb, struct smb_vol *vol); | 162 | struct super_block *sb, struct smb_vol *vol); |
@@ -342,6 +345,8 @@ extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, | |||
342 | const struct nls_table *nls_codepage, int remap_special_chars); | 345 | const struct nls_table *nls_codepage, int remap_special_chars); |
343 | extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, | 346 | extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, |
344 | __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); | 347 | __u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen); |
348 | extern int CIFSSMBSetCIFSACL(const int, struct cifsTconInfo *, __u16, | ||
349 | struct cifs_ntsd *, __u32); | ||
345 | extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, | 350 | extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, |
346 | const unsigned char *searchName, | 351 | const unsigned char *searchName, |
347 | char *acl_inf, const int buflen, const int acl_type, | 352 | char *acl_inf, const int buflen, const int acl_type, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9e8a6bef029a..9409524e4bf8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -3156,6 +3156,71 @@ qsec_out: | |||
3156 | /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ | 3156 | /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ |
3157 | return rc; | 3157 | return rc; |
3158 | } | 3158 | } |
3159 | |||
3160 | int | ||
3161 | CIFSSMBSetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, | ||
3162 | struct cifs_ntsd *pntsd, __u32 acllen) | ||
3163 | { | ||
3164 | __u16 byte_count, param_count, data_count, param_offset, data_offset; | ||
3165 | int rc = 0; | ||
3166 | int bytes_returned = 0; | ||
3167 | SET_SEC_DESC_REQ *pSMB = NULL; | ||
3168 | NTRANSACT_RSP *pSMBr = NULL; | ||
3169 | |||
3170 | setCifsAclRetry: | ||
3171 | rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, | ||
3172 | (void **) &pSMBr); | ||
3173 | if (rc) | ||
3174 | return (rc); | ||
3175 | |||
3176 | pSMB->MaxSetupCount = 0; | ||
3177 | pSMB->Reserved = 0; | ||
3178 | |||
3179 | param_count = 8; | ||
3180 | param_offset = offsetof(struct smb_com_transaction_ssec_req, Fid) - 4; | ||
3181 | data_count = acllen; | ||
3182 | data_offset = param_offset + param_count; | ||
3183 | byte_count = 3 /* pad */ + param_count; | ||
3184 | |||
3185 | pSMB->DataCount = cpu_to_le32(data_count); | ||
3186 | pSMB->TotalDataCount = pSMB->DataCount; | ||
3187 | pSMB->MaxParameterCount = cpu_to_le32(4); | ||
3188 | pSMB->MaxDataCount = cpu_to_le32(16384); | ||
3189 | pSMB->ParameterCount = cpu_to_le32(param_count); | ||
3190 | pSMB->ParameterOffset = cpu_to_le32(param_offset); | ||
3191 | pSMB->TotalParameterCount = pSMB->ParameterCount; | ||
3192 | pSMB->DataOffset = cpu_to_le32(data_offset); | ||
3193 | pSMB->SetupCount = 0; | ||
3194 | pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_SET_SECURITY_DESC); | ||
3195 | pSMB->ByteCount = cpu_to_le16(byte_count+data_count); | ||
3196 | |||
3197 | pSMB->Fid = fid; /* file handle always le */ | ||
3198 | pSMB->Reserved2 = 0; | ||
3199 | pSMB->AclFlags = cpu_to_le32(CIFS_ACL_DACL); | ||
3200 | |||
3201 | if (pntsd && acllen) { | ||
3202 | memcpy((char *) &pSMBr->hdr.Protocol + data_offset, | ||
3203 | (char *) pntsd, | ||
3204 | acllen); | ||
3205 | pSMB->hdr.smb_buf_length += (byte_count + data_count); | ||
3206 | |||
3207 | } else | ||
3208 | pSMB->hdr.smb_buf_length += byte_count; | ||
3209 | |||
3210 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | ||
3211 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | ||
3212 | |||
3213 | cFYI(1, ("SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc)); | ||
3214 | if (rc) | ||
3215 | cFYI(1, ("Set CIFS ACL returned %d", rc)); | ||
3216 | cifs_buf_release(pSMB); | ||
3217 | |||
3218 | if (rc == -EAGAIN) | ||
3219 | goto setCifsAclRetry; | ||
3220 | |||
3221 | return (rc); | ||
3222 | } | ||
3223 | |||
3159 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 3224 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ |
3160 | 3225 | ||
3161 | /* Legacy Query Path Information call for lookup to old servers such | 3226 | /* Legacy Query Path Information call for lookup to old servers such |
@@ -5499,7 +5564,7 @@ SetEARetry: | |||
5499 | else | 5564 | else |
5500 | name_len = strnlen(ea_name, 255); | 5565 | name_len = strnlen(ea_name, 255); |
5501 | 5566 | ||
5502 | count = sizeof(*parm_data) + ea_value_len + name_len + 1; | 5567 | count = sizeof(*parm_data) + ea_value_len + name_len; |
5503 | pSMB->MaxParameterCount = cpu_to_le16(2); | 5568 | pSMB->MaxParameterCount = cpu_to_le16(2); |
5504 | pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB size from sess */ | 5569 | pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB size from sess */ |
5505 | pSMB->MaxSetupCount = 0; | 5570 | pSMB->MaxSetupCount = 0; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fd9147cdb5a9..65d0ba72e78f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/connect.c | 2 | * fs/cifs/connect.c |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2002,2007 | 4 | * Copyright (C) International Business Machines Corp., 2002,2008 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
@@ -1410,7 +1410,7 @@ connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, | |||
1410 | const char *old_path, const struct nls_table *nls_codepage, | 1410 | const char *old_path, const struct nls_table *nls_codepage, |
1411 | int remap) | 1411 | int remap) |
1412 | { | 1412 | { |
1413 | unsigned char *referrals = NULL; | 1413 | struct dfs_info3_param *referrals = NULL; |
1414 | unsigned int num_referrals; | 1414 | unsigned int num_referrals; |
1415 | int rc = 0; | 1415 | int rc = 0; |
1416 | 1416 | ||
@@ -1429,12 +1429,14 @@ connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, | |||
1429 | int | 1429 | int |
1430 | get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, | 1430 | get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, |
1431 | const struct nls_table *nls_codepage, unsigned int *pnum_referrals, | 1431 | const struct nls_table *nls_codepage, unsigned int *pnum_referrals, |
1432 | unsigned char **preferrals, int remap) | 1432 | struct dfs_info3_param **preferrals, int remap) |
1433 | { | 1433 | { |
1434 | char *temp_unc; | 1434 | char *temp_unc; |
1435 | int rc = 0; | 1435 | int rc = 0; |
1436 | unsigned char *targetUNCs; | ||
1436 | 1437 | ||
1437 | *pnum_referrals = 0; | 1438 | *pnum_referrals = 0; |
1439 | *preferrals = NULL; | ||
1438 | 1440 | ||
1439 | if (pSesInfo->ipc_tid == 0) { | 1441 | if (pSesInfo->ipc_tid == 0) { |
1440 | temp_unc = kmalloc(2 /* for slashes */ + | 1442 | temp_unc = kmalloc(2 /* for slashes */ + |
@@ -1454,8 +1456,10 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, | |||
1454 | kfree(temp_unc); | 1456 | kfree(temp_unc); |
1455 | } | 1457 | } |
1456 | if (rc == 0) | 1458 | if (rc == 0) |
1457 | rc = CIFSGetDFSRefer(xid, pSesInfo, old_path, preferrals, | 1459 | rc = CIFSGetDFSRefer(xid, pSesInfo, old_path, &targetUNCs, |
1458 | pnum_referrals, nls_codepage, remap); | 1460 | pnum_referrals, nls_codepage, remap); |
1461 | /* BB map targetUNCs to dfs_info3 structures, here or | ||
1462 | in CIFSGetDFSRefer BB */ | ||
1459 | 1463 | ||
1460 | return rc; | 1464 | return rc; |
1461 | } | 1465 | } |
@@ -1964,7 +1968,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
1964 | 1968 | ||
1965 | if (existingCifsSes) { | 1969 | if (existingCifsSes) { |
1966 | pSesInfo = existingCifsSes; | 1970 | pSesInfo = existingCifsSes; |
1967 | cFYI(1, ("Existing smb sess found")); | 1971 | cFYI(1, ("Existing smb sess found (status=%d)", |
1972 | pSesInfo->status)); | ||
1973 | down(&pSesInfo->sesSem); | ||
1974 | if (pSesInfo->status == CifsNeedReconnect) { | ||
1975 | cFYI(1, ("Session needs reconnect")); | ||
1976 | rc = cifs_setup_session(xid, pSesInfo, | ||
1977 | cifs_sb->local_nls); | ||
1978 | } | ||
1979 | up(&pSesInfo->sesSem); | ||
1968 | } else if (!rc) { | 1980 | } else if (!rc) { |
1969 | cFYI(1, ("Existing smb sess not found")); | 1981 | cFYI(1, ("Existing smb sess not found")); |
1970 | pSesInfo = sesInfoAlloc(); | 1982 | pSesInfo = sesInfoAlloc(); |
@@ -3514,7 +3526,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3514 | sesInfoFree(ses); | 3526 | sesInfoFree(ses); |
3515 | 3527 | ||
3516 | FreeXid(xid); | 3528 | FreeXid(xid); |
3517 | return rc; /* BB check if we should always return zero here */ | 3529 | return rc; |
3518 | } | 3530 | } |
3519 | 3531 | ||
3520 | int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | 3532 | int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 37dc97af1487..699ec1198409 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -517,12 +517,10 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
517 | d_add(direntry, NULL); | 517 | d_add(direntry, NULL); |
518 | /* if it was once a directory (but how can we tell?) we could do | 518 | /* if it was once a directory (but how can we tell?) we could do |
519 | shrink_dcache_parent(direntry); */ | 519 | shrink_dcache_parent(direntry); */ |
520 | } else { | 520 | } else if (rc != -EACCES) { |
521 | cERROR(1, ("Error 0x%x on cifs_get_inode_info in lookup of %s", | 521 | cERROR(1, ("Unexpected lookup error %d", rc)); |
522 | rc, full_path)); | 522 | /* We special case check for Access Denied - since that |
523 | /* BB special case check for Access Denied - watch security | 523 | is a common return code */ |
524 | exposure of returning dir info implicitly via different rc | ||
525 | if file exists or not but no access BB */ | ||
526 | } | 524 | } |
527 | 525 | ||
528 | kfree(full_path); | 526 | kfree(full_path); |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c new file mode 100644 index 000000000000..ef7f43824347 --- /dev/null +++ b/fs/cifs/dns_resolve.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * fs/cifs/dns_resolve.c | ||
3 | * | ||
4 | * Copyright (c) 2007 Igor Mammedov | ||
5 | * Author(s): Igor Mammedov (niallain@gmail.com) | ||
6 | * Steve French (sfrench@us.ibm.com) | ||
7 | * | ||
8 | * Contains the CIFS DFS upcall routines used for hostname to | ||
9 | * IP address translation. | ||
10 | * | ||
11 | * This library is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU Lesser General Public License as published | ||
13 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This library is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
19 | * the GNU Lesser General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU Lesser General Public License | ||
22 | * along with this library; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | */ | ||
25 | |||
26 | #include <keys/user-type.h> | ||
27 | #include "dns_resolve.h" | ||
28 | #include "cifsglob.h" | ||
29 | #include "cifsproto.h" | ||
30 | #include "cifs_debug.h" | ||
31 | |||
32 | static int dns_resolver_instantiate(struct key *key, const void *data, | ||
33 | size_t datalen) | ||
34 | { | ||
35 | int rc = 0; | ||
36 | char *ip; | ||
37 | |||
38 | ip = kmalloc(datalen+1, GFP_KERNEL); | ||
39 | if (!ip) | ||
40 | return -ENOMEM; | ||
41 | |||
42 | memcpy(ip, data, datalen); | ||
43 | ip[datalen] = '\0'; | ||
44 | |||
45 | rcu_assign_pointer(key->payload.data, ip); | ||
46 | |||
47 | return rc; | ||
48 | } | ||
49 | |||
50 | struct key_type key_type_dns_resolver = { | ||
51 | .name = "dns_resolver", | ||
52 | .def_datalen = sizeof(struct in_addr), | ||
53 | .describe = user_describe, | ||
54 | .instantiate = dns_resolver_instantiate, | ||
55 | .match = user_match, | ||
56 | }; | ||
57 | |||
58 | |||
59 | /* Resolves server name to ip address. | ||
60 | * input: | ||
61 | * unc - server UNC | ||
62 | * output: | ||
63 | * *ip_addr - pointer to server ip, caller responcible for freeing it. | ||
64 | * return 0 on success | ||
65 | */ | ||
66 | int | ||
67 | dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | ||
68 | { | ||
69 | int rc = -EAGAIN; | ||
70 | struct key *rkey; | ||
71 | char *name; | ||
72 | int len; | ||
73 | |||
74 | if (!ip_addr || !unc) | ||
75 | return -EINVAL; | ||
76 | |||
77 | /* search for server name delimiter */ | ||
78 | len = strlen(unc); | ||
79 | if (len < 3) { | ||
80 | cFYI(1, ("%s: unc is too short: %s", __FUNCTION__, unc)); | ||
81 | return -EINVAL; | ||
82 | } | ||
83 | len -= 2; | ||
84 | name = memchr(unc+2, '\\', len); | ||
85 | if (!name) { | ||
86 | cFYI(1, ("%s: probably server name is whole unc: %s", | ||
87 | __FUNCTION__, unc)); | ||
88 | } else { | ||
89 | len = (name - unc) - 2/* leading // */; | ||
90 | } | ||
91 | |||
92 | name = kmalloc(len+1, GFP_KERNEL); | ||
93 | if (!name) { | ||
94 | rc = -ENOMEM; | ||
95 | return rc; | ||
96 | } | ||
97 | memcpy(name, unc+2, len); | ||
98 | name[len] = 0; | ||
99 | |||
100 | rkey = request_key(&key_type_dns_resolver, name, ""); | ||
101 | if (!IS_ERR(rkey)) { | ||
102 | len = strlen(rkey->payload.data); | ||
103 | *ip_addr = kmalloc(len+1, GFP_KERNEL); | ||
104 | if (*ip_addr) { | ||
105 | memcpy(*ip_addr, rkey->payload.data, len); | ||
106 | (*ip_addr)[len] = '\0'; | ||
107 | cFYI(1, ("%s: resolved: %s to %s", __FUNCTION__, | ||
108 | rkey->description, | ||
109 | *ip_addr | ||
110 | )); | ||
111 | rc = 0; | ||
112 | } else { | ||
113 | rc = -ENOMEM; | ||
114 | } | ||
115 | key_put(rkey); | ||
116 | } else { | ||
117 | cERROR(1, ("%s: unable to resolve: %s", __FUNCTION__, name)); | ||
118 | } | ||
119 | |||
120 | kfree(name); | ||
121 | return rc; | ||
122 | } | ||
123 | |||
124 | |||
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h new file mode 100644 index 000000000000..073fdc3db419 --- /dev/null +++ b/fs/cifs/dns_resolve.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS | ||
3 | * Handles host name to IP address resolution | ||
4 | * | ||
5 | * Copyright (c) International Business Machines Corp., 2008 | ||
6 | * Author(s): Steve French (sfrench@us.ibm.com) | ||
7 | * | ||
8 | * This library is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU Lesser General Public License as published | ||
10 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This library is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
16 | * the GNU Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public License | ||
19 | * along with this library; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _DNS_RESOLVE_H | ||
24 | #define _DNS_RESOLVE_H | ||
25 | |||
26 | #ifdef __KERNEL__ | ||
27 | #include <linux/key-type.h> | ||
28 | extern struct key_type key_type_dns_resolver; | ||
29 | extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); | ||
30 | #endif /* KERNEL */ | ||
31 | |||
32 | #endif /* _DNS_RESOLVE_H */ | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index dd26e2759b17..5f7c374ae89c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1179,12 +1179,10 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1179 | atomic_dec(&open_file->wrtPending); | 1179 | atomic_dec(&open_file->wrtPending); |
1180 | /* Does mm or vfs already set times? */ | 1180 | /* Does mm or vfs already set times? */ |
1181 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); | 1181 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); |
1182 | if ((bytes_written > 0) && (offset)) { | 1182 | if ((bytes_written > 0) && (offset)) |
1183 | rc = 0; | 1183 | rc = 0; |
1184 | } else if (bytes_written < 0) { | 1184 | else if (bytes_written < 0) |
1185 | if (rc != -EBADF) | 1185 | rc = bytes_written; |
1186 | rc = bytes_written; | ||
1187 | } | ||
1188 | } else { | 1186 | } else { |
1189 | cFYI(1, ("No writeable filehandles for inode")); | 1187 | cFYI(1, ("No writeable filehandles for inode")); |
1190 | rc = -EIO; | 1188 | rc = -EIO; |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index e915eb1d2e66..d9567ba2960b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -54,9 +54,9 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
54 | MAX_TREE_SIZE + 1) + | 54 | MAX_TREE_SIZE + 1) + |
55 | strnlen(search_path, MAX_PATHCONF) + 1, | 55 | strnlen(search_path, MAX_PATHCONF) + 1, |
56 | GFP_KERNEL); | 56 | GFP_KERNEL); |
57 | if (tmp_path == NULL) { | 57 | if (tmp_path == NULL) |
58 | return -ENOMEM; | 58 | return -ENOMEM; |
59 | } | 59 | |
60 | /* have to skip first of the double backslash of | 60 | /* have to skip first of the double backslash of |
61 | UNC name */ | 61 | UNC name */ |
62 | strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE); | 62 | strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE); |
@@ -511,7 +511,8 @@ int cifs_get_inode_info(struct inode **pinode, | |||
511 | } | 511 | } |
512 | 512 | ||
513 | spin_lock(&inode->i_lock); | 513 | spin_lock(&inode->i_lock); |
514 | if (is_size_safe_to_change(cifsInfo, le64_to_cpu(pfindData->EndOfFile))) { | 514 | if (is_size_safe_to_change(cifsInfo, |
515 | le64_to_cpu(pfindData->EndOfFile))) { | ||
515 | /* can not safely shrink the file size here if the | 516 | /* can not safely shrink the file size here if the |
516 | client is writing to it due to potential races */ | 517 | client is writing to it due to potential races */ |
517 | i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); | 518 | i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); |
@@ -931,7 +932,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
931 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 932 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
932 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { | 933 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { |
933 | u32 oplock = 0; | 934 | u32 oplock = 0; |
934 | FILE_UNIX_BASIC_INFO * pInfo = | 935 | FILE_UNIX_BASIC_INFO *pInfo = |
935 | kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | 936 | kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); |
936 | if (pInfo == NULL) { | 937 | if (pInfo == NULL) { |
937 | rc = -ENOMEM; | 938 | rc = -ENOMEM; |
@@ -1607,7 +1608,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
1607 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1608 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1608 | else if (attrs->ia_valid & ATTR_MODE) { | 1609 | else if (attrs->ia_valid & ATTR_MODE) { |
1609 | rc = 0; | 1610 | rc = 0; |
1610 | if ((mode & S_IWUGO) == 0) /* not writeable */ { | 1611 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
1612 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) | ||
1613 | rc = mode_to_acl(direntry->d_inode, full_path, mode); | ||
1614 | else if ((mode & S_IWUGO) == 0) { | ||
1615 | #else | ||
1616 | if ((mode & S_IWUGO) == 0) { | ||
1617 | #endif | ||
1618 | /* not writeable */ | ||
1611 | if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { | 1619 | if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { |
1612 | set_dosattr = TRUE; | 1620 | set_dosattr = TRUE; |
1613 | time_buf.Attributes = | 1621 | time_buf.Attributes = |
@@ -1626,10 +1634,10 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
1626 | if (time_buf.Attributes == 0) | 1634 | if (time_buf.Attributes == 0) |
1627 | time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); | 1635 | time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); |
1628 | } | 1636 | } |
1629 | /* BB to be implemented - | 1637 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
1630 | via Windows security descriptors or streams */ | 1638 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) |
1631 | /* CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, uid, gid, | 1639 | mode_to_acl(direntry->d_inode, full_path, mode); |
1632 | cifs_sb->local_nls); */ | 1640 | #endif |
1633 | } | 1641 | } |
1634 | 1642 | ||
1635 | if (attrs->ia_valid & ATTR_ATIME) { | 1643 | if (attrs->ia_valid & ATTR_ATIME) { |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 11f265726db7..1d6fb01b8e6d 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/link.c | 2 | * fs/cifs/link.c |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2002,2003 | 4 | * Copyright (C) International Business Machines Corp., 2002,2008 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
@@ -236,8 +236,6 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) | |||
236 | char *full_path = NULL; | 236 | char *full_path = NULL; |
237 | char *tmp_path = NULL; | 237 | char *tmp_path = NULL; |
238 | char *tmpbuffer; | 238 | char *tmpbuffer; |
239 | unsigned char *referrals = NULL; | ||
240 | unsigned int num_referrals = 0; | ||
241 | int len; | 239 | int len; |
242 | __u16 fid; | 240 | __u16 fid; |
243 | 241 | ||
@@ -297,8 +295,11 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) | |||
297 | cFYI(1, ("Error closing junction point " | 295 | cFYI(1, ("Error closing junction point " |
298 | "(open for ioctl)")); | 296 | "(open for ioctl)")); |
299 | } | 297 | } |
298 | /* BB unwind this long, nested function, or remove BB */ | ||
300 | if (rc == -EIO) { | 299 | if (rc == -EIO) { |
301 | /* Query if DFS Junction */ | 300 | /* Query if DFS Junction */ |
301 | unsigned int num_referrals = 0; | ||
302 | struct dfs_info3_param *refs = NULL; | ||
302 | tmp_path = | 303 | tmp_path = |
303 | kmalloc(MAX_TREE_SIZE + MAX_PATHCONF + 1, | 304 | kmalloc(MAX_TREE_SIZE + MAX_PATHCONF + 1, |
304 | GFP_KERNEL); | 305 | GFP_KERNEL); |
@@ -310,7 +311,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) | |||
310 | rc = get_dfs_path(xid, pTcon->ses, | 311 | rc = get_dfs_path(xid, pTcon->ses, |
311 | tmp_path, | 312 | tmp_path, |
312 | cifs_sb->local_nls, | 313 | cifs_sb->local_nls, |
313 | &num_referrals, &referrals, | 314 | &num_referrals, &refs, |
314 | cifs_sb->mnt_cifs_flags & | 315 | cifs_sb->mnt_cifs_flags & |
315 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 316 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
316 | cFYI(1, ("Get DFS for %s rc = %d ", | 317 | cFYI(1, ("Get DFS for %s rc = %d ", |
@@ -320,14 +321,13 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) | |||
320 | else { | 321 | else { |
321 | cFYI(1, ("num referral: %d", | 322 | cFYI(1, ("num referral: %d", |
322 | num_referrals)); | 323 | num_referrals)); |
323 | if (referrals) { | 324 | if (refs && refs->path_name) { |
324 | cFYI(1,("referral string: %s", referrals)); | ||
325 | strncpy(tmpbuffer, | 325 | strncpy(tmpbuffer, |
326 | referrals, | 326 | refs->path_name, |
327 | len-1); | 327 | len-1); |
328 | } | 328 | } |
329 | } | 329 | } |
330 | kfree(referrals); | 330 | kfree(refs); |
331 | kfree(tmp_path); | 331 | kfree(tmp_path); |
332 | } | 332 | } |
333 | /* BB add code like else decode referrals | 333 | /* BB add code like else decode referrals |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d0cb469daab7..d2153abcba6d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -528,9 +528,11 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
528 | rc = -EOVERFLOW; | 528 | rc = -EOVERFLOW; |
529 | goto ssetup_exit; | 529 | goto ssetup_exit; |
530 | } | 530 | } |
531 | ses->server->mac_signing_key.len = msg->sesskey_len; | 531 | if (first_time) { |
532 | memcpy(ses->server->mac_signing_key.data.krb5, msg->data, | 532 | ses->server->mac_signing_key.len = msg->sesskey_len; |
533 | msg->sesskey_len); | 533 | memcpy(ses->server->mac_signing_key.data.krb5, |
534 | msg->data, msg->sesskey_len); | ||
535 | } | ||
534 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; | 536 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; |
535 | capabilities |= CAP_EXTENDED_SECURITY; | 537 | capabilities |= CAP_EXTENDED_SECURITY; |
536 | pSMB->req.Capabilities = cpu_to_le32(capabilities); | 538 | pSMB->req.Capabilities = cpu_to_le32(capabilities); |
@@ -540,7 +542,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
540 | 542 | ||
541 | if (ses->capabilities & CAP_UNICODE) { | 543 | if (ses->capabilities & CAP_UNICODE) { |
542 | /* unicode strings must be word aligned */ | 544 | /* unicode strings must be word aligned */ |
543 | if (iov[0].iov_len % 2) { | 545 | if ((iov[0].iov_len + iov[1].iov_len) % 2) { |
544 | *bcc_ptr = 0; | 546 | *bcc_ptr = 0; |
545 | bcc_ptr++; | 547 | bcc_ptr++; |
546 | } | 548 | } |
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index dcc6aead70f5..e3eb3556622b 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -362,8 +362,8 @@ static int init_coda_psdev(void) | |||
362 | goto out_chrdev; | 362 | goto out_chrdev; |
363 | } | 363 | } |
364 | for (i = 0; i < MAX_CODADEVS; i++) | 364 | for (i = 0; i < MAX_CODADEVS; i++) |
365 | class_device_create(coda_psdev_class, NULL, | 365 | device_create(coda_psdev_class, NULL, |
366 | MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); | 366 | MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); |
367 | coda_sysctl_init(); | 367 | coda_sysctl_init(); |
368 | goto out; | 368 | goto out; |
369 | 369 | ||
@@ -405,7 +405,7 @@ static int __init init_coda(void) | |||
405 | return 0; | 405 | return 0; |
406 | out: | 406 | out: |
407 | for (i = 0; i < MAX_CODADEVS; i++) | 407 | for (i = 0; i < MAX_CODADEVS; i++) |
408 | class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); | 408 | device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); |
409 | class_destroy(coda_psdev_class); | 409 | class_destroy(coda_psdev_class); |
410 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); | 410 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); |
411 | coda_sysctl_clean(); | 411 | coda_sysctl_clean(); |
@@ -424,7 +424,7 @@ static void __exit exit_coda(void) | |||
424 | printk("coda: failed to unregister filesystem\n"); | 424 | printk("coda: failed to unregister filesystem\n"); |
425 | } | 425 | } |
426 | for (i = 0; i < MAX_CODADEVS; i++) | 426 | for (i = 0; i < MAX_CODADEVS; i++) |
427 | class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); | 427 | device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); |
428 | class_destroy(coda_psdev_class); | 428 | class_destroy(coda_psdev_class); |
429 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); | 429 | unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); |
430 | coda_sysctl_clean(); | 430 | coda_sysctl_clean(); |
diff --git a/fs/compat.c b/fs/compat.c index 15078ce4c04a..5216c3fd7517 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1104,10 +1104,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1104 | if (ret < 0) | 1104 | if (ret < 0) |
1105 | goto out; | 1105 | goto out; |
1106 | 1106 | ||
1107 | ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE); | ||
1108 | if (ret) | ||
1109 | goto out; | ||
1110 | |||
1111 | fnv = NULL; | 1107 | fnv = NULL; |
1112 | if (type == READ) { | 1108 | if (type == READ) { |
1113 | fn = file->f_op->read; | 1109 | fn = file->f_op->read; |
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c new file mode 100644 index 000000000000..0adced2f296f --- /dev/null +++ b/fs/compat_binfmt_elf.c | |||
@@ -0,0 +1,131 @@ | |||
1 | /* | ||
2 | * 32-bit compatibility support for ELF format executables and core dumps. | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc. All rights reserved. | ||
5 | * | ||
6 | * This copyrighted material is made available to anyone wishing to use, | ||
7 | * modify, copy, or redistribute it subject to the terms and conditions | ||
8 | * of the GNU General Public License v.2. | ||
9 | * | ||
10 | * Red Hat Author: Roland McGrath. | ||
11 | * | ||
12 | * This file is used in a 64-bit kernel that wants to support 32-bit ELF. | ||
13 | * asm/elf.h is responsible for defining the compat_* and COMPAT_* macros | ||
14 | * used below, with definitions appropriate for 32-bit ABI compatibility. | ||
15 | * | ||
16 | * We use macros to rename the ABI types and machine-dependent | ||
17 | * functions used in binfmt_elf.c to compat versions. | ||
18 | */ | ||
19 | |||
20 | #include <linux/elfcore-compat.h> | ||
21 | #include <linux/time.h> | ||
22 | |||
23 | /* | ||
24 | * Rename the basic ELF layout types to refer to the 32-bit class of files. | ||
25 | */ | ||
26 | #undef ELF_CLASS | ||
27 | #define ELF_CLASS ELFCLASS32 | ||
28 | |||
29 | #undef elfhdr | ||
30 | #undef elf_phdr | ||
31 | #undef elf_note | ||
32 | #undef elf_addr_t | ||
33 | #define elfhdr elf32_hdr | ||
34 | #define elf_phdr elf32_phdr | ||
35 | #define elf_note elf32_note | ||
36 | #define elf_addr_t Elf32_Addr | ||
37 | |||
38 | /* | ||
39 | * The machine-dependent core note format types are defined in elfcore-compat.h, | ||
40 | * which requires asm/elf.h to define compat_elf_gregset_t et al. | ||
41 | */ | ||
42 | #define elf_prstatus compat_elf_prstatus | ||
43 | #define elf_prpsinfo compat_elf_prpsinfo | ||
44 | |||
45 | /* | ||
46 | * Compat version of cputime_to_timeval, perhaps this | ||
47 | * should be an inline in <linux/compat.h>. | ||
48 | */ | ||
49 | static void cputime_to_compat_timeval(const cputime_t cputime, | ||
50 | struct compat_timeval *value) | ||
51 | { | ||
52 | struct timeval tv; | ||
53 | cputime_to_timeval(cputime, &tv); | ||
54 | value->tv_sec = tv.tv_sec; | ||
55 | value->tv_usec = tv.tv_usec; | ||
56 | } | ||
57 | |||
58 | #undef cputime_to_timeval | ||
59 | #define cputime_to_timeval cputime_to_compat_timeval | ||
60 | |||
61 | |||
62 | /* | ||
63 | * To use this file, asm/elf.h must define compat_elf_check_arch. | ||
64 | * The other following macros can be defined if the compat versions | ||
65 | * differ from the native ones, or omitted when they match. | ||
66 | */ | ||
67 | |||
68 | #undef ELF_ARCH | ||
69 | #undef elf_check_arch | ||
70 | #define elf_check_arch compat_elf_check_arch | ||
71 | |||
72 | #ifdef COMPAT_ELF_PLATFORM | ||
73 | #undef ELF_PLATFORM | ||
74 | #define ELF_PLATFORM COMPAT_ELF_PLATFORM | ||
75 | #endif | ||
76 | |||
77 | #ifdef COMPAT_ELF_HWCAP | ||
78 | #undef ELF_HWCAP | ||
79 | #define ELF_HWCAP COMPAT_ELF_HWCAP | ||
80 | #endif | ||
81 | |||
82 | #ifdef COMPAT_ARCH_DLINFO | ||
83 | #undef ARCH_DLINFO | ||
84 | #define ARCH_DLINFO COMPAT_ARCH_DLINFO | ||
85 | #endif | ||
86 | |||
87 | #ifdef COMPAT_ELF_ET_DYN_BASE | ||
88 | #undef ELF_ET_DYN_BASE | ||
89 | #define ELF_ET_DYN_BASE COMPAT_ELF_ET_DYN_BASE | ||
90 | #endif | ||
91 | |||
92 | #ifdef COMPAT_ELF_EXEC_PAGESIZE | ||
93 | #undef ELF_EXEC_PAGESIZE | ||
94 | #define ELF_EXEC_PAGESIZE COMPAT_ELF_EXEC_PAGESIZE | ||
95 | #endif | ||
96 | |||
97 | #ifdef COMPAT_ELF_PLAT_INIT | ||
98 | #undef ELF_PLAT_INIT | ||
99 | #define ELF_PLAT_INIT COMPAT_ELF_PLAT_INIT | ||
100 | #endif | ||
101 | |||
102 | #ifdef COMPAT_SET_PERSONALITY | ||
103 | #undef SET_PERSONALITY | ||
104 | #define SET_PERSONALITY COMPAT_SET_PERSONALITY | ||
105 | #endif | ||
106 | |||
107 | #ifdef compat_start_thread | ||
108 | #undef start_thread | ||
109 | #define start_thread compat_start_thread | ||
110 | #endif | ||
111 | |||
112 | #ifdef compat_arch_setup_additional_pages | ||
113 | #undef ARCH_HAS_SETUP_ADDITIONAL_PAGES | ||
114 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | ||
115 | #undef arch_setup_additional_pages | ||
116 | #define arch_setup_additional_pages compat_arch_setup_additional_pages | ||
117 | #endif | ||
118 | |||
119 | /* | ||
120 | * Rename a few of the symbols that binfmt_elf.c will define. | ||
121 | * These are all local so the names don't really matter, but it | ||
122 | * might make some debugging less confusing not to duplicate them. | ||
123 | */ | ||
124 | #define elf_format compat_elf_format | ||
125 | #define init_elf_binfmt init_compat_elf_binfmt | ||
126 | #define exit_elf_binfmt exit_compat_elf_binfmt | ||
127 | |||
128 | /* | ||
129 | * We share all the actual code with the native (64-bit) version. | ||
130 | */ | ||
131 | #include "binfmt_elf.c" | ||
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index e8b7c3a98a54..ffdc022cae64 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -10,6 +10,8 @@ | |||
10 | * ioctls. | 10 | * ioctls. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/joystick.h> | ||
14 | |||
13 | #include <linux/types.h> | 15 | #include <linux/types.h> |
14 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
15 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
@@ -1374,7 +1376,7 @@ static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg) | |||
1374 | return -EINVAL; | 1376 | return -EINVAL; |
1375 | } | 1377 | } |
1376 | 1378 | ||
1377 | static __attribute_used__ int | 1379 | static __used int |
1378 | ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) | 1380 | ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) |
1379 | { | 1381 | { |
1380 | return -EINVAL; | 1382 | return -EINVAL; |
@@ -2642,6 +2644,12 @@ COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES) | |||
2642 | COMPATIBLE_IOCTL(VIDEO_GET_SIZE) | 2644 | COMPATIBLE_IOCTL(VIDEO_GET_SIZE) |
2643 | COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE) | 2645 | COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE) |
2644 | 2646 | ||
2647 | /* joystick */ | ||
2648 | COMPATIBLE_IOCTL(JSIOCGVERSION) | ||
2649 | COMPATIBLE_IOCTL(JSIOCGAXES) | ||
2650 | COMPATIBLE_IOCTL(JSIOCGBUTTONS) | ||
2651 | COMPATIBLE_IOCTL(JSIOCGNAME(0)) | ||
2652 | |||
2645 | /* now things that need handlers */ | 2653 | /* now things that need handlers */ |
2646 | HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) | 2654 | HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) |
2647 | HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) | 2655 | HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 50ed691098bc..a48dc7dd8765 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -546,7 +546,7 @@ static int populate_groups(struct config_group *group) | |||
546 | * That said, taking our i_mutex is closer to mkdir | 546 | * That said, taking our i_mutex is closer to mkdir |
547 | * emulation, and shouldn't hurt. | 547 | * emulation, and shouldn't hurt. |
548 | */ | 548 | */ |
549 | mutex_lock(&dentry->d_inode->i_mutex); | 549 | mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); |
550 | 550 | ||
551 | for (i = 0; group->default_groups[i]; i++) { | 551 | for (i = 0; group->default_groups[i]; i++) { |
552 | new_group = group->default_groups[i]; | 552 | new_group = group->default_groups[i]; |
@@ -1405,7 +1405,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) | |||
1405 | sd = configfs_sb->s_root->d_fsdata; | 1405 | sd = configfs_sb->s_root->d_fsdata; |
1406 | link_group(to_config_group(sd->s_element), group); | 1406 | link_group(to_config_group(sd->s_element), group); |
1407 | 1407 | ||
1408 | mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); | 1408 | mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, |
1409 | I_MUTEX_PARENT); | ||
1409 | 1410 | ||
1410 | name.name = group->cg_item.ci_name; | 1411 | name.name = group->cg_item.ci_name; |
1411 | name.len = strlen(name.name); | 1412 | name.len = strlen(name.name); |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index a3658f9a082c..397cb503a180 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -320,7 +320,7 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att | |||
320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; | 320 | umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; |
321 | int error = 0; | 321 | int error = 0; |
322 | 322 | ||
323 | mutex_lock(&dir->d_inode->i_mutex); | 323 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL); |
324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); | 324 | error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); |
325 | mutex_unlock(&dir->d_inode->i_mutex); | 325 | mutex_unlock(&dir->d_inode->i_mutex); |
326 | 326 | ||
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 3bf0278ea843..de3b31d0a37d 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c | |||
@@ -128,7 +128,7 @@ void configfs_release_fs(void) | |||
128 | } | 128 | } |
129 | 129 | ||
130 | 130 | ||
131 | static decl_subsys(config, NULL, NULL); | 131 | static struct kobject *config_kobj; |
132 | 132 | ||
133 | static int __init configfs_init(void) | 133 | static int __init configfs_init(void) |
134 | { | 134 | { |
@@ -140,9 +140,8 @@ static int __init configfs_init(void) | |||
140 | if (!configfs_dir_cachep) | 140 | if (!configfs_dir_cachep) |
141 | goto out; | 141 | goto out; |
142 | 142 | ||
143 | kobj_set_kset_s(&config_subsys, kernel_subsys); | 143 | config_kobj = kobject_create_and_add("config", kernel_kobj); |
144 | err = subsystem_register(&config_subsys); | 144 | if (!config_kobj) { |
145 | if (err) { | ||
146 | kmem_cache_destroy(configfs_dir_cachep); | 145 | kmem_cache_destroy(configfs_dir_cachep); |
147 | configfs_dir_cachep = NULL; | 146 | configfs_dir_cachep = NULL; |
148 | goto out; | 147 | goto out; |
@@ -151,7 +150,7 @@ static int __init configfs_init(void) | |||
151 | err = register_filesystem(&configfs_fs_type); | 150 | err = register_filesystem(&configfs_fs_type); |
152 | if (err) { | 151 | if (err) { |
153 | printk(KERN_ERR "configfs: Unable to register filesystem!\n"); | 152 | printk(KERN_ERR "configfs: Unable to register filesystem!\n"); |
154 | subsystem_unregister(&config_subsys); | 153 | kobject_put(config_kobj); |
155 | kmem_cache_destroy(configfs_dir_cachep); | 154 | kmem_cache_destroy(configfs_dir_cachep); |
156 | configfs_dir_cachep = NULL; | 155 | configfs_dir_cachep = NULL; |
157 | goto out; | 156 | goto out; |
@@ -160,7 +159,7 @@ static int __init configfs_init(void) | |||
160 | err = configfs_inode_init(); | 159 | err = configfs_inode_init(); |
161 | if (err) { | 160 | if (err) { |
162 | unregister_filesystem(&configfs_fs_type); | 161 | unregister_filesystem(&configfs_fs_type); |
163 | subsystem_unregister(&config_subsys); | 162 | kobject_put(config_kobj); |
164 | kmem_cache_destroy(configfs_dir_cachep); | 163 | kmem_cache_destroy(configfs_dir_cachep); |
165 | configfs_dir_cachep = NULL; | 164 | configfs_dir_cachep = NULL; |
166 | } | 165 | } |
@@ -171,7 +170,7 @@ out: | |||
171 | static void __exit configfs_exit(void) | 170 | static void __exit configfs_exit(void) |
172 | { | 171 | { |
173 | unregister_filesystem(&configfs_fs_type); | 172 | unregister_filesystem(&configfs_fs_type); |
174 | subsystem_unregister(&config_subsys); | 173 | kobject_put(config_kobj); |
175 | kmem_cache_destroy(configfs_dir_cachep); | 174 | kmem_cache_destroy(configfs_dir_cachep); |
176 | configfs_dir_cachep = NULL; | 175 | configfs_dir_cachep = NULL; |
177 | configfs_inode_exit(); | 176 | configfs_inode_exit(); |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 6a713b33992f..d26e2826ba5b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -426,20 +426,19 @@ exit: | |||
426 | } | 426 | } |
427 | EXPORT_SYMBOL_GPL(debugfs_rename); | 427 | EXPORT_SYMBOL_GPL(debugfs_rename); |
428 | 428 | ||
429 | static decl_subsys(debug, NULL, NULL); | 429 | static struct kobject *debug_kobj; |
430 | 430 | ||
431 | static int __init debugfs_init(void) | 431 | static int __init debugfs_init(void) |
432 | { | 432 | { |
433 | int retval; | 433 | int retval; |
434 | 434 | ||
435 | kobj_set_kset_s(&debug_subsys, kernel_subsys); | 435 | debug_kobj = kobject_create_and_add("debug", kernel_kobj); |
436 | retval = subsystem_register(&debug_subsys); | 436 | if (!debug_kobj) |
437 | if (retval) | 437 | return -EINVAL; |
438 | return retval; | ||
439 | 438 | ||
440 | retval = register_filesystem(&debug_fs_type); | 439 | retval = register_filesystem(&debug_fs_type); |
441 | if (retval) | 440 | if (retval) |
442 | subsystem_unregister(&debug_subsys); | 441 | kobject_put(debug_kobj); |
443 | return retval; | 442 | return retval; |
444 | } | 443 | } |
445 | 444 | ||
@@ -447,7 +446,7 @@ static void __exit debugfs_exit(void) | |||
447 | { | 446 | { |
448 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | 447 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); |
449 | unregister_filesystem(&debug_fs_type); | 448 | unregister_filesystem(&debug_fs_type); |
450 | subsystem_unregister(&debug_subsys); | 449 | kobject_put(debug_kobj); |
451 | } | 450 | } |
452 | 451 | ||
453 | core_initcall(debugfs_init); | 452 | core_initcall(debugfs_init); |
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 46754553fdcc..ff97ba924333 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -49,7 +49,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | |||
49 | spin_unlock(&ls->ls_recover_list_lock); | 49 | spin_unlock(&ls->ls_recover_list_lock); |
50 | 50 | ||
51 | if (!found) | 51 | if (!found) |
52 | de = allocate_direntry(ls, len); | 52 | de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); |
53 | return de; | 53 | return de; |
54 | } | 54 | } |
55 | 55 | ||
@@ -62,7 +62,7 @@ void dlm_clear_free_entries(struct dlm_ls *ls) | |||
62 | de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, | 62 | de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, |
63 | list); | 63 | list); |
64 | list_del(&de->list); | 64 | list_del(&de->list); |
65 | free_direntry(de); | 65 | kfree(de); |
66 | } | 66 | } |
67 | spin_unlock(&ls->ls_recover_list_lock); | 67 | spin_unlock(&ls->ls_recover_list_lock); |
68 | } | 68 | } |
@@ -171,7 +171,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen | |||
171 | } | 171 | } |
172 | 172 | ||
173 | list_del(&de->list); | 173 | list_del(&de->list); |
174 | free_direntry(de); | 174 | kfree(de); |
175 | out: | 175 | out: |
176 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 176 | write_unlock(&ls->ls_dirtbl[bucket].lock); |
177 | } | 177 | } |
@@ -302,7 +302,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
302 | 302 | ||
303 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 303 | write_unlock(&ls->ls_dirtbl[bucket].lock); |
304 | 304 | ||
305 | de = allocate_direntry(ls, namelen); | 305 | de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); |
306 | if (!de) | 306 | if (!de) |
307 | return -ENOMEM; | 307 | return -ENOMEM; |
308 | 308 | ||
@@ -313,7 +313,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
313 | write_lock(&ls->ls_dirtbl[bucket].lock); | 313 | write_lock(&ls->ls_dirtbl[bucket].lock); |
314 | tmp = search_bucket(ls, name, namelen, bucket); | 314 | tmp = search_bucket(ls, name, namelen, bucket); |
315 | if (tmp) { | 315 | if (tmp) { |
316 | free_direntry(de); | 316 | kfree(de); |
317 | de = tmp; | 317 | de = tmp; |
318 | } else { | 318 | } else { |
319 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | 319 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); |
@@ -329,49 +329,47 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | |||
329 | return get_entry(ls, nodeid, name, namelen, r_nodeid); | 329 | return get_entry(ls, nodeid, name, namelen, r_nodeid); |
330 | } | 330 | } |
331 | 331 | ||
332 | /* Copy the names of master rsb's into the buffer provided. | 332 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
333 | Only select names whose dir node is the given nodeid. */ | 333 | { |
334 | struct dlm_rsb *r; | ||
335 | |||
336 | down_read(&ls->ls_root_sem); | ||
337 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | ||
338 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | ||
339 | up_read(&ls->ls_root_sem); | ||
340 | return r; | ||
341 | } | ||
342 | } | ||
343 | up_read(&ls->ls_root_sem); | ||
344 | return NULL; | ||
345 | } | ||
346 | |||
347 | /* Find the rsb where we left off (or start again), then send rsb names | ||
348 | for rsb's we're master of and whose directory node matches the requesting | ||
349 | node. inbuf is the rsb name last sent, inlen is the name's length */ | ||
334 | 350 | ||
335 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | 351 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, |
336 | char *outbuf, int outlen, int nodeid) | 352 | char *outbuf, int outlen, int nodeid) |
337 | { | 353 | { |
338 | struct list_head *list; | 354 | struct list_head *list; |
339 | struct dlm_rsb *start_r = NULL, *r = NULL; | 355 | struct dlm_rsb *r; |
340 | int offset = 0, start_namelen, error, dir_nodeid; | 356 | int offset = 0, dir_nodeid; |
341 | char *start_name; | ||
342 | uint16_t be_namelen; | 357 | uint16_t be_namelen; |
343 | 358 | ||
344 | /* | ||
345 | * Find the rsb where we left off (or start again) | ||
346 | */ | ||
347 | |||
348 | start_namelen = inlen; | ||
349 | start_name = inbuf; | ||
350 | |||
351 | if (start_namelen > 1) { | ||
352 | /* | ||
353 | * We could also use a find_rsb_root() function here that | ||
354 | * searched the ls_root_list. | ||
355 | */ | ||
356 | error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER, | ||
357 | &start_r); | ||
358 | DLM_ASSERT(!error && start_r, | ||
359 | printk("error %d\n", error);); | ||
360 | DLM_ASSERT(!list_empty(&start_r->res_root_list), | ||
361 | dlm_print_rsb(start_r);); | ||
362 | dlm_put_rsb(start_r); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Send rsb names for rsb's we're master of and whose directory node | ||
367 | * matches the requesting node. | ||
368 | */ | ||
369 | |||
370 | down_read(&ls->ls_root_sem); | 359 | down_read(&ls->ls_root_sem); |
371 | if (start_r) | 360 | |
372 | list = start_r->res_root_list.next; | 361 | if (inlen > 1) { |
373 | else | 362 | r = find_rsb_root(ls, inbuf, inlen); |
363 | if (!r) { | ||
364 | inbuf[inlen - 1] = '\0'; | ||
365 | log_error(ls, "copy_master_names from %d start %d %s", | ||
366 | nodeid, inlen, inbuf); | ||
367 | goto out; | ||
368 | } | ||
369 | list = r->res_root_list.next; | ||
370 | } else { | ||
374 | list = ls->ls_root_list.next; | 371 | list = ls->ls_root_list.next; |
372 | } | ||
375 | 373 | ||
376 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | 374 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { |
377 | r = list_entry(list, struct dlm_rsb, res_root_list); | 375 | r = list_entry(list, struct dlm_rsb, res_root_list); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d2fc2384c3be..ec61bbaf25df 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -570,5 +570,21 @@ static inline int dlm_no_directory(struct dlm_ls *ls) | |||
570 | return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; | 570 | return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; |
571 | } | 571 | } |
572 | 572 | ||
573 | int dlm_netlink_init(void); | ||
574 | void dlm_netlink_exit(void); | ||
575 | void dlm_timeout_warn(struct dlm_lkb *lkb); | ||
576 | |||
577 | #ifdef CONFIG_DLM_DEBUG | ||
578 | int dlm_register_debugfs(void); | ||
579 | void dlm_unregister_debugfs(void); | ||
580 | int dlm_create_debug_file(struct dlm_ls *ls); | ||
581 | void dlm_delete_debug_file(struct dlm_ls *ls); | ||
582 | #else | ||
583 | static inline int dlm_register_debugfs(void) { return 0; } | ||
584 | static inline void dlm_unregister_debugfs(void) { } | ||
585 | static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } | ||
586 | static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } | ||
587 | #endif | ||
588 | |||
573 | #endif /* __DLM_INTERNAL_DOT_H__ */ | 589 | #endif /* __DLM_INTERNAL_DOT_H__ */ |
574 | 590 | ||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 3915b8e14146..ff4a198fa677 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -88,7 +88,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
88 | static int receive_extralen(struct dlm_message *ms); | 88 | static int receive_extralen(struct dlm_message *ms); |
89 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid); | 89 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid); |
90 | static void del_timeout(struct dlm_lkb *lkb); | 90 | static void del_timeout(struct dlm_lkb *lkb); |
91 | void dlm_timeout_warn(struct dlm_lkb *lkb); | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * Lock compatibilty matrix - thanks Steve | 93 | * Lock compatibilty matrix - thanks Steve |
@@ -335,7 +334,7 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) | |||
335 | { | 334 | { |
336 | struct dlm_rsb *r; | 335 | struct dlm_rsb *r; |
337 | 336 | ||
338 | r = allocate_rsb(ls, len); | 337 | r = dlm_allocate_rsb(ls, len); |
339 | if (!r) | 338 | if (!r) |
340 | return NULL; | 339 | return NULL; |
341 | 340 | ||
@@ -478,7 +477,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, | |||
478 | error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); | 477 | error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); |
479 | if (!error) { | 478 | if (!error) { |
480 | write_unlock(&ls->ls_rsbtbl[bucket].lock); | 479 | write_unlock(&ls->ls_rsbtbl[bucket].lock); |
481 | free_rsb(r); | 480 | dlm_free_rsb(r); |
482 | r = tmp; | 481 | r = tmp; |
483 | goto out; | 482 | goto out; |
484 | } | 483 | } |
@@ -490,12 +489,6 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, | |||
490 | return error; | 489 | return error; |
491 | } | 490 | } |
492 | 491 | ||
493 | int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
494 | unsigned int flags, struct dlm_rsb **r_ret) | ||
495 | { | ||
496 | return find_rsb(ls, name, namelen, flags, r_ret); | ||
497 | } | ||
498 | |||
499 | /* This is only called to add a reference when the code already holds | 492 | /* This is only called to add a reference when the code already holds |
500 | a valid reference to the rsb, so there's no need for locking. */ | 493 | a valid reference to the rsb, so there's no need for locking. */ |
501 | 494 | ||
@@ -519,7 +512,7 @@ static void toss_rsb(struct kref *kref) | |||
519 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); | 512 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); |
520 | r->res_toss_time = jiffies; | 513 | r->res_toss_time = jiffies; |
521 | if (r->res_lvbptr) { | 514 | if (r->res_lvbptr) { |
522 | free_lvb(r->res_lvbptr); | 515 | dlm_free_lvb(r->res_lvbptr); |
523 | r->res_lvbptr = NULL; | 516 | r->res_lvbptr = NULL; |
524 | } | 517 | } |
525 | } | 518 | } |
@@ -589,7 +582,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) | |||
589 | uint32_t lkid = 0; | 582 | uint32_t lkid = 0; |
590 | uint16_t bucket; | 583 | uint16_t bucket; |
591 | 584 | ||
592 | lkb = allocate_lkb(ls); | 585 | lkb = dlm_allocate_lkb(ls); |
593 | if (!lkb) | 586 | if (!lkb) |
594 | return -ENOMEM; | 587 | return -ENOMEM; |
595 | 588 | ||
@@ -683,8 +676,8 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
683 | 676 | ||
684 | /* for local/process lkbs, lvbptr points to caller's lksb */ | 677 | /* for local/process lkbs, lvbptr points to caller's lksb */ |
685 | if (lkb->lkb_lvbptr && is_master_copy(lkb)) | 678 | if (lkb->lkb_lvbptr && is_master_copy(lkb)) |
686 | free_lvb(lkb->lkb_lvbptr); | 679 | dlm_free_lvb(lkb->lkb_lvbptr); |
687 | free_lkb(lkb); | 680 | dlm_free_lkb(lkb); |
688 | return 1; | 681 | return 1; |
689 | } else { | 682 | } else { |
690 | write_unlock(&ls->ls_lkbtbl[bucket].lock); | 683 | write_unlock(&ls->ls_lkbtbl[bucket].lock); |
@@ -988,7 +981,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) | |||
988 | 981 | ||
989 | if (is_master(r)) | 982 | if (is_master(r)) |
990 | dir_remove(r); | 983 | dir_remove(r); |
991 | free_rsb(r); | 984 | dlm_free_rsb(r); |
992 | count++; | 985 | count++; |
993 | } else { | 986 | } else { |
994 | write_unlock(&ls->ls_rsbtbl[b].lock); | 987 | write_unlock(&ls->ls_rsbtbl[b].lock); |
@@ -1171,7 +1164,7 @@ static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
1171 | return; | 1164 | return; |
1172 | 1165 | ||
1173 | if (!r->res_lvbptr) | 1166 | if (!r->res_lvbptr) |
1174 | r->res_lvbptr = allocate_lvb(r->res_ls); | 1167 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); |
1175 | 1168 | ||
1176 | if (!r->res_lvbptr) | 1169 | if (!r->res_lvbptr) |
1177 | return; | 1170 | return; |
@@ -1203,7 +1196,7 @@ static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
1203 | return; | 1196 | return; |
1204 | 1197 | ||
1205 | if (!r->res_lvbptr) | 1198 | if (!r->res_lvbptr) |
1206 | r->res_lvbptr = allocate_lvb(r->res_ls); | 1199 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); |
1207 | 1200 | ||
1208 | if (!r->res_lvbptr) | 1201 | if (!r->res_lvbptr) |
1209 | return; | 1202 | return; |
@@ -1852,7 +1845,7 @@ static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
1852 | static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1845 | static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) |
1853 | { | 1846 | { |
1854 | struct dlm_ls *ls = r->res_ls; | 1847 | struct dlm_ls *ls = r->res_ls; |
1855 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | 1848 | int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); |
1856 | 1849 | ||
1857 | if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { | 1850 | if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { |
1858 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | 1851 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); |
@@ -1886,7 +1879,7 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
1886 | return 1; | 1879 | return 1; |
1887 | } | 1880 | } |
1888 | 1881 | ||
1889 | for (;;) { | 1882 | for (i = 0; i < 2; i++) { |
1890 | /* It's possible for dlm_scand to remove an old rsb for | 1883 | /* It's possible for dlm_scand to remove an old rsb for |
1891 | this same resource from the toss list, us to create | 1884 | this same resource from the toss list, us to create |
1892 | a new one, look up the master locally, and find it | 1885 | a new one, look up the master locally, and find it |
@@ -1900,6 +1893,8 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
1900 | log_debug(ls, "dir_lookup error %d %s", error, r->res_name); | 1893 | log_debug(ls, "dir_lookup error %d %s", error, r->res_name); |
1901 | schedule(); | 1894 | schedule(); |
1902 | } | 1895 | } |
1896 | if (error && error != -EEXIST) | ||
1897 | return error; | ||
1903 | 1898 | ||
1904 | if (ret_nodeid == our_nodeid) { | 1899 | if (ret_nodeid == our_nodeid) { |
1905 | r->res_first_lkid = 0; | 1900 | r->res_first_lkid = 0; |
@@ -1941,8 +1936,11 @@ static void confirm_master(struct dlm_rsb *r, int error) | |||
1941 | break; | 1936 | break; |
1942 | 1937 | ||
1943 | case -EAGAIN: | 1938 | case -EAGAIN: |
1944 | /* the remote master didn't queue our NOQUEUE request; | 1939 | case -EBADR: |
1945 | make a waiting lkb the first_lkid */ | 1940 | case -ENOTBLK: |
1941 | /* the remote request failed and won't be retried (it was | ||
1942 | a NOQUEUE, or has been canceled/unlocked); make a waiting | ||
1943 | lkb the first_lkid */ | ||
1946 | 1944 | ||
1947 | r->res_first_lkid = 0; | 1945 | r->res_first_lkid = 0; |
1948 | 1946 | ||
@@ -2108,17 +2106,18 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | |||
2108 | /* an lkb may be waiting for an rsb lookup to complete where the | 2106 | /* an lkb may be waiting for an rsb lookup to complete where the |
2109 | lookup was initiated by another lock */ | 2107 | lookup was initiated by another lock */ |
2110 | 2108 | ||
2111 | if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { | 2109 | if (!list_empty(&lkb->lkb_rsb_lookup)) { |
2112 | if (!list_empty(&lkb->lkb_rsb_lookup)) { | 2110 | if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) { |
2113 | log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id); | 2111 | log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id); |
2114 | list_del_init(&lkb->lkb_rsb_lookup); | 2112 | list_del_init(&lkb->lkb_rsb_lookup); |
2115 | queue_cast(lkb->lkb_resource, lkb, | 2113 | queue_cast(lkb->lkb_resource, lkb, |
2116 | args->flags & DLM_LKF_CANCEL ? | 2114 | args->flags & DLM_LKF_CANCEL ? |
2117 | -DLM_ECANCEL : -DLM_EUNLOCK); | 2115 | -DLM_ECANCEL : -DLM_EUNLOCK); |
2118 | unhold_lkb(lkb); /* undoes create_lkb() */ | 2116 | unhold_lkb(lkb); /* undoes create_lkb() */ |
2119 | rv = -EBUSY; | ||
2120 | goto out; | ||
2121 | } | 2117 | } |
2118 | /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */ | ||
2119 | rv = -EBUSY; | ||
2120 | goto out; | ||
2122 | } | 2121 | } |
2123 | 2122 | ||
2124 | /* cancel not allowed with another cancel/unlock in progress */ | 2123 | /* cancel not allowed with another cancel/unlock in progress */ |
@@ -2986,7 +2985,7 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
2986 | 2985 | ||
2987 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { | 2986 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { |
2988 | if (!lkb->lkb_lvbptr) | 2987 | if (!lkb->lkb_lvbptr) |
2989 | lkb->lkb_lvbptr = allocate_lvb(ls); | 2988 | lkb->lkb_lvbptr = dlm_allocate_lvb(ls); |
2990 | if (!lkb->lkb_lvbptr) | 2989 | if (!lkb->lkb_lvbptr) |
2991 | return -ENOMEM; | 2990 | return -ENOMEM; |
2992 | len = receive_extralen(ms); | 2991 | len = receive_extralen(ms); |
@@ -3006,11 +3005,9 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
3006 | lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); | 3005 | lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); |
3007 | lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); | 3006 | lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); |
3008 | 3007 | ||
3009 | DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); | ||
3010 | |||
3011 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { | 3008 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { |
3012 | /* lkb was just created so there won't be an lvb yet */ | 3009 | /* lkb was just created so there won't be an lvb yet */ |
3013 | lkb->lkb_lvbptr = allocate_lvb(ls); | 3010 | lkb->lkb_lvbptr = dlm_allocate_lvb(ls); |
3014 | if (!lkb->lkb_lvbptr) | 3011 | if (!lkb->lkb_lvbptr) |
3015 | return -ENOMEM; | 3012 | return -ENOMEM; |
3016 | } | 3013 | } |
@@ -3021,16 +3018,6 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
3021 | static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | 3018 | static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, |
3022 | struct dlm_message *ms) | 3019 | struct dlm_message *ms) |
3023 | { | 3020 | { |
3024 | if (lkb->lkb_nodeid != ms->m_header.h_nodeid) { | ||
3025 | log_error(ls, "convert_args nodeid %d %d lkid %x %x", | ||
3026 | lkb->lkb_nodeid, ms->m_header.h_nodeid, | ||
3027 | lkb->lkb_id, lkb->lkb_remid); | ||
3028 | return -EINVAL; | ||
3029 | } | ||
3030 | |||
3031 | if (!is_master_copy(lkb)) | ||
3032 | return -EINVAL; | ||
3033 | |||
3034 | if (lkb->lkb_status != DLM_LKSTS_GRANTED) | 3021 | if (lkb->lkb_status != DLM_LKSTS_GRANTED) |
3035 | return -EBUSY; | 3022 | return -EBUSY; |
3036 | 3023 | ||
@@ -3046,8 +3033,6 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
3046 | static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | 3033 | static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, |
3047 | struct dlm_message *ms) | 3034 | struct dlm_message *ms) |
3048 | { | 3035 | { |
3049 | if (!is_master_copy(lkb)) | ||
3050 | return -EINVAL; | ||
3051 | if (receive_lvb(ls, lkb, ms)) | 3036 | if (receive_lvb(ls, lkb, ms)) |
3052 | return -ENOMEM; | 3037 | return -ENOMEM; |
3053 | return 0; | 3038 | return 0; |
@@ -3063,6 +3048,50 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) | |||
3063 | lkb->lkb_remid = ms->m_lkid; | 3048 | lkb->lkb_remid = ms->m_lkid; |
3064 | } | 3049 | } |
3065 | 3050 | ||
3051 | /* This is called after the rsb is locked so that we can safely inspect | ||
3052 | fields in the lkb. */ | ||
3053 | |||
3054 | static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) | ||
3055 | { | ||
3056 | int from = ms->m_header.h_nodeid; | ||
3057 | int error = 0; | ||
3058 | |||
3059 | switch (ms->m_type) { | ||
3060 | case DLM_MSG_CONVERT: | ||
3061 | case DLM_MSG_UNLOCK: | ||
3062 | case DLM_MSG_CANCEL: | ||
3063 | if (!is_master_copy(lkb) || lkb->lkb_nodeid != from) | ||
3064 | error = -EINVAL; | ||
3065 | break; | ||
3066 | |||
3067 | case DLM_MSG_CONVERT_REPLY: | ||
3068 | case DLM_MSG_UNLOCK_REPLY: | ||
3069 | case DLM_MSG_CANCEL_REPLY: | ||
3070 | case DLM_MSG_GRANT: | ||
3071 | case DLM_MSG_BAST: | ||
3072 | if (!is_process_copy(lkb) || lkb->lkb_nodeid != from) | ||
3073 | error = -EINVAL; | ||
3074 | break; | ||
3075 | |||
3076 | case DLM_MSG_REQUEST_REPLY: | ||
3077 | if (!is_process_copy(lkb)) | ||
3078 | error = -EINVAL; | ||
3079 | else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from) | ||
3080 | error = -EINVAL; | ||
3081 | break; | ||
3082 | |||
3083 | default: | ||
3084 | error = -EINVAL; | ||
3085 | } | ||
3086 | |||
3087 | if (error) | ||
3088 | log_error(lkb->lkb_resource->res_ls, | ||
3089 | "ignore invalid message %d from %d %x %x %x %d", | ||
3090 | ms->m_type, from, lkb->lkb_id, lkb->lkb_remid, | ||
3091 | lkb->lkb_flags, lkb->lkb_nodeid); | ||
3092 | return error; | ||
3093 | } | ||
3094 | |||
3066 | static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) | 3095 | static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) |
3067 | { | 3096 | { |
3068 | struct dlm_lkb *lkb; | 3097 | struct dlm_lkb *lkb; |
@@ -3124,17 +3153,21 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) | |||
3124 | hold_rsb(r); | 3153 | hold_rsb(r); |
3125 | lock_rsb(r); | 3154 | lock_rsb(r); |
3126 | 3155 | ||
3156 | error = validate_message(lkb, ms); | ||
3157 | if (error) | ||
3158 | goto out; | ||
3159 | |||
3127 | receive_flags(lkb, ms); | 3160 | receive_flags(lkb, ms); |
3128 | error = receive_convert_args(ls, lkb, ms); | 3161 | error = receive_convert_args(ls, lkb, ms); |
3129 | if (error) | 3162 | if (error) |
3130 | goto out; | 3163 | goto out_reply; |
3131 | reply = !down_conversion(lkb); | 3164 | reply = !down_conversion(lkb); |
3132 | 3165 | ||
3133 | error = do_convert(r, lkb); | 3166 | error = do_convert(r, lkb); |
3134 | out: | 3167 | out_reply: |
3135 | if (reply) | 3168 | if (reply) |
3136 | send_convert_reply(r, lkb, error); | 3169 | send_convert_reply(r, lkb, error); |
3137 | 3170 | out: | |
3138 | unlock_rsb(r); | 3171 | unlock_rsb(r); |
3139 | put_rsb(r); | 3172 | put_rsb(r); |
3140 | dlm_put_lkb(lkb); | 3173 | dlm_put_lkb(lkb); |
@@ -3160,15 +3193,19 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) | |||
3160 | hold_rsb(r); | 3193 | hold_rsb(r); |
3161 | lock_rsb(r); | 3194 | lock_rsb(r); |
3162 | 3195 | ||
3196 | error = validate_message(lkb, ms); | ||
3197 | if (error) | ||
3198 | goto out; | ||
3199 | |||
3163 | receive_flags(lkb, ms); | 3200 | receive_flags(lkb, ms); |
3164 | error = receive_unlock_args(ls, lkb, ms); | 3201 | error = receive_unlock_args(ls, lkb, ms); |
3165 | if (error) | 3202 | if (error) |
3166 | goto out; | 3203 | goto out_reply; |
3167 | 3204 | ||
3168 | error = do_unlock(r, lkb); | 3205 | error = do_unlock(r, lkb); |
3169 | out: | 3206 | out_reply: |
3170 | send_unlock_reply(r, lkb, error); | 3207 | send_unlock_reply(r, lkb, error); |
3171 | 3208 | out: | |
3172 | unlock_rsb(r); | 3209 | unlock_rsb(r); |
3173 | put_rsb(r); | 3210 | put_rsb(r); |
3174 | dlm_put_lkb(lkb); | 3211 | dlm_put_lkb(lkb); |
@@ -3196,9 +3233,13 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) | |||
3196 | hold_rsb(r); | 3233 | hold_rsb(r); |
3197 | lock_rsb(r); | 3234 | lock_rsb(r); |
3198 | 3235 | ||
3236 | error = validate_message(lkb, ms); | ||
3237 | if (error) | ||
3238 | goto out; | ||
3239 | |||
3199 | error = do_cancel(r, lkb); | 3240 | error = do_cancel(r, lkb); |
3200 | send_cancel_reply(r, lkb, error); | 3241 | send_cancel_reply(r, lkb, error); |
3201 | 3242 | out: | |
3202 | unlock_rsb(r); | 3243 | unlock_rsb(r); |
3203 | put_rsb(r); | 3244 | put_rsb(r); |
3204 | dlm_put_lkb(lkb); | 3245 | dlm_put_lkb(lkb); |
@@ -3217,22 +3258,26 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | |||
3217 | 3258 | ||
3218 | error = find_lkb(ls, ms->m_remid, &lkb); | 3259 | error = find_lkb(ls, ms->m_remid, &lkb); |
3219 | if (error) { | 3260 | if (error) { |
3220 | log_error(ls, "receive_grant no lkb"); | 3261 | log_debug(ls, "receive_grant from %d no lkb %x", |
3262 | ms->m_header.h_nodeid, ms->m_remid); | ||
3221 | return; | 3263 | return; |
3222 | } | 3264 | } |
3223 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3224 | 3265 | ||
3225 | r = lkb->lkb_resource; | 3266 | r = lkb->lkb_resource; |
3226 | 3267 | ||
3227 | hold_rsb(r); | 3268 | hold_rsb(r); |
3228 | lock_rsb(r); | 3269 | lock_rsb(r); |
3229 | 3270 | ||
3271 | error = validate_message(lkb, ms); | ||
3272 | if (error) | ||
3273 | goto out; | ||
3274 | |||
3230 | receive_flags_reply(lkb, ms); | 3275 | receive_flags_reply(lkb, ms); |
3231 | if (is_altmode(lkb)) | 3276 | if (is_altmode(lkb)) |
3232 | munge_altmode(lkb, ms); | 3277 | munge_altmode(lkb, ms); |
3233 | grant_lock_pc(r, lkb, ms); | 3278 | grant_lock_pc(r, lkb, ms); |
3234 | queue_cast(r, lkb, 0); | 3279 | queue_cast(r, lkb, 0); |
3235 | 3280 | out: | |
3236 | unlock_rsb(r); | 3281 | unlock_rsb(r); |
3237 | put_rsb(r); | 3282 | put_rsb(r); |
3238 | dlm_put_lkb(lkb); | 3283 | dlm_put_lkb(lkb); |
@@ -3246,18 +3291,22 @@ static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) | |||
3246 | 3291 | ||
3247 | error = find_lkb(ls, ms->m_remid, &lkb); | 3292 | error = find_lkb(ls, ms->m_remid, &lkb); |
3248 | if (error) { | 3293 | if (error) { |
3249 | log_error(ls, "receive_bast no lkb"); | 3294 | log_debug(ls, "receive_bast from %d no lkb %x", |
3295 | ms->m_header.h_nodeid, ms->m_remid); | ||
3250 | return; | 3296 | return; |
3251 | } | 3297 | } |
3252 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3253 | 3298 | ||
3254 | r = lkb->lkb_resource; | 3299 | r = lkb->lkb_resource; |
3255 | 3300 | ||
3256 | hold_rsb(r); | 3301 | hold_rsb(r); |
3257 | lock_rsb(r); | 3302 | lock_rsb(r); |
3258 | 3303 | ||
3259 | queue_bast(r, lkb, ms->m_bastmode); | 3304 | error = validate_message(lkb, ms); |
3305 | if (error) | ||
3306 | goto out; | ||
3260 | 3307 | ||
3308 | queue_bast(r, lkb, ms->m_bastmode); | ||
3309 | out: | ||
3261 | unlock_rsb(r); | 3310 | unlock_rsb(r); |
3262 | put_rsb(r); | 3311 | put_rsb(r); |
3263 | dlm_put_lkb(lkb); | 3312 | dlm_put_lkb(lkb); |
@@ -3323,15 +3372,19 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3323 | 3372 | ||
3324 | error = find_lkb(ls, ms->m_remid, &lkb); | 3373 | error = find_lkb(ls, ms->m_remid, &lkb); |
3325 | if (error) { | 3374 | if (error) { |
3326 | log_error(ls, "receive_request_reply no lkb"); | 3375 | log_debug(ls, "receive_request_reply from %d no lkb %x", |
3376 | ms->m_header.h_nodeid, ms->m_remid); | ||
3327 | return; | 3377 | return; |
3328 | } | 3378 | } |
3329 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3330 | 3379 | ||
3331 | r = lkb->lkb_resource; | 3380 | r = lkb->lkb_resource; |
3332 | hold_rsb(r); | 3381 | hold_rsb(r); |
3333 | lock_rsb(r); | 3382 | lock_rsb(r); |
3334 | 3383 | ||
3384 | error = validate_message(lkb, ms); | ||
3385 | if (error) | ||
3386 | goto out; | ||
3387 | |||
3335 | mstype = lkb->lkb_wait_type; | 3388 | mstype = lkb->lkb_wait_type; |
3336 | error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); | 3389 | error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); |
3337 | if (error) | 3390 | if (error) |
@@ -3383,6 +3436,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3383 | if (is_overlap(lkb)) { | 3436 | if (is_overlap(lkb)) { |
3384 | /* we'll ignore error in cancel/unlock reply */ | 3437 | /* we'll ignore error in cancel/unlock reply */ |
3385 | queue_cast_overlap(r, lkb); | 3438 | queue_cast_overlap(r, lkb); |
3439 | confirm_master(r, result); | ||
3386 | unhold_lkb(lkb); /* undoes create_lkb() */ | 3440 | unhold_lkb(lkb); /* undoes create_lkb() */ |
3387 | } else | 3441 | } else |
3388 | _request_lock(r, lkb); | 3442 | _request_lock(r, lkb); |
@@ -3463,6 +3517,10 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
3463 | hold_rsb(r); | 3517 | hold_rsb(r); |
3464 | lock_rsb(r); | 3518 | lock_rsb(r); |
3465 | 3519 | ||
3520 | error = validate_message(lkb, ms); | ||
3521 | if (error) | ||
3522 | goto out; | ||
3523 | |||
3466 | /* stub reply can happen with waiters_mutex held */ | 3524 | /* stub reply can happen with waiters_mutex held */ |
3467 | error = remove_from_waiters_ms(lkb, ms); | 3525 | error = remove_from_waiters_ms(lkb, ms); |
3468 | if (error) | 3526 | if (error) |
@@ -3481,10 +3539,10 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3481 | 3539 | ||
3482 | error = find_lkb(ls, ms->m_remid, &lkb); | 3540 | error = find_lkb(ls, ms->m_remid, &lkb); |
3483 | if (error) { | 3541 | if (error) { |
3484 | log_error(ls, "receive_convert_reply no lkb"); | 3542 | log_debug(ls, "receive_convert_reply from %d no lkb %x", |
3543 | ms->m_header.h_nodeid, ms->m_remid); | ||
3485 | return; | 3544 | return; |
3486 | } | 3545 | } |
3487 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3488 | 3546 | ||
3489 | _receive_convert_reply(lkb, ms); | 3547 | _receive_convert_reply(lkb, ms); |
3490 | dlm_put_lkb(lkb); | 3548 | dlm_put_lkb(lkb); |
@@ -3498,6 +3556,10 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
3498 | hold_rsb(r); | 3556 | hold_rsb(r); |
3499 | lock_rsb(r); | 3557 | lock_rsb(r); |
3500 | 3558 | ||
3559 | error = validate_message(lkb, ms); | ||
3560 | if (error) | ||
3561 | goto out; | ||
3562 | |||
3501 | /* stub reply can happen with waiters_mutex held */ | 3563 | /* stub reply can happen with waiters_mutex held */ |
3502 | error = remove_from_waiters_ms(lkb, ms); | 3564 | error = remove_from_waiters_ms(lkb, ms); |
3503 | if (error) | 3565 | if (error) |
@@ -3529,10 +3591,10 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3529 | 3591 | ||
3530 | error = find_lkb(ls, ms->m_remid, &lkb); | 3592 | error = find_lkb(ls, ms->m_remid, &lkb); |
3531 | if (error) { | 3593 | if (error) { |
3532 | log_error(ls, "receive_unlock_reply no lkb"); | 3594 | log_debug(ls, "receive_unlock_reply from %d no lkb %x", |
3595 | ms->m_header.h_nodeid, ms->m_remid); | ||
3533 | return; | 3596 | return; |
3534 | } | 3597 | } |
3535 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3536 | 3598 | ||
3537 | _receive_unlock_reply(lkb, ms); | 3599 | _receive_unlock_reply(lkb, ms); |
3538 | dlm_put_lkb(lkb); | 3600 | dlm_put_lkb(lkb); |
@@ -3546,6 +3608,10 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
3546 | hold_rsb(r); | 3608 | hold_rsb(r); |
3547 | lock_rsb(r); | 3609 | lock_rsb(r); |
3548 | 3610 | ||
3611 | error = validate_message(lkb, ms); | ||
3612 | if (error) | ||
3613 | goto out; | ||
3614 | |||
3549 | /* stub reply can happen with waiters_mutex held */ | 3615 | /* stub reply can happen with waiters_mutex held */ |
3550 | error = remove_from_waiters_ms(lkb, ms); | 3616 | error = remove_from_waiters_ms(lkb, ms); |
3551 | if (error) | 3617 | if (error) |
@@ -3577,10 +3643,10 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3577 | 3643 | ||
3578 | error = find_lkb(ls, ms->m_remid, &lkb); | 3644 | error = find_lkb(ls, ms->m_remid, &lkb); |
3579 | if (error) { | 3645 | if (error) { |
3580 | log_error(ls, "receive_cancel_reply no lkb"); | 3646 | log_debug(ls, "receive_cancel_reply from %d no lkb %x", |
3647 | ms->m_header.h_nodeid, ms->m_remid); | ||
3581 | return; | 3648 | return; |
3582 | } | 3649 | } |
3583 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
3584 | 3650 | ||
3585 | _receive_cancel_reply(lkb, ms); | 3651 | _receive_cancel_reply(lkb, ms); |
3586 | dlm_put_lkb(lkb); | 3652 | dlm_put_lkb(lkb); |
@@ -3640,6 +3706,13 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
3640 | 3706 | ||
3641 | static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) | 3707 | static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) |
3642 | { | 3708 | { |
3709 | if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { | ||
3710 | log_debug(ls, "ignore non-member message %d from %d %x %x %d", | ||
3711 | ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, | ||
3712 | ms->m_remid, ms->m_result); | ||
3713 | return; | ||
3714 | } | ||
3715 | |||
3643 | switch (ms->m_type) { | 3716 | switch (ms->m_type) { |
3644 | 3717 | ||
3645 | /* messages sent to a master node */ | 3718 | /* messages sent to a master node */ |
@@ -3778,8 +3851,9 @@ void dlm_receive_buffer(struct dlm_header *hd, int nodeid) | |||
3778 | 3851 | ||
3779 | ls = dlm_find_lockspace_global(hd->h_lockspace); | 3852 | ls = dlm_find_lockspace_global(hd->h_lockspace); |
3780 | if (!ls) { | 3853 | if (!ls) { |
3781 | log_print("invalid h_lockspace %x from %d cmd %d type %d", | 3854 | if (dlm_config.ci_log_debug) |
3782 | hd->h_lockspace, nodeid, hd->h_cmd, type); | 3855 | log_print("invalid lockspace %x from %d cmd %d type %d", |
3856 | hd->h_lockspace, nodeid, hd->h_cmd, type); | ||
3783 | 3857 | ||
3784 | if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) | 3858 | if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) |
3785 | dlm_send_ls_not_ready(nodeid, rc); | 3859 | dlm_send_ls_not_ready(nodeid, rc); |
@@ -3806,6 +3880,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
3806 | ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; | 3880 | ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; |
3807 | ls->ls_stub_ms.m_result = -EINPROGRESS; | 3881 | ls->ls_stub_ms.m_result = -EINPROGRESS; |
3808 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3882 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; |
3883 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | ||
3809 | _receive_convert_reply(lkb, &ls->ls_stub_ms); | 3884 | _receive_convert_reply(lkb, &ls->ls_stub_ms); |
3810 | 3885 | ||
3811 | /* Same special case as in receive_rcom_lock_args() */ | 3886 | /* Same special case as in receive_rcom_lock_args() */ |
@@ -3847,6 +3922,7 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
3847 | void dlm_recover_waiters_pre(struct dlm_ls *ls) | 3922 | void dlm_recover_waiters_pre(struct dlm_ls *ls) |
3848 | { | 3923 | { |
3849 | struct dlm_lkb *lkb, *safe; | 3924 | struct dlm_lkb *lkb, *safe; |
3925 | int wait_type, stub_unlock_result, stub_cancel_result; | ||
3850 | 3926 | ||
3851 | mutex_lock(&ls->ls_waiters_mutex); | 3927 | mutex_lock(&ls->ls_waiters_mutex); |
3852 | 3928 | ||
@@ -3865,7 +3941,33 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
3865 | if (!waiter_needs_recovery(ls, lkb)) | 3941 | if (!waiter_needs_recovery(ls, lkb)) |
3866 | continue; | 3942 | continue; |
3867 | 3943 | ||
3868 | switch (lkb->lkb_wait_type) { | 3944 | wait_type = lkb->lkb_wait_type; |
3945 | stub_unlock_result = -DLM_EUNLOCK; | ||
3946 | stub_cancel_result = -DLM_ECANCEL; | ||
3947 | |||
3948 | /* Main reply may have been received leaving a zero wait_type, | ||
3949 | but a reply for the overlapping op may not have been | ||
3950 | received. In that case we need to fake the appropriate | ||
3951 | reply for the overlap op. */ | ||
3952 | |||
3953 | if (!wait_type) { | ||
3954 | if (is_overlap_cancel(lkb)) { | ||
3955 | wait_type = DLM_MSG_CANCEL; | ||
3956 | if (lkb->lkb_grmode == DLM_LOCK_IV) | ||
3957 | stub_cancel_result = 0; | ||
3958 | } | ||
3959 | if (is_overlap_unlock(lkb)) { | ||
3960 | wait_type = DLM_MSG_UNLOCK; | ||
3961 | if (lkb->lkb_grmode == DLM_LOCK_IV) | ||
3962 | stub_unlock_result = -ENOENT; | ||
3963 | } | ||
3964 | |||
3965 | log_debug(ls, "rwpre overlap %x %x %d %d %d", | ||
3966 | lkb->lkb_id, lkb->lkb_flags, wait_type, | ||
3967 | stub_cancel_result, stub_unlock_result); | ||
3968 | } | ||
3969 | |||
3970 | switch (wait_type) { | ||
3869 | 3971 | ||
3870 | case DLM_MSG_REQUEST: | 3972 | case DLM_MSG_REQUEST: |
3871 | lkb->lkb_flags |= DLM_IFL_RESEND; | 3973 | lkb->lkb_flags |= DLM_IFL_RESEND; |
@@ -3878,8 +3980,9 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
3878 | case DLM_MSG_UNLOCK: | 3980 | case DLM_MSG_UNLOCK: |
3879 | hold_lkb(lkb); | 3981 | hold_lkb(lkb); |
3880 | ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; | 3982 | ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; |
3881 | ls->ls_stub_ms.m_result = -DLM_EUNLOCK; | 3983 | ls->ls_stub_ms.m_result = stub_unlock_result; |
3882 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3984 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; |
3985 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | ||
3883 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); | 3986 | _receive_unlock_reply(lkb, &ls->ls_stub_ms); |
3884 | dlm_put_lkb(lkb); | 3987 | dlm_put_lkb(lkb); |
3885 | break; | 3988 | break; |
@@ -3887,15 +3990,16 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
3887 | case DLM_MSG_CANCEL: | 3990 | case DLM_MSG_CANCEL: |
3888 | hold_lkb(lkb); | 3991 | hold_lkb(lkb); |
3889 | ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; | 3992 | ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; |
3890 | ls->ls_stub_ms.m_result = -DLM_ECANCEL; | 3993 | ls->ls_stub_ms.m_result = stub_cancel_result; |
3891 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; | 3994 | ls->ls_stub_ms.m_flags = lkb->lkb_flags; |
3995 | ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; | ||
3892 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); | 3996 | _receive_cancel_reply(lkb, &ls->ls_stub_ms); |
3893 | dlm_put_lkb(lkb); | 3997 | dlm_put_lkb(lkb); |
3894 | break; | 3998 | break; |
3895 | 3999 | ||
3896 | default: | 4000 | default: |
3897 | log_error(ls, "invalid lkb wait_type %d", | 4001 | log_error(ls, "invalid lkb wait_type %d %d", |
3898 | lkb->lkb_wait_type); | 4002 | lkb->lkb_wait_type, wait_type); |
3899 | } | 4003 | } |
3900 | schedule(); | 4004 | schedule(); |
3901 | } | 4005 | } |
@@ -4184,7 +4288,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
4184 | lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); | 4288 | lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); |
4185 | 4289 | ||
4186 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { | 4290 | if (lkb->lkb_exflags & DLM_LKF_VALBLK) { |
4187 | lkb->lkb_lvbptr = allocate_lvb(ls); | 4291 | lkb->lkb_lvbptr = dlm_allocate_lvb(ls); |
4188 | if (!lkb->lkb_lvbptr) | 4292 | if (!lkb->lkb_lvbptr) |
4189 | return -ENOMEM; | 4293 | return -ENOMEM; |
4190 | lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - | 4294 | lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - |
@@ -4259,7 +4363,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
4259 | put_rsb(r); | 4363 | put_rsb(r); |
4260 | out: | 4364 | out: |
4261 | if (error) | 4365 | if (error) |
4262 | log_print("recover_master_copy %d %x", error, rl->rl_lkid); | 4366 | log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid); |
4263 | rl->rl_result = error; | 4367 | rl->rl_result = error; |
4264 | return error; | 4368 | return error; |
4265 | } | 4369 | } |
@@ -4342,7 +4446,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, | |||
4342 | } | 4446 | } |
4343 | } | 4447 | } |
4344 | 4448 | ||
4345 | /* After ua is attached to lkb it will be freed by free_lkb(). | 4449 | /* After ua is attached to lkb it will be freed by dlm_free_lkb(). |
4346 | When DLM_IFL_USER is set, the dlm knows that this is a userspace | 4450 | When DLM_IFL_USER is set, the dlm knows that this is a userspace |
4347 | lock and that lkb_astparam is the dlm_user_args structure. */ | 4451 | lock and that lkb_astparam is the dlm_user_args structure. */ |
4348 | 4452 | ||
@@ -4679,6 +4783,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
4679 | } | 4783 | } |
4680 | 4784 | ||
4681 | list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { | 4785 | list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { |
4786 | lkb->lkb_ast_type = 0; | ||
4682 | list_del(&lkb->lkb_astqueue); | 4787 | list_del(&lkb->lkb_astqueue); |
4683 | dlm_put_lkb(lkb); | 4788 | dlm_put_lkb(lkb); |
4684 | } | 4789 | } |
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index ada04680a1e5..27b6ed302911 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h | |||
@@ -19,8 +19,6 @@ void dlm_print_lkb(struct dlm_lkb *lkb); | |||
19 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); | 19 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); |
20 | void dlm_receive_buffer(struct dlm_header *hd, int nodeid); | 20 | void dlm_receive_buffer(struct dlm_header *hd, int nodeid); |
21 | int dlm_modes_compat(int mode1, int mode2); | 21 | int dlm_modes_compat(int mode1, int mode2); |
22 | int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, | ||
23 | unsigned int flags, struct dlm_rsb **r_ret); | ||
24 | void dlm_put_rsb(struct dlm_rsb *r); | 22 | void dlm_put_rsb(struct dlm_rsb *r); |
25 | void dlm_hold_rsb(struct dlm_rsb *r); | 23 | void dlm_hold_rsb(struct dlm_rsb *r); |
26 | int dlm_put_lkb(struct dlm_lkb *lkb); | 24 | int dlm_put_lkb(struct dlm_lkb *lkb); |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 6353a8384520..b180fdc51085 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -24,14 +24,6 @@ | |||
24 | #include "recover.h" | 24 | #include "recover.h" |
25 | #include "requestqueue.h" | 25 | #include "requestqueue.h" |
26 | 26 | ||
27 | #ifdef CONFIG_DLM_DEBUG | ||
28 | int dlm_create_debug_file(struct dlm_ls *ls); | ||
29 | void dlm_delete_debug_file(struct dlm_ls *ls); | ||
30 | #else | ||
31 | static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } | ||
32 | static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } | ||
33 | #endif | ||
34 | |||
35 | static int ls_count; | 27 | static int ls_count; |
36 | static struct mutex ls_lock; | 28 | static struct mutex ls_lock; |
37 | static struct list_head lslist; | 29 | static struct list_head lslist; |
@@ -166,26 +158,7 @@ static struct kobj_type dlm_ktype = { | |||
166 | .release = lockspace_kobj_release, | 158 | .release = lockspace_kobj_release, |
167 | }; | 159 | }; |
168 | 160 | ||
169 | static struct kset dlm_kset = { | 161 | static struct kset *dlm_kset; |
170 | .ktype = &dlm_ktype, | ||
171 | }; | ||
172 | |||
173 | static int kobject_setup(struct dlm_ls *ls) | ||
174 | { | ||
175 | char lsname[DLM_LOCKSPACE_LEN]; | ||
176 | int error; | ||
177 | |||
178 | memset(lsname, 0, DLM_LOCKSPACE_LEN); | ||
179 | snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name); | ||
180 | |||
181 | error = kobject_set_name(&ls->ls_kobj, "%s", lsname); | ||
182 | if (error) | ||
183 | return error; | ||
184 | |||
185 | ls->ls_kobj.kset = &dlm_kset; | ||
186 | ls->ls_kobj.ktype = &dlm_ktype; | ||
187 | return 0; | ||
188 | } | ||
189 | 162 | ||
190 | static int do_uevent(struct dlm_ls *ls, int in) | 163 | static int do_uevent(struct dlm_ls *ls, int in) |
191 | { | 164 | { |
@@ -220,24 +193,22 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
220 | 193 | ||
221 | int dlm_lockspace_init(void) | 194 | int dlm_lockspace_init(void) |
222 | { | 195 | { |
223 | int error; | ||
224 | |||
225 | ls_count = 0; | 196 | ls_count = 0; |
226 | mutex_init(&ls_lock); | 197 | mutex_init(&ls_lock); |
227 | INIT_LIST_HEAD(&lslist); | 198 | INIT_LIST_HEAD(&lslist); |
228 | spin_lock_init(&lslist_lock); | 199 | spin_lock_init(&lslist_lock); |
229 | 200 | ||
230 | kobject_set_name(&dlm_kset.kobj, "dlm"); | 201 | dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); |
231 | kobj_set_kset_s(&dlm_kset, kernel_subsys); | 202 | if (!dlm_kset) { |
232 | error = kset_register(&dlm_kset); | 203 | printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); |
233 | if (error) | 204 | return -ENOMEM; |
234 | printk("dlm_lockspace_init: cannot register kset %d\n", error); | 205 | } |
235 | return error; | 206 | return 0; |
236 | } | 207 | } |
237 | 208 | ||
238 | void dlm_lockspace_exit(void) | 209 | void dlm_lockspace_exit(void) |
239 | { | 210 | { |
240 | kset_unregister(&dlm_kset); | 211 | kset_unregister(dlm_kset); |
241 | } | 212 | } |
242 | 213 | ||
243 | static int dlm_scand(void *data) | 214 | static int dlm_scand(void *data) |
@@ -549,13 +520,12 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
549 | goto out_delist; | 520 | goto out_delist; |
550 | } | 521 | } |
551 | 522 | ||
552 | error = kobject_setup(ls); | 523 | ls->ls_kobj.kset = dlm_kset; |
553 | if (error) | 524 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, |
554 | goto out_stop; | 525 | "%s", ls->ls_name); |
555 | |||
556 | error = kobject_register(&ls->ls_kobj); | ||
557 | if (error) | 526 | if (error) |
558 | goto out_stop; | 527 | goto out_stop; |
528 | kobject_uevent(&ls->ls_kobj, KOBJ_ADD); | ||
559 | 529 | ||
560 | /* let kobject handle freeing of ls if there's an error */ | 530 | /* let kobject handle freeing of ls if there's an error */ |
561 | do_unreg = 1; | 531 | do_unreg = 1; |
@@ -601,7 +571,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
601 | kfree(ls->ls_rsbtbl); | 571 | kfree(ls->ls_rsbtbl); |
602 | out_lsfree: | 572 | out_lsfree: |
603 | if (do_unreg) | 573 | if (do_unreg) |
604 | kobject_unregister(&ls->ls_kobj); | 574 | kobject_put(&ls->ls_kobj); |
605 | else | 575 | else |
606 | kfree(ls); | 576 | kfree(ls); |
607 | out: | 577 | out: |
@@ -706,9 +676,9 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
706 | dlm_del_ast(lkb); | 676 | dlm_del_ast(lkb); |
707 | 677 | ||
708 | if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) | 678 | if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) |
709 | free_lvb(lkb->lkb_lvbptr); | 679 | dlm_free_lvb(lkb->lkb_lvbptr); |
710 | 680 | ||
711 | free_lkb(lkb); | 681 | dlm_free_lkb(lkb); |
712 | } | 682 | } |
713 | } | 683 | } |
714 | dlm_astd_resume(); | 684 | dlm_astd_resume(); |
@@ -726,7 +696,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
726 | res_hashchain); | 696 | res_hashchain); |
727 | 697 | ||
728 | list_del(&rsb->res_hashchain); | 698 | list_del(&rsb->res_hashchain); |
729 | free_rsb(rsb); | 699 | dlm_free_rsb(rsb); |
730 | } | 700 | } |
731 | 701 | ||
732 | head = &ls->ls_rsbtbl[i].toss; | 702 | head = &ls->ls_rsbtbl[i].toss; |
@@ -734,7 +704,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
734 | rsb = list_entry(head->next, struct dlm_rsb, | 704 | rsb = list_entry(head->next, struct dlm_rsb, |
735 | res_hashchain); | 705 | res_hashchain); |
736 | list_del(&rsb->res_hashchain); | 706 | list_del(&rsb->res_hashchain); |
737 | free_rsb(rsb); | 707 | dlm_free_rsb(rsb); |
738 | } | 708 | } |
739 | } | 709 | } |
740 | 710 | ||
@@ -750,7 +720,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
750 | dlm_clear_members(ls); | 720 | dlm_clear_members(ls); |
751 | dlm_clear_members_gone(ls); | 721 | dlm_clear_members_gone(ls); |
752 | kfree(ls->ls_node_array); | 722 | kfree(ls->ls_node_array); |
753 | kobject_unregister(&ls->ls_kobj); | 723 | kobject_put(&ls->ls_kobj); |
754 | /* The ls structure will be freed when the kobject is done with */ | 724 | /* The ls structure will be freed when the kobject is done with */ |
755 | 725 | ||
756 | mutex_lock(&ls_lock); | 726 | mutex_lock(&ls_lock); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index e9923ca9c2d9..7c1e5e5cccd8 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -864,7 +864,7 @@ static void sctp_init_assoc(struct connection *con) | |||
864 | static void tcp_connect_to_sock(struct connection *con) | 864 | static void tcp_connect_to_sock(struct connection *con) |
865 | { | 865 | { |
866 | int result = -EHOSTUNREACH; | 866 | int result = -EHOSTUNREACH; |
867 | struct sockaddr_storage saddr; | 867 | struct sockaddr_storage saddr, src_addr; |
868 | int addr_len; | 868 | int addr_len; |
869 | struct socket *sock; | 869 | struct socket *sock; |
870 | 870 | ||
@@ -898,6 +898,17 @@ static void tcp_connect_to_sock(struct connection *con) | |||
898 | con->connect_action = tcp_connect_to_sock; | 898 | con->connect_action = tcp_connect_to_sock; |
899 | add_sock(sock, con); | 899 | add_sock(sock, con); |
900 | 900 | ||
901 | /* Bind to our cluster-known address connecting to avoid | ||
902 | routing problems */ | ||
903 | memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); | ||
904 | make_sockaddr(&src_addr, 0, &addr_len); | ||
905 | result = sock->ops->bind(sock, (struct sockaddr *) &src_addr, | ||
906 | addr_len); | ||
907 | if (result < 0) { | ||
908 | log_print("could not bind for connect: %d", result); | ||
909 | /* This *may* not indicate a critical error */ | ||
910 | } | ||
911 | |||
901 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); | 912 | make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); |
902 | 913 | ||
903 | log_print("connecting to %d", con->nodeid); | 914 | log_print("connecting to %d", con->nodeid); |
@@ -1426,6 +1437,8 @@ void dlm_lowcomms_stop(void) | |||
1426 | con = __nodeid2con(i, 0); | 1437 | con = __nodeid2con(i, 0); |
1427 | if (con) { | 1438 | if (con) { |
1428 | close_connection(con, true); | 1439 | close_connection(con, true); |
1440 | if (con->othercon) | ||
1441 | kmem_cache_free(con_cache, con->othercon); | ||
1429 | kmem_cache_free(con_cache, con); | 1442 | kmem_cache_free(con_cache, con); |
1430 | } | 1443 | } |
1431 | } | 1444 | } |
diff --git a/fs/dlm/main.c b/fs/dlm/main.c index eca2907f2386..58487fb95a4c 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c | |||
@@ -18,16 +18,6 @@ | |||
18 | #include "memory.h" | 18 | #include "memory.h" |
19 | #include "config.h" | 19 | #include "config.h" |
20 | 20 | ||
21 | #ifdef CONFIG_DLM_DEBUG | ||
22 | int dlm_register_debugfs(void); | ||
23 | void dlm_unregister_debugfs(void); | ||
24 | #else | ||
25 | static inline int dlm_register_debugfs(void) { return 0; } | ||
26 | static inline void dlm_unregister_debugfs(void) { } | ||
27 | #endif | ||
28 | int dlm_netlink_init(void); | ||
29 | void dlm_netlink_exit(void); | ||
30 | |||
31 | static int __init init_dlm(void) | 21 | static int __init init_dlm(void) |
32 | { | 22 | { |
33 | int error; | 23 | int error; |
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index e9cdcab306e2..fa17f5a27883 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -70,7 +70,7 @@ static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) | |||
70 | ls->ls_num_nodes--; | 70 | ls->ls_num_nodes--; |
71 | } | 71 | } |
72 | 72 | ||
73 | static int dlm_is_member(struct dlm_ls *ls, int nodeid) | 73 | int dlm_is_member(struct dlm_ls *ls, int nodeid) |
74 | { | 74 | { |
75 | struct dlm_member *memb; | 75 | struct dlm_member *memb; |
76 | 76 | ||
diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 927c08c19214..7a26fca1e0b5 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -19,6 +19,7 @@ void dlm_clear_members(struct dlm_ls *ls); | |||
19 | void dlm_clear_members_gone(struct dlm_ls *ls); | 19 | void dlm_clear_members_gone(struct dlm_ls *ls); |
20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); | 20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); |
21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); | 21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); |
22 | int dlm_is_member(struct dlm_ls *ls, int nodeid); | ||
22 | 23 | ||
23 | #endif /* __MEMBER_DOT_H__ */ | 24 | #endif /* __MEMBER_DOT_H__ */ |
24 | 25 | ||
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index ecf0e5cb2035..f7783867491a 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -35,7 +35,7 @@ void dlm_memory_exit(void) | |||
35 | kmem_cache_destroy(lkb_cache); | 35 | kmem_cache_destroy(lkb_cache); |
36 | } | 36 | } |
37 | 37 | ||
38 | char *allocate_lvb(struct dlm_ls *ls) | 38 | char *dlm_allocate_lvb(struct dlm_ls *ls) |
39 | { | 39 | { |
40 | char *p; | 40 | char *p; |
41 | 41 | ||
@@ -43,7 +43,7 @@ char *allocate_lvb(struct dlm_ls *ls) | |||
43 | return p; | 43 | return p; |
44 | } | 44 | } |
45 | 45 | ||
46 | void free_lvb(char *p) | 46 | void dlm_free_lvb(char *p) |
47 | { | 47 | { |
48 | kfree(p); | 48 | kfree(p); |
49 | } | 49 | } |
@@ -51,7 +51,7 @@ void free_lvb(char *p) | |||
51 | /* FIXME: have some minimal space built-in to rsb for the name and | 51 | /* FIXME: have some minimal space built-in to rsb for the name and |
52 | kmalloc a separate name if needed, like dentries are done */ | 52 | kmalloc a separate name if needed, like dentries are done */ |
53 | 53 | ||
54 | struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) | 54 | struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) |
55 | { | 55 | { |
56 | struct dlm_rsb *r; | 56 | struct dlm_rsb *r; |
57 | 57 | ||
@@ -61,14 +61,14 @@ struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) | |||
61 | return r; | 61 | return r; |
62 | } | 62 | } |
63 | 63 | ||
64 | void free_rsb(struct dlm_rsb *r) | 64 | void dlm_free_rsb(struct dlm_rsb *r) |
65 | { | 65 | { |
66 | if (r->res_lvbptr) | 66 | if (r->res_lvbptr) |
67 | free_lvb(r->res_lvbptr); | 67 | dlm_free_lvb(r->res_lvbptr); |
68 | kfree(r); | 68 | kfree(r); |
69 | } | 69 | } |
70 | 70 | ||
71 | struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) | 71 | struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) |
72 | { | 72 | { |
73 | struct dlm_lkb *lkb; | 73 | struct dlm_lkb *lkb; |
74 | 74 | ||
@@ -76,7 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) | |||
76 | return lkb; | 76 | return lkb; |
77 | } | 77 | } |
78 | 78 | ||
79 | void free_lkb(struct dlm_lkb *lkb) | 79 | void dlm_free_lkb(struct dlm_lkb *lkb) |
80 | { | 80 | { |
81 | if (lkb->lkb_flags & DLM_IFL_USER) { | 81 | if (lkb->lkb_flags & DLM_IFL_USER) { |
82 | struct dlm_user_args *ua; | 82 | struct dlm_user_args *ua; |
@@ -90,19 +90,3 @@ void free_lkb(struct dlm_lkb *lkb) | |||
90 | kmem_cache_free(lkb_cache, lkb); | 90 | kmem_cache_free(lkb_cache, lkb); |
91 | } | 91 | } |
92 | 92 | ||
93 | struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) | ||
94 | { | ||
95 | struct dlm_direntry *de; | ||
96 | |||
97 | DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, | ||
98 | printk("namelen = %d\n", namelen);); | ||
99 | |||
100 | de = kzalloc(sizeof(*de) + namelen, GFP_KERNEL); | ||
101 | return de; | ||
102 | } | ||
103 | |||
104 | void free_direntry(struct dlm_direntry *de) | ||
105 | { | ||
106 | kfree(de); | ||
107 | } | ||
108 | |||
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 6ead158ccc5c..485fb29143bd 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -16,14 +16,12 @@ | |||
16 | 16 | ||
17 | int dlm_memory_init(void); | 17 | int dlm_memory_init(void); |
18 | void dlm_memory_exit(void); | 18 | void dlm_memory_exit(void); |
19 | struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); | 19 | struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); |
20 | void free_rsb(struct dlm_rsb *r); | 20 | void dlm_free_rsb(struct dlm_rsb *r); |
21 | struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); | 21 | struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); |
22 | void free_lkb(struct dlm_lkb *l); | 22 | void dlm_free_lkb(struct dlm_lkb *l); |
23 | struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); | 23 | char *dlm_allocate_lvb(struct dlm_ls *ls); |
24 | void free_direntry(struct dlm_direntry *de); | 24 | void dlm_free_lvb(char *l); |
25 | char *allocate_lvb(struct dlm_ls *ls); | ||
26 | void free_lvb(char *l); | ||
27 | 25 | ||
28 | #endif /* __MEMORY_DOT_H__ */ | 26 | #endif /* __MEMORY_DOT_H__ */ |
29 | 27 | ||
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index f8c69dda16a0..e69926e984db 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -58,8 +58,12 @@ static void copy_from_cb(void *dst, const void *base, unsigned offset, | |||
58 | int dlm_process_incoming_buffer(int nodeid, const void *base, | 58 | int dlm_process_incoming_buffer(int nodeid, const void *base, |
59 | unsigned offset, unsigned len, unsigned limit) | 59 | unsigned offset, unsigned len, unsigned limit) |
60 | { | 60 | { |
61 | unsigned char __tmp[DLM_INBUF_LEN]; | 61 | union { |
62 | struct dlm_header *msg = (struct dlm_header *) __tmp; | 62 | unsigned char __buf[DLM_INBUF_LEN]; |
63 | /* this is to force proper alignment on some arches */ | ||
64 | struct dlm_header dlm; | ||
65 | } __tmp; | ||
66 | struct dlm_header *msg = &__tmp.dlm; | ||
63 | int ret = 0; | 67 | int ret = 0; |
64 | int err = 0; | 68 | int err = 0; |
65 | uint16_t msglen; | 69 | uint16_t msglen; |
@@ -100,8 +104,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
100 | in the buffer on the stack (which should work for most | 104 | in the buffer on the stack (which should work for most |
101 | ordinary messages). */ | 105 | ordinary messages). */ |
102 | 106 | ||
103 | if (msglen > sizeof(__tmp) && | 107 | if (msglen > DLM_INBUF_LEN && msg == &__tmp.dlm) { |
104 | msg == (struct dlm_header *) __tmp) { | ||
105 | msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); | 108 | msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); |
106 | if (msg == NULL) | 109 | if (msg == NULL) |
107 | return ret; | 110 | return ret; |
@@ -119,7 +122,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, | |||
119 | dlm_receive_buffer(msg, nodeid); | 122 | dlm_receive_buffer(msg, nodeid); |
120 | } | 123 | } |
121 | 124 | ||
122 | if (msg != (struct dlm_header *) __tmp) | 125 | if (msg != &__tmp.dlm) |
123 | kfree(msg); | 126 | kfree(msg); |
124 | 127 | ||
125 | return err ? err : ret; | 128 | return err ? err : ret; |
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index ae2fd97fa4ad..026824cd3acb 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -197,11 +197,6 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
197 | spin_unlock(&ls->ls_rcom_spin); | 197 | spin_unlock(&ls->ls_rcom_spin); |
198 | } | 198 | } |
199 | 199 | ||
200 | static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
201 | { | ||
202 | receive_sync_reply(ls, rc_in); | ||
203 | } | ||
204 | |||
205 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | 200 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) |
206 | { | 201 | { |
207 | struct dlm_rcom *rc; | 202 | struct dlm_rcom *rc; |
@@ -254,11 +249,6 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
254 | send_rcom(ls, mh, rc); | 249 | send_rcom(ls, mh, rc); |
255 | } | 250 | } |
256 | 251 | ||
257 | static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
258 | { | ||
259 | receive_sync_reply(ls, rc_in); | ||
260 | } | ||
261 | |||
262 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) | 252 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) |
263 | { | 253 | { |
264 | struct dlm_rcom *rc; | 254 | struct dlm_rcom *rc; |
@@ -381,11 +371,6 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
381 | send_rcom(ls, mh, rc); | 371 | send_rcom(ls, mh, rc); |
382 | } | 372 | } |
383 | 373 | ||
384 | static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) | ||
385 | { | ||
386 | dlm_recover_process_copy(ls, rc_in); | ||
387 | } | ||
388 | |||
389 | /* If the lockspace doesn't exist then still send a status message | 374 | /* If the lockspace doesn't exist then still send a status message |
390 | back; it's possible that it just doesn't have its global_id yet. */ | 375 | back; it's possible that it just doesn't have its global_id yet. */ |
391 | 376 | ||
@@ -481,11 +466,11 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
481 | break; | 466 | break; |
482 | 467 | ||
483 | case DLM_RCOM_STATUS_REPLY: | 468 | case DLM_RCOM_STATUS_REPLY: |
484 | receive_rcom_status_reply(ls, rc); | 469 | receive_sync_reply(ls, rc); |
485 | break; | 470 | break; |
486 | 471 | ||
487 | case DLM_RCOM_NAMES_REPLY: | 472 | case DLM_RCOM_NAMES_REPLY: |
488 | receive_rcom_names_reply(ls, rc); | 473 | receive_sync_reply(ls, rc); |
489 | break; | 474 | break; |
490 | 475 | ||
491 | case DLM_RCOM_LOOKUP_REPLY: | 476 | case DLM_RCOM_LOOKUP_REPLY: |
@@ -493,11 +478,11 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
493 | break; | 478 | break; |
494 | 479 | ||
495 | case DLM_RCOM_LOCK_REPLY: | 480 | case DLM_RCOM_LOCK_REPLY: |
496 | receive_rcom_lock_reply(ls, rc); | 481 | dlm_recover_process_copy(ls, rc); |
497 | break; | 482 | break; |
498 | 483 | ||
499 | default: | 484 | default: |
500 | DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); | 485 | log_error(ls, "receive_rcom bad type %d", rc->rc_type); |
501 | } | 486 | } |
502 | out: | 487 | out: |
503 | return; | 488 | return; |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index c2cc7694cd16..df075dc300fa 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -629,7 +629,7 @@ static void recover_lvb(struct dlm_rsb *r) | |||
629 | goto out; | 629 | goto out; |
630 | 630 | ||
631 | if (!r->res_lvbptr) { | 631 | if (!r->res_lvbptr) { |
632 | r->res_lvbptr = allocate_lvb(r->res_ls); | 632 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); |
633 | if (!r->res_lvbptr) | 633 | if (!r->res_lvbptr) |
634 | goto out; | 634 | goto out; |
635 | } | 635 | } |
@@ -731,6 +731,20 @@ int dlm_create_root_list(struct dlm_ls *ls) | |||
731 | list_add(&r->res_root_list, &ls->ls_root_list); | 731 | list_add(&r->res_root_list, &ls->ls_root_list); |
732 | dlm_hold_rsb(r); | 732 | dlm_hold_rsb(r); |
733 | } | 733 | } |
734 | |||
735 | /* If we're using a directory, add tossed rsbs to the root | ||
736 | list; they'll have entries created in the new directory, | ||
737 | but no other recovery steps should do anything with them. */ | ||
738 | |||
739 | if (dlm_no_directory(ls)) { | ||
740 | read_unlock(&ls->ls_rsbtbl[i].lock); | ||
741 | continue; | ||
742 | } | ||
743 | |||
744 | list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { | ||
745 | list_add(&r->res_root_list, &ls->ls_root_list); | ||
746 | dlm_hold_rsb(r); | ||
747 | } | ||
734 | read_unlock(&ls->ls_rsbtbl[i].lock); | 748 | read_unlock(&ls->ls_rsbtbl[i].lock); |
735 | } | 749 | } |
736 | out: | 750 | out: |
@@ -750,6 +764,11 @@ void dlm_release_root_list(struct dlm_ls *ls) | |||
750 | up_write(&ls->ls_root_sem); | 764 | up_write(&ls->ls_root_sem); |
751 | } | 765 | } |
752 | 766 | ||
767 | /* If not using a directory, clear the entire toss list, there's no benefit to | ||
768 | caching the master value since it's fixed. If we are using a dir, keep the | ||
769 | rsb's we're the master of. Recovery will add them to the root list and from | ||
770 | there they'll be entered in the rebuilt directory. */ | ||
771 | |||
753 | void dlm_clear_toss_list(struct dlm_ls *ls) | 772 | void dlm_clear_toss_list(struct dlm_ls *ls) |
754 | { | 773 | { |
755 | struct dlm_rsb *r, *safe; | 774 | struct dlm_rsb *r, *safe; |
@@ -759,8 +778,10 @@ void dlm_clear_toss_list(struct dlm_ls *ls) | |||
759 | write_lock(&ls->ls_rsbtbl[i].lock); | 778 | write_lock(&ls->ls_rsbtbl[i].lock); |
760 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, | 779 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, |
761 | res_hashchain) { | 780 | res_hashchain) { |
762 | list_del(&r->res_hashchain); | 781 | if (dlm_no_directory(ls) || !is_master(r)) { |
763 | free_rsb(r); | 782 | list_del(&r->res_hashchain); |
783 | dlm_free_rsb(r); | ||
784 | } | ||
764 | } | 785 | } |
765 | write_unlock(&ls->ls_rsbtbl[i].lock); | 786 | write_unlock(&ls->ls_rsbtbl[i].lock); |
766 | } | 787 | } |
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 4b89e20eebe7..997f9531d594 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -67,17 +67,18 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
67 | dlm_astd_resume(); | 67 | dlm_astd_resume(); |
68 | 68 | ||
69 | /* | 69 | /* |
70 | * This list of root rsb's will be the basis of most of the recovery | 70 | * Free non-master tossed rsb's. Master rsb's are kept on toss |
71 | * routines. | 71 | * list and put on root list to be included in resdir recovery. |
72 | */ | 72 | */ |
73 | 73 | ||
74 | dlm_create_root_list(ls); | 74 | dlm_clear_toss_list(ls); |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Free all the tossed rsb's so we don't have to recover them. | 77 | * This list of root rsb's will be the basis of most of the recovery |
78 | * routines. | ||
78 | */ | 79 | */ |
79 | 80 | ||
80 | dlm_clear_toss_list(ls); | 81 | dlm_create_root_list(ls); |
81 | 82 | ||
82 | /* | 83 | /* |
83 | * Add or remove nodes from the lockspace's ls_nodes list. | 84 | * Add or remove nodes from the lockspace's ls_nodes list. |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 4f741546f4bb..7cbc6826239b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -24,8 +24,7 @@ | |||
24 | #include "lvb_table.h" | 24 | #include "lvb_table.h" |
25 | #include "user.h" | 25 | #include "user.h" |
26 | 26 | ||
27 | static const char *name_prefix="dlm"; | 27 | static const char name_prefix[] = "dlm"; |
28 | static struct miscdevice ctl_device; | ||
29 | static const struct file_operations device_fops; | 28 | static const struct file_operations device_fops; |
30 | 29 | ||
31 | #ifdef CONFIG_COMPAT | 30 | #ifdef CONFIG_COMPAT |
@@ -82,7 +81,8 @@ struct dlm_lock_result32 { | |||
82 | }; | 81 | }; |
83 | 82 | ||
84 | static void compat_input(struct dlm_write_request *kb, | 83 | static void compat_input(struct dlm_write_request *kb, |
85 | struct dlm_write_request32 *kb32) | 84 | struct dlm_write_request32 *kb32, |
85 | int max_namelen) | ||
86 | { | 86 | { |
87 | kb->version[0] = kb32->version[0]; | 87 | kb->version[0] = kb32->version[0]; |
88 | kb->version[1] = kb32->version[1]; | 88 | kb->version[1] = kb32->version[1]; |
@@ -112,7 +112,11 @@ static void compat_input(struct dlm_write_request *kb, | |||
112 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; | 112 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; |
113 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; | 113 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; |
114 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); | 114 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); |
115 | memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen); | 115 | if (kb->i.lock.namelen <= max_namelen) |
116 | memcpy(kb->i.lock.name, kb32->i.lock.name, | ||
117 | kb->i.lock.namelen); | ||
118 | else | ||
119 | kb->i.lock.namelen = max_namelen; | ||
116 | } | 120 | } |
117 | } | 121 | } |
118 | 122 | ||
@@ -236,12 +240,12 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
236 | spin_unlock(&proc->asts_spin); | 240 | spin_unlock(&proc->asts_spin); |
237 | 241 | ||
238 | if (eol) { | 242 | if (eol) { |
239 | spin_lock(&ua->proc->locks_spin); | 243 | spin_lock(&proc->locks_spin); |
240 | if (!list_empty(&lkb->lkb_ownqueue)) { | 244 | if (!list_empty(&lkb->lkb_ownqueue)) { |
241 | list_del_init(&lkb->lkb_ownqueue); | 245 | list_del_init(&lkb->lkb_ownqueue); |
242 | dlm_put_lkb(lkb); | 246 | dlm_put_lkb(lkb); |
243 | } | 247 | } |
244 | spin_unlock(&ua->proc->locks_spin); | 248 | spin_unlock(&proc->locks_spin); |
245 | } | 249 | } |
246 | out: | 250 | out: |
247 | mutex_unlock(&ls->ls_clear_proc_locks); | 251 | mutex_unlock(&ls->ls_clear_proc_locks); |
@@ -529,7 +533,8 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
529 | 533 | ||
530 | if (proc) | 534 | if (proc) |
531 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); | 535 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); |
532 | compat_input(kbuf, k32buf); | 536 | compat_input(kbuf, k32buf, |
537 | count - sizeof(struct dlm_write_request32)); | ||
533 | kfree(k32buf); | 538 | kfree(k32buf); |
534 | } | 539 | } |
535 | #endif | 540 | #endif |
@@ -896,14 +901,16 @@ static const struct file_operations ctl_device_fops = { | |||
896 | .owner = THIS_MODULE, | 901 | .owner = THIS_MODULE, |
897 | }; | 902 | }; |
898 | 903 | ||
904 | static struct miscdevice ctl_device = { | ||
905 | .name = "dlm-control", | ||
906 | .fops = &ctl_device_fops, | ||
907 | .minor = MISC_DYNAMIC_MINOR, | ||
908 | }; | ||
909 | |||
899 | int dlm_user_init(void) | 910 | int dlm_user_init(void) |
900 | { | 911 | { |
901 | int error; | 912 | int error; |
902 | 913 | ||
903 | ctl_device.name = "dlm-control"; | ||
904 | ctl_device.fops = &ctl_device_fops; | ||
905 | ctl_device.minor = MISC_DYNAMIC_MINOR; | ||
906 | |||
907 | error = misc_register(&ctl_device); | 914 | error = misc_register(&ctl_device); |
908 | if (error) | 915 | if (error) |
909 | log_print("misc_register failed for control device"); | 916 | log_print("misc_register failed for control device"); |
diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 963889cf6740..4d9c1f4e1bd1 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -14,6 +14,14 @@ | |||
14 | #include "rcom.h" | 14 | #include "rcom.h" |
15 | #include "util.h" | 15 | #include "util.h" |
16 | 16 | ||
17 | #define DLM_ERRNO_EDEADLK 35 | ||
18 | #define DLM_ERRNO_EBADR 53 | ||
19 | #define DLM_ERRNO_EBADSLT 57 | ||
20 | #define DLM_ERRNO_EPROTO 71 | ||
21 | #define DLM_ERRNO_EOPNOTSUPP 95 | ||
22 | #define DLM_ERRNO_ETIMEDOUT 110 | ||
23 | #define DLM_ERRNO_EINPROGRESS 115 | ||
24 | |||
17 | static void header_out(struct dlm_header *hd) | 25 | static void header_out(struct dlm_header *hd) |
18 | { | 26 | { |
19 | hd->h_version = cpu_to_le32(hd->h_version); | 27 | hd->h_version = cpu_to_le32(hd->h_version); |
@@ -30,11 +38,54 @@ static void header_in(struct dlm_header *hd) | |||
30 | hd->h_length = le16_to_cpu(hd->h_length); | 38 | hd->h_length = le16_to_cpu(hd->h_length); |
31 | } | 39 | } |
32 | 40 | ||
33 | void dlm_message_out(struct dlm_message *ms) | 41 | /* higher errno values are inconsistent across architectures, so select |
42 | one set of values for on the wire */ | ||
43 | |||
44 | static int to_dlm_errno(int err) | ||
45 | { | ||
46 | switch (err) { | ||
47 | case -EDEADLK: | ||
48 | return -DLM_ERRNO_EDEADLK; | ||
49 | case -EBADR: | ||
50 | return -DLM_ERRNO_EBADR; | ||
51 | case -EBADSLT: | ||
52 | return -DLM_ERRNO_EBADSLT; | ||
53 | case -EPROTO: | ||
54 | return -DLM_ERRNO_EPROTO; | ||
55 | case -EOPNOTSUPP: | ||
56 | return -DLM_ERRNO_EOPNOTSUPP; | ||
57 | case -ETIMEDOUT: | ||
58 | return -DLM_ERRNO_ETIMEDOUT; | ||
59 | case -EINPROGRESS: | ||
60 | return -DLM_ERRNO_EINPROGRESS; | ||
61 | } | ||
62 | return err; | ||
63 | } | ||
64 | |||
65 | static int from_dlm_errno(int err) | ||
34 | { | 66 | { |
35 | struct dlm_header *hd = (struct dlm_header *) ms; | 67 | switch (err) { |
68 | case -DLM_ERRNO_EDEADLK: | ||
69 | return -EDEADLK; | ||
70 | case -DLM_ERRNO_EBADR: | ||
71 | return -EBADR; | ||
72 | case -DLM_ERRNO_EBADSLT: | ||
73 | return -EBADSLT; | ||
74 | case -DLM_ERRNO_EPROTO: | ||
75 | return -EPROTO; | ||
76 | case -DLM_ERRNO_EOPNOTSUPP: | ||
77 | return -EOPNOTSUPP; | ||
78 | case -DLM_ERRNO_ETIMEDOUT: | ||
79 | return -ETIMEDOUT; | ||
80 | case -DLM_ERRNO_EINPROGRESS: | ||
81 | return -EINPROGRESS; | ||
82 | } | ||
83 | return err; | ||
84 | } | ||
36 | 85 | ||
37 | header_out(hd); | 86 | void dlm_message_out(struct dlm_message *ms) |
87 | { | ||
88 | header_out(&ms->m_header); | ||
38 | 89 | ||
39 | ms->m_type = cpu_to_le32(ms->m_type); | 90 | ms->m_type = cpu_to_le32(ms->m_type); |
40 | ms->m_nodeid = cpu_to_le32(ms->m_nodeid); | 91 | ms->m_nodeid = cpu_to_le32(ms->m_nodeid); |
@@ -53,14 +104,12 @@ void dlm_message_out(struct dlm_message *ms) | |||
53 | ms->m_rqmode = cpu_to_le32(ms->m_rqmode); | 104 | ms->m_rqmode = cpu_to_le32(ms->m_rqmode); |
54 | ms->m_bastmode = cpu_to_le32(ms->m_bastmode); | 105 | ms->m_bastmode = cpu_to_le32(ms->m_bastmode); |
55 | ms->m_asts = cpu_to_le32(ms->m_asts); | 106 | ms->m_asts = cpu_to_le32(ms->m_asts); |
56 | ms->m_result = cpu_to_le32(ms->m_result); | 107 | ms->m_result = cpu_to_le32(to_dlm_errno(ms->m_result)); |
57 | } | 108 | } |
58 | 109 | ||
59 | void dlm_message_in(struct dlm_message *ms) | 110 | void dlm_message_in(struct dlm_message *ms) |
60 | { | 111 | { |
61 | struct dlm_header *hd = (struct dlm_header *) ms; | 112 | header_in(&ms->m_header); |
62 | |||
63 | header_in(hd); | ||
64 | 113 | ||
65 | ms->m_type = le32_to_cpu(ms->m_type); | 114 | ms->m_type = le32_to_cpu(ms->m_type); |
66 | ms->m_nodeid = le32_to_cpu(ms->m_nodeid); | 115 | ms->m_nodeid = le32_to_cpu(ms->m_nodeid); |
@@ -79,7 +128,7 @@ void dlm_message_in(struct dlm_message *ms) | |||
79 | ms->m_rqmode = le32_to_cpu(ms->m_rqmode); | 128 | ms->m_rqmode = le32_to_cpu(ms->m_rqmode); |
80 | ms->m_bastmode = le32_to_cpu(ms->m_bastmode); | 129 | ms->m_bastmode = le32_to_cpu(ms->m_bastmode); |
81 | ms->m_asts = le32_to_cpu(ms->m_asts); | 130 | ms->m_asts = le32_to_cpu(ms->m_asts); |
82 | ms->m_result = le32_to_cpu(ms->m_result); | 131 | ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result)); |
83 | } | 132 | } |
84 | 133 | ||
85 | static void rcom_lock_out(struct rcom_lock *rl) | 134 | static void rcom_lock_out(struct rcom_lock *rl) |
@@ -126,10 +175,9 @@ static void rcom_config_in(struct rcom_config *rf) | |||
126 | 175 | ||
127 | void dlm_rcom_out(struct dlm_rcom *rc) | 176 | void dlm_rcom_out(struct dlm_rcom *rc) |
128 | { | 177 | { |
129 | struct dlm_header *hd = (struct dlm_header *) rc; | ||
130 | int type = rc->rc_type; | 178 | int type = rc->rc_type; |
131 | 179 | ||
132 | header_out(hd); | 180 | header_out(&rc->rc_header); |
133 | 181 | ||
134 | rc->rc_type = cpu_to_le32(rc->rc_type); | 182 | rc->rc_type = cpu_to_le32(rc->rc_type); |
135 | rc->rc_result = cpu_to_le32(rc->rc_result); | 183 | rc->rc_result = cpu_to_le32(rc->rc_result); |
@@ -137,7 +185,7 @@ void dlm_rcom_out(struct dlm_rcom *rc) | |||
137 | rc->rc_seq = cpu_to_le64(rc->rc_seq); | 185 | rc->rc_seq = cpu_to_le64(rc->rc_seq); |
138 | rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); | 186 | rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); |
139 | 187 | ||
140 | if (type == DLM_RCOM_LOCK) | 188 | if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) |
141 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); | 189 | rcom_lock_out((struct rcom_lock *) rc->rc_buf); |
142 | 190 | ||
143 | else if (type == DLM_RCOM_STATUS_REPLY) | 191 | else if (type == DLM_RCOM_STATUS_REPLY) |
@@ -146,9 +194,9 @@ void dlm_rcom_out(struct dlm_rcom *rc) | |||
146 | 194 | ||
147 | void dlm_rcom_in(struct dlm_rcom *rc) | 195 | void dlm_rcom_in(struct dlm_rcom *rc) |
148 | { | 196 | { |
149 | struct dlm_header *hd = (struct dlm_header *) rc; | 197 | int type; |
150 | 198 | ||
151 | header_in(hd); | 199 | header_in(&rc->rc_header); |
152 | 200 | ||
153 | rc->rc_type = le32_to_cpu(rc->rc_type); | 201 | rc->rc_type = le32_to_cpu(rc->rc_type); |
154 | rc->rc_result = le32_to_cpu(rc->rc_result); | 202 | rc->rc_result = le32_to_cpu(rc->rc_result); |
@@ -156,10 +204,12 @@ void dlm_rcom_in(struct dlm_rcom *rc) | |||
156 | rc->rc_seq = le64_to_cpu(rc->rc_seq); | 204 | rc->rc_seq = le64_to_cpu(rc->rc_seq); |
157 | rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); | 205 | rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); |
158 | 206 | ||
159 | if (rc->rc_type == DLM_RCOM_LOCK) | 207 | type = rc->rc_type; |
208 | |||
209 | if ((type == DLM_RCOM_LOCK) || (type == DLM_RCOM_LOCK_REPLY)) | ||
160 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); | 210 | rcom_lock_in((struct rcom_lock *) rc->rc_buf); |
161 | 211 | ||
162 | else if (rc->rc_type == DLM_RCOM_STATUS_REPLY) | 212 | else if (type == DLM_RCOM_STATUS_REPLY) |
163 | rcom_config_in((struct rcom_config *) rc->rc_buf); | 213 | rcom_config_in((struct rcom_config *) rc->rc_buf); |
164 | } | 214 | } |
165 | 215 | ||
diff --git a/fs/dquot.c b/fs/dquot.c index 2809768d9c41..cee7c6f428f0 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
@@ -827,6 +827,18 @@ static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) | |||
827 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); | 827 | clear_bit(DQ_BLKS_B, &dquot->dq_flags); |
828 | } | 828 | } |
829 | 829 | ||
830 | static int warning_issued(struct dquot *dquot, const int warntype) | ||
831 | { | ||
832 | int flag = (warntype == QUOTA_NL_BHARDWARN || | ||
833 | warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B : | ||
834 | ((warntype == QUOTA_NL_IHARDWARN || | ||
835 | warntype == QUOTA_NL_ISOFTLONGWARN) ? DQ_INODES_B : 0); | ||
836 | |||
837 | if (!flag) | ||
838 | return 0; | ||
839 | return test_and_set_bit(flag, &dquot->dq_flags); | ||
840 | } | ||
841 | |||
830 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 842 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
831 | static int flag_print_warnings = 1; | 843 | static int flag_print_warnings = 1; |
832 | 844 | ||
@@ -845,16 +857,12 @@ static inline int need_print_warning(struct dquot *dquot) | |||
845 | } | 857 | } |
846 | 858 | ||
847 | /* Print warning to user which exceeded quota */ | 859 | /* Print warning to user which exceeded quota */ |
848 | static void print_warning(struct dquot *dquot, const char warntype) | 860 | static void print_warning(struct dquot *dquot, const int warntype) |
849 | { | 861 | { |
850 | char *msg = NULL; | 862 | char *msg = NULL; |
851 | struct tty_struct *tty; | 863 | struct tty_struct *tty; |
852 | int flag = (warntype == QUOTA_NL_BHARDWARN || | ||
853 | warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B : | ||
854 | ((warntype == QUOTA_NL_IHARDWARN || | ||
855 | warntype == QUOTA_NL_ISOFTLONGWARN) ? DQ_INODES_B : 0); | ||
856 | 864 | ||
857 | if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) | 865 | if (!need_print_warning(dquot)) |
858 | return; | 866 | return; |
859 | 867 | ||
860 | mutex_lock(&tty_mutex); | 868 | mutex_lock(&tty_mutex); |
@@ -895,9 +903,6 @@ out_lock: | |||
895 | 903 | ||
896 | #ifdef CONFIG_QUOTA_NETLINK_INTERFACE | 904 | #ifdef CONFIG_QUOTA_NETLINK_INTERFACE |
897 | 905 | ||
898 | /* Size of quota netlink message - actually an upperbound for buffer size */ | ||
899 | #define QUOTA_NL_MSG_SIZE 32 | ||
900 | |||
901 | /* Netlink family structure for quota */ | 906 | /* Netlink family structure for quota */ |
902 | static struct genl_family quota_genl_family = { | 907 | static struct genl_family quota_genl_family = { |
903 | .id = GENL_ID_GENERATE, | 908 | .id = GENL_ID_GENERATE, |
@@ -914,11 +919,13 @@ static void send_warning(const struct dquot *dquot, const char warntype) | |||
914 | struct sk_buff *skb; | 919 | struct sk_buff *skb; |
915 | void *msg_head; | 920 | void *msg_head; |
916 | int ret; | 921 | int ret; |
922 | int msg_size = 4 * nla_total_size(sizeof(u32)) + | ||
923 | 2 * nla_total_size(sizeof(u64)); | ||
917 | 924 | ||
918 | /* We have to allocate using GFP_NOFS as we are called from a | 925 | /* We have to allocate using GFP_NOFS as we are called from a |
919 | * filesystem performing write and thus further recursion into | 926 | * filesystem performing write and thus further recursion into |
920 | * the fs to free some data could cause deadlocks. */ | 927 | * the fs to free some data could cause deadlocks. */ |
921 | skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS); | 928 | skb = genlmsg_new(msg_size, GFP_NOFS); |
922 | if (!skb) { | 929 | if (!skb) { |
923 | printk(KERN_ERR | 930 | printk(KERN_ERR |
924 | "VFS: Not enough memory to send quota warning.\n"); | 931 | "VFS: Not enough memory to send quota warning.\n"); |
@@ -959,18 +966,19 @@ static void send_warning(const struct dquot *dquot, const char warntype) | |||
959 | "VFS: Failed to send notification message: %d\n", ret); | 966 | "VFS: Failed to send notification message: %d\n", ret); |
960 | return; | 967 | return; |
961 | attr_err_out: | 968 | attr_err_out: |
962 | printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret); | 969 | printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); |
963 | err_out: | 970 | err_out: |
964 | kfree_skb(skb); | 971 | kfree_skb(skb); |
965 | } | 972 | } |
966 | #endif | 973 | #endif |
967 | 974 | ||
968 | static inline void flush_warnings(struct dquot **dquots, char *warntype) | 975 | static inline void flush_warnings(struct dquot * const *dquots, char *warntype) |
969 | { | 976 | { |
970 | int i; | 977 | int i; |
971 | 978 | ||
972 | for (i = 0; i < MAXQUOTAS; i++) | 979 | for (i = 0; i < MAXQUOTAS; i++) |
973 | if (dquots[i] != NODQUOT && warntype[i] != QUOTA_NL_NOWARN) { | 980 | if (dquots[i] != NODQUOT && warntype[i] != QUOTA_NL_NOWARN && |
981 | !warning_issued(dquots[i], warntype[i])) { | ||
974 | #ifdef CONFIG_PRINT_QUOTA_WARNING | 982 | #ifdef CONFIG_PRINT_QUOTA_WARNING |
975 | print_warning(dquots[i], warntype[i]); | 983 | print_warning(dquots[i], warntype[i]); |
976 | #endif | 984 | #endif |
@@ -1216,7 +1224,7 @@ warn_put_all: | |||
1216 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1224 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1217 | if (inode->i_dquot[cnt]) | 1225 | if (inode->i_dquot[cnt]) |
1218 | mark_dquot_dirty(inode->i_dquot[cnt]); | 1226 | mark_dquot_dirty(inode->i_dquot[cnt]); |
1219 | flush_warnings((struct dquot **)inode->i_dquot, warntype); | 1227 | flush_warnings(inode->i_dquot, warntype); |
1220 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1228 | up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
1221 | return ret; | 1229 | return ret; |
1222 | } | 1230 | } |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index bbed2fd40fdc..f8ef0af919e7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -799,7 +799,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) | |||
799 | rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, | 799 | rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, |
800 | crypt_stat->cipher, "cbc"); | 800 | crypt_stat->cipher, "cbc"); |
801 | if (rc) | 801 | if (rc) |
802 | goto out; | 802 | goto out_unlock; |
803 | crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0, | 803 | crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0, |
804 | CRYPTO_ALG_ASYNC); | 804 | CRYPTO_ALG_ASYNC); |
805 | kfree(full_alg_name); | 805 | kfree(full_alg_name); |
@@ -808,12 +808,12 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) | |||
808 | ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " | 808 | ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " |
809 | "Error initializing cipher [%s]\n", | 809 | "Error initializing cipher [%s]\n", |
810 | crypt_stat->cipher); | 810 | crypt_stat->cipher); |
811 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | 811 | goto out_unlock; |
812 | goto out; | ||
813 | } | 812 | } |
814 | crypto_blkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY); | 813 | crypto_blkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY); |
815 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
816 | rc = 0; | 814 | rc = 0; |
815 | out_unlock: | ||
816 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
817 | out: | 817 | out: |
818 | return rc; | 818 | return rc; |
819 | } | 819 | } |
@@ -1847,6 +1847,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, | |||
1847 | mutex_init(&tmp_tfm->key_tfm_mutex); | 1847 | mutex_init(&tmp_tfm->key_tfm_mutex); |
1848 | strncpy(tmp_tfm->cipher_name, cipher_name, | 1848 | strncpy(tmp_tfm->cipher_name, cipher_name, |
1849 | ECRYPTFS_MAX_CIPHER_NAME_SIZE); | 1849 | ECRYPTFS_MAX_CIPHER_NAME_SIZE); |
1850 | tmp_tfm->cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; | ||
1850 | tmp_tfm->key_size = key_size; | 1851 | tmp_tfm->key_size = key_size; |
1851 | rc = ecryptfs_process_key_cipher(&tmp_tfm->key_tfm, | 1852 | rc = ecryptfs_process_key_cipher(&tmp_tfm->key_tfm, |
1852 | tmp_tfm->cipher_name, | 1853 | tmp_tfm->cipher_name, |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 0b1ab016fa2e..5a719180983c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -120,22 +120,9 @@ ecryptfs_do_create(struct inode *directory_inode, | |||
120 | rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, | 120 | rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, |
121 | ecryptfs_dentry, mode, nd); | 121 | ecryptfs_dentry, mode, nd); |
122 | if (rc) { | 122 | if (rc) { |
123 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; | 123 | printk(KERN_ERR "%s: Failure to create dentry in lower fs; " |
124 | struct ecryptfs_inode_info *inode_info = | 124 | "rc = [%d]\n", __FUNCTION__, rc); |
125 | ecryptfs_inode_to_private(ecryptfs_inode); | 125 | goto out_lock; |
126 | |||
127 | printk(KERN_WARNING "%s: Error creating underlying file; " | ||
128 | "rc = [%d]; checking for existing\n", __FUNCTION__, rc); | ||
129 | if (inode_info) { | ||
130 | mutex_lock(&inode_info->lower_file_mutex); | ||
131 | if (!inode_info->lower_file) { | ||
132 | mutex_unlock(&inode_info->lower_file_mutex); | ||
133 | printk(KERN_ERR "%s: Failure to set underlying " | ||
134 | "file; rc = [%d]\n", __FUNCTION__, rc); | ||
135 | goto out_lock; | ||
136 | } | ||
137 | mutex_unlock(&inode_info->lower_file_mutex); | ||
138 | } | ||
139 | } | 126 | } |
140 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, | 127 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, |
141 | directory_inode->i_sb, 0); | 128 | directory_inode->i_sb, 0); |
@@ -451,6 +438,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) | |||
451 | dentry->d_inode->i_nlink = | 438 | dentry->d_inode->i_nlink = |
452 | ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; | 439 | ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; |
453 | dentry->d_inode->i_ctime = dir->i_ctime; | 440 | dentry->d_inode->i_ctime = dir->i_ctime; |
441 | d_drop(dentry); | ||
454 | out_unlock: | 442 | out_unlock: |
455 | unlock_parent(lower_dentry); | 443 | unlock_parent(lower_dentry); |
456 | return rc; | 444 | return rc; |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 263fed88c0ca..f458c1f35565 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -1860,7 +1860,7 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, | |||
1860 | struct ecryptfs_global_auth_tok *new_auth_tok; | 1860 | struct ecryptfs_global_auth_tok *new_auth_tok; |
1861 | int rc = 0; | 1861 | int rc = 0; |
1862 | 1862 | ||
1863 | new_auth_tok = kmem_cache_alloc(ecryptfs_global_auth_tok_cache, | 1863 | new_auth_tok = kmem_cache_zalloc(ecryptfs_global_auth_tok_cache, |
1864 | GFP_KERNEL); | 1864 | GFP_KERNEL); |
1865 | if (!new_auth_tok) { | 1865 | if (!new_auth_tok) { |
1866 | rc = -ENOMEM; | 1866 | rc = -ENOMEM; |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index b83a512b7e08..0249aa4ae181 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -138,11 +138,14 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) | |||
138 | inode_info->lower_file = dentry_open(lower_dentry, | 138 | inode_info->lower_file = dentry_open(lower_dentry, |
139 | lower_mnt, | 139 | lower_mnt, |
140 | (O_RDWR | O_LARGEFILE)); | 140 | (O_RDWR | O_LARGEFILE)); |
141 | if (IS_ERR(inode_info->lower_file)) | 141 | if (IS_ERR(inode_info->lower_file)) { |
142 | dget(lower_dentry); | ||
143 | mntget(lower_mnt); | ||
142 | inode_info->lower_file = dentry_open(lower_dentry, | 144 | inode_info->lower_file = dentry_open(lower_dentry, |
143 | lower_mnt, | 145 | lower_mnt, |
144 | (O_RDONLY | 146 | (O_RDONLY |
145 | | O_LARGEFILE)); | 147 | | O_LARGEFILE)); |
148 | } | ||
146 | if (IS_ERR(inode_info->lower_file)) { | 149 | if (IS_ERR(inode_info->lower_file)) { |
147 | printk(KERN_ERR "Error opening lower persistent file " | 150 | printk(KERN_ERR "Error opening lower persistent file " |
148 | "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", | 151 | "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", |
@@ -523,6 +526,7 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) | |||
523 | lower_mnt = nd.mnt; | 526 | lower_mnt = nd.mnt; |
524 | ecryptfs_set_superblock_lower(sb, lower_root->d_sb); | 527 | ecryptfs_set_superblock_lower(sb, lower_root->d_sb); |
525 | sb->s_maxbytes = lower_root->d_sb->s_maxbytes; | 528 | sb->s_maxbytes = lower_root->d_sb->s_maxbytes; |
529 | sb->s_blocksize = lower_root->d_sb->s_blocksize; | ||
526 | ecryptfs_set_dentry_lower(sb->s_root, lower_root); | 530 | ecryptfs_set_dentry_lower(sb->s_root, lower_root); |
527 | ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); | 531 | ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); |
528 | rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0); | 532 | rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0); |
@@ -730,127 +734,40 @@ static int ecryptfs_init_kmem_caches(void) | |||
730 | return 0; | 734 | return 0; |
731 | } | 735 | } |
732 | 736 | ||
733 | struct ecryptfs_obj { | 737 | static struct kobject *ecryptfs_kobj; |
734 | char *name; | ||
735 | struct list_head slot_list; | ||
736 | struct kobject kobj; | ||
737 | }; | ||
738 | |||
739 | struct ecryptfs_attribute { | ||
740 | struct attribute attr; | ||
741 | ssize_t(*show) (struct ecryptfs_obj *, char *); | ||
742 | ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t); | ||
743 | }; | ||
744 | |||
745 | static ssize_t | ||
746 | ecryptfs_attr_store(struct kobject *kobj, | ||
747 | struct attribute *attr, const char *buf, size_t len) | ||
748 | { | ||
749 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | ||
750 | kobj); | ||
751 | struct ecryptfs_attribute *attribute = | ||
752 | container_of(attr, struct ecryptfs_attribute, attr); | ||
753 | |||
754 | return (attribute->store ? attribute->store(obj, buf, len) : 0); | ||
755 | } | ||
756 | 738 | ||
757 | static ssize_t | 739 | static ssize_t version_show(struct kobject *kobj, |
758 | ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) | 740 | struct kobj_attribute *attr, char *buff) |
759 | { | 741 | { |
760 | struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, | 742 | return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); |
761 | kobj); | ||
762 | struct ecryptfs_attribute *attribute = | ||
763 | container_of(attr, struct ecryptfs_attribute, attr); | ||
764 | |||
765 | return (attribute->show ? attribute->show(obj, buf) : 0); | ||
766 | } | 743 | } |
767 | 744 | ||
768 | static struct sysfs_ops ecryptfs_sysfs_ops = { | 745 | static struct kobj_attribute version_attr = __ATTR_RO(version); |
769 | .show = ecryptfs_attr_show, | ||
770 | .store = ecryptfs_attr_store | ||
771 | }; | ||
772 | 746 | ||
773 | static struct kobj_type ecryptfs_ktype = { | 747 | static struct attribute *attributes[] = { |
774 | .sysfs_ops = &ecryptfs_sysfs_ops | 748 | &version_attr.attr, |
749 | NULL, | ||
775 | }; | 750 | }; |
776 | 751 | ||
777 | static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL); | 752 | static struct attribute_group attr_group = { |
778 | 753 | .attrs = attributes, | |
779 | static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) | ||
780 | { | ||
781 | return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); | ||
782 | } | ||
783 | |||
784 | static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); | ||
785 | |||
786 | static struct ecryptfs_version_str_map_elem { | ||
787 | u32 flag; | ||
788 | char *str; | ||
789 | } ecryptfs_version_str_map[] = { | ||
790 | {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, | ||
791 | {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, | ||
792 | {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, | ||
793 | {ECRYPTFS_VERSIONING_POLICY, "policy"}, | ||
794 | {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}, | ||
795 | {ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"} | ||
796 | }; | 754 | }; |
797 | 755 | ||
798 | static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) | ||
799 | { | ||
800 | int i; | ||
801 | int remaining = PAGE_SIZE; | ||
802 | int total_written = 0; | ||
803 | |||
804 | buff[0] = '\0'; | ||
805 | for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) { | ||
806 | int entry_size; | ||
807 | |||
808 | if (!(ECRYPTFS_VERSIONING_MASK | ||
809 | & ecryptfs_version_str_map[i].flag)) | ||
810 | continue; | ||
811 | entry_size = strlen(ecryptfs_version_str_map[i].str); | ||
812 | if ((entry_size + 2) > remaining) | ||
813 | goto out; | ||
814 | memcpy(buff, ecryptfs_version_str_map[i].str, entry_size); | ||
815 | buff[entry_size++] = '\n'; | ||
816 | buff[entry_size] = '\0'; | ||
817 | buff += entry_size; | ||
818 | total_written += entry_size; | ||
819 | remaining -= entry_size; | ||
820 | } | ||
821 | out: | ||
822 | return total_written; | ||
823 | } | ||
824 | |||
825 | static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str); | ||
826 | |||
827 | static int do_sysfs_registration(void) | 756 | static int do_sysfs_registration(void) |
828 | { | 757 | { |
829 | int rc; | 758 | int rc; |
830 | 759 | ||
831 | rc = subsystem_register(&ecryptfs_subsys); | 760 | ecryptfs_kobj = kobject_create_and_add("ecryptfs", fs_kobj); |
832 | if (rc) { | 761 | if (!ecryptfs_kobj) { |
833 | printk(KERN_ERR | 762 | printk(KERN_ERR "Unable to create ecryptfs kset\n"); |
834 | "Unable to register ecryptfs sysfs subsystem\n"); | 763 | rc = -ENOMEM; |
835 | goto out; | ||
836 | } | ||
837 | rc = sysfs_create_file(&ecryptfs_subsys.kobj, | ||
838 | &sysfs_attr_version.attr); | ||
839 | if (rc) { | ||
840 | printk(KERN_ERR | ||
841 | "Unable to create ecryptfs version attribute\n"); | ||
842 | subsystem_unregister(&ecryptfs_subsys); | ||
843 | goto out; | 764 | goto out; |
844 | } | 765 | } |
845 | rc = sysfs_create_file(&ecryptfs_subsys.kobj, | 766 | rc = sysfs_create_group(ecryptfs_kobj, &attr_group); |
846 | &sysfs_attr_version_str.attr); | ||
847 | if (rc) { | 767 | if (rc) { |
848 | printk(KERN_ERR | 768 | printk(KERN_ERR |
849 | "Unable to create ecryptfs version_str attribute\n"); | 769 | "Unable to create ecryptfs version attributes\n"); |
850 | sysfs_remove_file(&ecryptfs_subsys.kobj, | 770 | kobject_put(ecryptfs_kobj); |
851 | &sysfs_attr_version.attr); | ||
852 | subsystem_unregister(&ecryptfs_subsys); | ||
853 | goto out; | ||
854 | } | 771 | } |
855 | out: | 772 | out: |
856 | return rc; | 773 | return rc; |
@@ -858,11 +775,8 @@ out: | |||
858 | 775 | ||
859 | static void do_sysfs_unregistration(void) | 776 | static void do_sysfs_unregistration(void) |
860 | { | 777 | { |
861 | sysfs_remove_file(&ecryptfs_subsys.kobj, | 778 | sysfs_remove_group(ecryptfs_kobj, &attr_group); |
862 | &sysfs_attr_version.attr); | 779 | kobject_put(ecryptfs_kobj); |
863 | sysfs_remove_file(&ecryptfs_subsys.kobj, | ||
864 | &sysfs_attr_version_str.attr); | ||
865 | subsystem_unregister(&ecryptfs_subsys); | ||
866 | } | 780 | } |
867 | 781 | ||
868 | static int __init ecryptfs_init(void) | 782 | static int __init ecryptfs_init(void) |
@@ -890,7 +804,6 @@ static int __init ecryptfs_init(void) | |||
890 | printk(KERN_ERR "Failed to register filesystem\n"); | 804 | printk(KERN_ERR "Failed to register filesystem\n"); |
891 | goto out_free_kmem_caches; | 805 | goto out_free_kmem_caches; |
892 | } | 806 | } |
893 | kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); | ||
894 | rc = do_sysfs_registration(); | 807 | rc = do_sysfs_registration(); |
895 | if (rc) { | 808 | if (rc) { |
896 | printk(KERN_ERR "sysfs registration failed\n"); | 809 | printk(KERN_ERR "sysfs registration failed\n"); |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index a96d341d154d..9cc2aec27b0d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -427,6 +427,7 @@ int ecryptfs_init_messaging(unsigned int transport) | |||
427 | if (!ecryptfs_daemon_id_hash) { | 427 | if (!ecryptfs_daemon_id_hash) { |
428 | rc = -ENOMEM; | 428 | rc = -ENOMEM; |
429 | ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); | 429 | ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); |
430 | mutex_unlock(&ecryptfs_daemon_id_hash_mux); | ||
430 | goto out; | 431 | goto out; |
431 | } | 432 | } |
432 | for (i = 0; i < ecryptfs_hash_buckets; i++) | 433 | for (i = 0; i < ecryptfs_hash_buckets; i++) |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 16a7a555f392..32c5711d79a3 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -263,14 +263,13 @@ out: | |||
263 | return 0; | 263 | return 0; |
264 | } | 264 | } |
265 | 265 | ||
266 | /* This function must zero any hole we create */ | ||
266 | static int ecryptfs_prepare_write(struct file *file, struct page *page, | 267 | static int ecryptfs_prepare_write(struct file *file, struct page *page, |
267 | unsigned from, unsigned to) | 268 | unsigned from, unsigned to) |
268 | { | 269 | { |
269 | int rc = 0; | 270 | int rc = 0; |
271 | loff_t prev_page_end_size; | ||
270 | 272 | ||
271 | if (from == 0 && to == PAGE_CACHE_SIZE) | ||
272 | goto out; /* If we are writing a full page, it will be | ||
273 | up to date. */ | ||
274 | if (!PageUptodate(page)) { | 273 | if (!PageUptodate(page)) { |
275 | rc = ecryptfs_read_lower_page_segment(page, page->index, 0, | 274 | rc = ecryptfs_read_lower_page_segment(page, page->index, 0, |
276 | PAGE_CACHE_SIZE, | 275 | PAGE_CACHE_SIZE, |
@@ -283,22 +282,32 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
283 | } else | 282 | } else |
284 | SetPageUptodate(page); | 283 | SetPageUptodate(page); |
285 | } | 284 | } |
286 | if (page->index != 0) { | ||
287 | loff_t end_of_prev_pg_pos = | ||
288 | (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1); | ||
289 | 285 | ||
290 | if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) { | 286 | prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); |
287 | |||
288 | /* | ||
289 | * If creating a page or more of holes, zero them out via truncate. | ||
290 | * Note, this will increase i_size. | ||
291 | */ | ||
292 | if (page->index != 0) { | ||
293 | if (prev_page_end_size > i_size_read(page->mapping->host)) { | ||
291 | rc = ecryptfs_truncate(file->f_path.dentry, | 294 | rc = ecryptfs_truncate(file->f_path.dentry, |
292 | end_of_prev_pg_pos); | 295 | prev_page_end_size); |
293 | if (rc) { | 296 | if (rc) { |
294 | printk(KERN_ERR "Error on attempt to " | 297 | printk(KERN_ERR "Error on attempt to " |
295 | "truncate to (higher) offset [%lld];" | 298 | "truncate to (higher) offset [%lld];" |
296 | " rc = [%d]\n", end_of_prev_pg_pos, rc); | 299 | " rc = [%d]\n", prev_page_end_size, rc); |
297 | goto out; | 300 | goto out; |
298 | } | 301 | } |
299 | } | 302 | } |
300 | if (end_of_prev_pg_pos + 1 > i_size_read(page->mapping->host)) | 303 | } |
301 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 304 | /* |
305 | * Writing to a new page, and creating a small hole from start of page? | ||
306 | * Zero it out. | ||
307 | */ | ||
308 | if ((i_size_read(page->mapping->host) == prev_page_end_size) && | ||
309 | (from != 0)) { | ||
310 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | ||
302 | } | 311 | } |
303 | out: | 312 | out: |
304 | return rc; | 313 | return rc; |
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index 9aa345121e09..f638a698dc52 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c | |||
@@ -237,7 +237,6 @@ out: | |||
237 | */ | 237 | */ |
238 | void ecryptfs_release_netlink(void) | 238 | void ecryptfs_release_netlink(void) |
239 | { | 239 | { |
240 | if (ecryptfs_nl_sock && ecryptfs_nl_sock->sk_socket) | 240 | netlink_kernel_release(ecryptfs_nl_sock); |
241 | sock_release(ecryptfs_nl_sock->sk_socket); | ||
242 | ecryptfs_nl_sock = NULL; | 241 | ecryptfs_nl_sock = NULL; |
243 | } | 242 | } |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 6b7474a4336a..948f57624c05 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -124,6 +124,10 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
124 | loff_t pos; | 124 | loff_t pos; |
125 | int rc = 0; | 125 | int rc = 0; |
126 | 126 | ||
127 | /* | ||
128 | * if we are writing beyond current size, then start pos | ||
129 | * at the current size - we'll fill in zeros from there. | ||
130 | */ | ||
127 | if (offset > ecryptfs_file_size) | 131 | if (offset > ecryptfs_file_size) |
128 | pos = ecryptfs_file_size; | 132 | pos = ecryptfs_file_size; |
129 | else | 133 | else |
@@ -137,6 +141,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
137 | if (num_bytes > total_remaining_bytes) | 141 | if (num_bytes > total_remaining_bytes) |
138 | num_bytes = total_remaining_bytes; | 142 | num_bytes = total_remaining_bytes; |
139 | if (pos < offset) { | 143 | if (pos < offset) { |
144 | /* remaining zeros to write, up to destination offset */ | ||
140 | size_t total_remaining_zeros = (offset - pos); | 145 | size_t total_remaining_zeros = (offset - pos); |
141 | 146 | ||
142 | if (num_bytes > total_remaining_zeros) | 147 | if (num_bytes > total_remaining_zeros) |
@@ -167,17 +172,27 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
167 | } | 172 | } |
168 | } | 173 | } |
169 | ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); | 174 | ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); |
175 | |||
176 | /* | ||
177 | * pos: where we're now writing, offset: where the request was | ||
178 | * If current pos is before request, we are filling zeros | ||
179 | * If we are at or beyond request, we are writing the *data* | ||
180 | * If we're in a fresh page beyond eof, zero it in either case | ||
181 | */ | ||
182 | if (pos < offset || !start_offset_in_page) { | ||
183 | /* We are extending past the previous end of the file. | ||
184 | * Fill in zero values to the end of the page */ | ||
185 | memset(((char *)ecryptfs_page_virt | ||
186 | + start_offset_in_page), 0, | ||
187 | PAGE_CACHE_SIZE - start_offset_in_page); | ||
188 | } | ||
189 | |||
190 | /* pos >= offset, we are now writing the data request */ | ||
170 | if (pos >= offset) { | 191 | if (pos >= offset) { |
171 | memcpy(((char *)ecryptfs_page_virt | 192 | memcpy(((char *)ecryptfs_page_virt |
172 | + start_offset_in_page), | 193 | + start_offset_in_page), |
173 | (data + data_offset), num_bytes); | 194 | (data + data_offset), num_bytes); |
174 | data_offset += num_bytes; | 195 | data_offset += num_bytes; |
175 | } else { | ||
176 | /* We are extending past the previous end of the file. | ||
177 | * Fill in zero values up to the start of where we | ||
178 | * will be writing data. */ | ||
179 | memset(((char *)ecryptfs_page_virt | ||
180 | + start_offset_in_page), 0, num_bytes); | ||
181 | } | 196 | } |
182 | kunmap_atomic(ecryptfs_page_virt, KM_USER0); | 197 | kunmap_atomic(ecryptfs_page_virt, KM_USER0); |
183 | flush_dcache_page(ecryptfs_page); | 198 | flush_dcache_page(ecryptfs_page); |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index f8cdab2bee3d..4859c4eecd65 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) | |||
86 | fput(inode_info->lower_file); | 86 | fput(inode_info->lower_file); |
87 | inode_info->lower_file = NULL; | 87 | inode_info->lower_file = NULL; |
88 | d_drop(lower_dentry); | 88 | d_drop(lower_dentry); |
89 | d_delete(lower_dentry); | ||
90 | } | 89 | } |
91 | } | 90 | } |
92 | mutex_unlock(&inode_info->lower_file_mutex); | 91 | mutex_unlock(&inode_info->lower_file_mutex); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 154e25f13d77..6abaf75163f0 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -680,11 +680,31 @@ static int ext2_check_descriptors (struct super_block * sb) | |||
680 | static loff_t ext2_max_size(int bits) | 680 | static loff_t ext2_max_size(int bits) |
681 | { | 681 | { |
682 | loff_t res = EXT2_NDIR_BLOCKS; | 682 | loff_t res = EXT2_NDIR_BLOCKS; |
683 | /* This constant is calculated to be the largest file size for a | 683 | int meta_blocks; |
684 | * dense, 4k-blocksize file such that the total number of | 684 | loff_t upper_limit; |
685 | |||
686 | /* This is calculated to be the largest file size for a | ||
687 | * dense, file such that the total number of | ||
685 | * sectors in the file, including data and all indirect blocks, | 688 | * sectors in the file, including data and all indirect blocks, |
686 | * does not exceed 2^32. */ | 689 | * does not exceed 2^32 -1 |
687 | const loff_t upper_limit = 0x1ff7fffd000LL; | 690 | * __u32 i_blocks representing the total number of |
691 | * 512 bytes blocks of the file | ||
692 | */ | ||
693 | upper_limit = (1LL << 32) - 1; | ||
694 | |||
695 | /* total blocks in file system block size */ | ||
696 | upper_limit >>= (bits - 9); | ||
697 | |||
698 | |||
699 | /* indirect blocks */ | ||
700 | meta_blocks = 1; | ||
701 | /* double indirect blocks */ | ||
702 | meta_blocks += 1 + (1LL << (bits-2)); | ||
703 | /* tripple indirect blocks */ | ||
704 | meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); | ||
705 | |||
706 | upper_limit -= meta_blocks; | ||
707 | upper_limit <<= bits; | ||
688 | 708 | ||
689 | res += 1LL << (bits-2); | 709 | res += 1LL << (bits-2); |
690 | res += 1LL << (2*(bits-2)); | 710 | res += 1LL << (2*(bits-2)); |
@@ -692,6 +712,10 @@ static loff_t ext2_max_size(int bits) | |||
692 | res <<= bits; | 712 | res <<= bits; |
693 | if (res > upper_limit) | 713 | if (res > upper_limit) |
694 | res = upper_limit; | 714 | res = upper_limit; |
715 | |||
716 | if (res > MAX_LFS_FILESIZE) | ||
717 | res = MAX_LFS_FILESIZE; | ||
718 | |||
695 | return res; | 719 | return res; |
696 | } | 720 | } |
697 | 721 | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index de55da9e28ba..f3675cc630e9 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -1436,11 +1436,31 @@ static void ext3_orphan_cleanup (struct super_block * sb, | |||
1436 | static loff_t ext3_max_size(int bits) | 1436 | static loff_t ext3_max_size(int bits) |
1437 | { | 1437 | { |
1438 | loff_t res = EXT3_NDIR_BLOCKS; | 1438 | loff_t res = EXT3_NDIR_BLOCKS; |
1439 | /* This constant is calculated to be the largest file size for a | 1439 | int meta_blocks; |
1440 | * dense, 4k-blocksize file such that the total number of | 1440 | loff_t upper_limit; |
1441 | |||
1442 | /* This is calculated to be the largest file size for a | ||
1443 | * dense, file such that the total number of | ||
1441 | * sectors in the file, including data and all indirect blocks, | 1444 | * sectors in the file, including data and all indirect blocks, |
1442 | * does not exceed 2^32. */ | 1445 | * does not exceed 2^32 -1 |
1443 | const loff_t upper_limit = 0x1ff7fffd000LL; | 1446 | * __u32 i_blocks representing the total number of |
1447 | * 512 bytes blocks of the file | ||
1448 | */ | ||
1449 | upper_limit = (1LL << 32) - 1; | ||
1450 | |||
1451 | /* total blocks in file system block size */ | ||
1452 | upper_limit >>= (bits - 9); | ||
1453 | |||
1454 | |||
1455 | /* indirect blocks */ | ||
1456 | meta_blocks = 1; | ||
1457 | /* double indirect blocks */ | ||
1458 | meta_blocks += 1 + (1LL << (bits-2)); | ||
1459 | /* tripple indirect blocks */ | ||
1460 | meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); | ||
1461 | |||
1462 | upper_limit -= meta_blocks; | ||
1463 | upper_limit <<= bits; | ||
1444 | 1464 | ||
1445 | res += 1LL << (bits-2); | 1465 | res += 1LL << (bits-2); |
1446 | res += 1LL << (2*(bits-2)); | 1466 | res += 1LL << (2*(bits-2)); |
@@ -1448,6 +1468,10 @@ static loff_t ext3_max_size(int bits) | |||
1448 | res <<= bits; | 1468 | res <<= bits; |
1449 | if (res > upper_limit) | 1469 | if (res > upper_limit) |
1450 | res = upper_limit; | 1470 | res = upper_limit; |
1471 | |||
1472 | if (res > MAX_LFS_FILESIZE) | ||
1473 | res = MAX_LFS_FILESIZE; | ||
1474 | |||
1451 | return res; | 1475 | return res; |
1452 | } | 1476 | } |
1453 | 1477 | ||
@@ -1676,7 +1700,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1676 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | 1700 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); |
1677 | sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); | 1701 | sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); |
1678 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | 1702 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); |
1679 | if (EXT3_INODE_SIZE(sb) == 0) | 1703 | if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0) |
1680 | goto cantfind_ext3; | 1704 | goto cantfind_ext3; |
1681 | sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); | 1705 | sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); |
1682 | if (sbi->s_inodes_per_block == 0) | 1706 | if (sbi->s_inodes_per_block == 0) |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ae6e7e502ac9..ac6fa8ca0a2f 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | |||
6 | 6 | ||
7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
10 | 10 | ||
11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 71ee95e534fd..ac75ea953d83 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -29,7 +29,7 @@ | |||
29 | * Calculate the block group number and offset, given a block number | 29 | * Calculate the block group number and offset, given a block number |
30 | */ | 30 | */ |
31 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 31 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
32 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp) | 32 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) |
33 | { | 33 | { |
34 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 34 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
35 | ext4_grpblk_t offset; | 35 | ext4_grpblk_t offset; |
@@ -46,7 +46,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | |||
46 | /* Initializes an uninitialized block bitmap if given, and returns the | 46 | /* Initializes an uninitialized block bitmap if given, and returns the |
47 | * number of blocks free in the group. */ | 47 | * number of blocks free in the group. */ |
48 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 48 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
49 | int block_group, struct ext4_group_desc *gdp) | 49 | ext4_group_t block_group, struct ext4_group_desc *gdp) |
50 | { | 50 | { |
51 | unsigned long start; | 51 | unsigned long start; |
52 | int bit, bit_max; | 52 | int bit, bit_max; |
@@ -60,7 +60,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
60 | * essentially implementing a per-group read-only flag. */ | 60 | * essentially implementing a per-group read-only flag. */ |
61 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 61 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
62 | ext4_error(sb, __FUNCTION__, | 62 | ext4_error(sb, __FUNCTION__, |
63 | "Checksum bad for group %u\n", block_group); | 63 | "Checksum bad for group %lu\n", block_group); |
64 | gdp->bg_free_blocks_count = 0; | 64 | gdp->bg_free_blocks_count = 0; |
65 | gdp->bg_free_inodes_count = 0; | 65 | gdp->bg_free_inodes_count = 0; |
66 | gdp->bg_itable_unused = 0; | 66 | gdp->bg_itable_unused = 0; |
@@ -153,7 +153,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
153 | * group descriptor | 153 | * group descriptor |
154 | */ | 154 | */ |
155 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 155 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
156 | unsigned int block_group, | 156 | ext4_group_t block_group, |
157 | struct buffer_head ** bh) | 157 | struct buffer_head ** bh) |
158 | { | 158 | { |
159 | unsigned long group_desc; | 159 | unsigned long group_desc; |
@@ -164,7 +164,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
164 | if (block_group >= sbi->s_groups_count) { | 164 | if (block_group >= sbi->s_groups_count) { |
165 | ext4_error (sb, "ext4_get_group_desc", | 165 | ext4_error (sb, "ext4_get_group_desc", |
166 | "block_group >= groups_count - " | 166 | "block_group >= groups_count - " |
167 | "block_group = %d, groups_count = %lu", | 167 | "block_group = %lu, groups_count = %lu", |
168 | block_group, sbi->s_groups_count); | 168 | block_group, sbi->s_groups_count); |
169 | 169 | ||
170 | return NULL; | 170 | return NULL; |
@@ -176,7 +176,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
176 | if (!sbi->s_group_desc[group_desc]) { | 176 | if (!sbi->s_group_desc[group_desc]) { |
177 | ext4_error (sb, "ext4_get_group_desc", | 177 | ext4_error (sb, "ext4_get_group_desc", |
178 | "Group descriptor not loaded - " | 178 | "Group descriptor not loaded - " |
179 | "block_group = %d, group_desc = %lu, desc = %lu", | 179 | "block_group = %lu, group_desc = %lu, desc = %lu", |
180 | block_group, group_desc, offset); | 180 | block_group, group_desc, offset); |
181 | return NULL; | 181 | return NULL; |
182 | } | 182 | } |
@@ -189,18 +189,70 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
189 | return desc; | 189 | return desc; |
190 | } | 190 | } |
191 | 191 | ||
192 | static int ext4_valid_block_bitmap(struct super_block *sb, | ||
193 | struct ext4_group_desc *desc, | ||
194 | unsigned int block_group, | ||
195 | struct buffer_head *bh) | ||
196 | { | ||
197 | ext4_grpblk_t offset; | ||
198 | ext4_grpblk_t next_zero_bit; | ||
199 | ext4_fsblk_t bitmap_blk; | ||
200 | ext4_fsblk_t group_first_block; | ||
201 | |||
202 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
203 | /* with FLEX_BG, the inode/block bitmaps and itable | ||
204 | * blocks may not be in the group at all | ||
205 | * so the bitmap validation will be skipped for those groups | ||
206 | * or it has to also read the block group where the bitmaps | ||
207 | * are located to verify they are set. | ||
208 | */ | ||
209 | return 1; | ||
210 | } | ||
211 | group_first_block = ext4_group_first_block_no(sb, block_group); | ||
212 | |||
213 | /* check whether block bitmap block number is set */ | ||
214 | bitmap_blk = ext4_block_bitmap(sb, desc); | ||
215 | offset = bitmap_blk - group_first_block; | ||
216 | if (!ext4_test_bit(offset, bh->b_data)) | ||
217 | /* bad block bitmap */ | ||
218 | goto err_out; | ||
219 | |||
220 | /* check whether the inode bitmap block number is set */ | ||
221 | bitmap_blk = ext4_inode_bitmap(sb, desc); | ||
222 | offset = bitmap_blk - group_first_block; | ||
223 | if (!ext4_test_bit(offset, bh->b_data)) | ||
224 | /* bad block bitmap */ | ||
225 | goto err_out; | ||
226 | |||
227 | /* check whether the inode table block number is set */ | ||
228 | bitmap_blk = ext4_inode_table(sb, desc); | ||
229 | offset = bitmap_blk - group_first_block; | ||
230 | next_zero_bit = ext4_find_next_zero_bit(bh->b_data, | ||
231 | offset + EXT4_SB(sb)->s_itb_per_group, | ||
232 | offset); | ||
233 | if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) | ||
234 | /* good bitmap for inode tables */ | ||
235 | return 1; | ||
236 | |||
237 | err_out: | ||
238 | ext4_error(sb, __FUNCTION__, | ||
239 | "Invalid block bitmap - " | ||
240 | "block_group = %d, block = %llu", | ||
241 | block_group, bitmap_blk); | ||
242 | return 0; | ||
243 | } | ||
192 | /** | 244 | /** |
193 | * read_block_bitmap() | 245 | * read_block_bitmap() |
194 | * @sb: super block | 246 | * @sb: super block |
195 | * @block_group: given block group | 247 | * @block_group: given block group |
196 | * | 248 | * |
197 | * Read the bitmap for a given block_group, reading into the specified | 249 | * Read the bitmap for a given block_group,and validate the |
198 | * slot in the superblock's bitmap cache. | 250 | * bits for block/inode/inode tables are set in the bitmaps |
199 | * | 251 | * |
200 | * Return buffer_head on success or NULL in case of failure. | 252 | * Return buffer_head on success or NULL in case of failure. |
201 | */ | 253 | */ |
202 | struct buffer_head * | 254 | struct buffer_head * |
203 | read_block_bitmap(struct super_block *sb, unsigned int block_group) | 255 | read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
204 | { | 256 | { |
205 | struct ext4_group_desc * desc; | 257 | struct ext4_group_desc * desc; |
206 | struct buffer_head * bh = NULL; | 258 | struct buffer_head * bh = NULL; |
@@ -210,25 +262,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) | |||
210 | if (!desc) | 262 | if (!desc) |
211 | return NULL; | 263 | return NULL; |
212 | bitmap_blk = ext4_block_bitmap(sb, desc); | 264 | bitmap_blk = ext4_block_bitmap(sb, desc); |
265 | bh = sb_getblk(sb, bitmap_blk); | ||
266 | if (unlikely(!bh)) { | ||
267 | ext4_error(sb, __FUNCTION__, | ||
268 | "Cannot read block bitmap - " | ||
269 | "block_group = %d, block_bitmap = %llu", | ||
270 | (int)block_group, (unsigned long long)bitmap_blk); | ||
271 | return NULL; | ||
272 | } | ||
273 | if (bh_uptodate_or_lock(bh)) | ||
274 | return bh; | ||
275 | |||
213 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 276 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
214 | bh = sb_getblk(sb, bitmap_blk); | 277 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
215 | if (!buffer_uptodate(bh)) { | 278 | set_buffer_uptodate(bh); |
216 | lock_buffer(bh); | 279 | unlock_buffer(bh); |
217 | if (!buffer_uptodate(bh)) { | 280 | return bh; |
218 | ext4_init_block_bitmap(sb, bh, block_group, | ||
219 | desc); | ||
220 | set_buffer_uptodate(bh); | ||
221 | } | ||
222 | unlock_buffer(bh); | ||
223 | } | ||
224 | } else { | ||
225 | bh = sb_bread(sb, bitmap_blk); | ||
226 | } | 281 | } |
227 | if (!bh) | 282 | if (bh_submit_read(bh) < 0) { |
228 | ext4_error (sb, __FUNCTION__, | 283 | put_bh(bh); |
284 | ext4_error(sb, __FUNCTION__, | ||
229 | "Cannot read block bitmap - " | 285 | "Cannot read block bitmap - " |
230 | "block_group = %d, block_bitmap = %llu", | 286 | "block_group = %d, block_bitmap = %llu", |
231 | block_group, bitmap_blk); | 287 | (int)block_group, (unsigned long long)bitmap_blk); |
288 | return NULL; | ||
289 | } | ||
290 | if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { | ||
291 | put_bh(bh); | ||
292 | return NULL; | ||
293 | } | ||
294 | |||
232 | return bh; | 295 | return bh; |
233 | } | 296 | } |
234 | /* | 297 | /* |
@@ -320,7 +383,7 @@ restart: | |||
320 | */ | 383 | */ |
321 | static int | 384 | static int |
322 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | 385 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, |
323 | unsigned int group, struct super_block * sb) | 386 | ext4_group_t group, struct super_block *sb) |
324 | { | 387 | { |
325 | ext4_fsblk_t group_first_block, group_last_block; | 388 | ext4_fsblk_t group_first_block, group_last_block; |
326 | 389 | ||
@@ -463,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | |||
463 | * when setting the reservation window size through ioctl before the file | 526 | * when setting the reservation window size through ioctl before the file |
464 | * is open for write (needs block allocation). | 527 | * is open for write (needs block allocation). |
465 | * | 528 | * |
466 | * Needs truncate_mutex protection prior to call this function. | 529 | * Needs down_write(i_data_sem) protection prior to call this function. |
467 | */ | 530 | */ |
468 | void ext4_init_block_alloc_info(struct inode *inode) | 531 | void ext4_init_block_alloc_info(struct inode *inode) |
469 | { | 532 | { |
@@ -514,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode) | |||
514 | struct ext4_reserve_window_node *rsv; | 577 | struct ext4_reserve_window_node *rsv; |
515 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | 578 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; |
516 | 579 | ||
580 | ext4_mb_discard_inode_preallocations(inode); | ||
581 | |||
517 | if (!block_i) | 582 | if (!block_i) |
518 | return; | 583 | return; |
519 | 584 | ||
@@ -540,7 +605,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
540 | { | 605 | { |
541 | struct buffer_head *bitmap_bh = NULL; | 606 | struct buffer_head *bitmap_bh = NULL; |
542 | struct buffer_head *gd_bh; | 607 | struct buffer_head *gd_bh; |
543 | unsigned long block_group; | 608 | ext4_group_t block_group; |
544 | ext4_grpblk_t bit; | 609 | ext4_grpblk_t bit; |
545 | unsigned long i; | 610 | unsigned long i; |
546 | unsigned long overflow; | 611 | unsigned long overflow; |
@@ -587,11 +652,13 @@ do_more: | |||
587 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | 652 | in_range(ext4_inode_bitmap(sb, desc), block, count) || |
588 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 653 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
589 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 654 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
590 | sbi->s_itb_per_group)) | 655 | sbi->s_itb_per_group)) { |
591 | ext4_error (sb, "ext4_free_blocks", | 656 | ext4_error (sb, "ext4_free_blocks", |
592 | "Freeing blocks in system zones - " | 657 | "Freeing blocks in system zones - " |
593 | "Block = %llu, count = %lu", | 658 | "Block = %llu, count = %lu", |
594 | block, count); | 659 | block, count); |
660 | goto error_return; | ||
661 | } | ||
595 | 662 | ||
596 | /* | 663 | /* |
597 | * We are about to start releasing blocks in the bitmap, | 664 | * We are about to start releasing blocks in the bitmap, |
@@ -720,19 +787,29 @@ error_return: | |||
720 | * @inode: inode | 787 | * @inode: inode |
721 | * @block: start physical block to free | 788 | * @block: start physical block to free |
722 | * @count: number of blocks to count | 789 | * @count: number of blocks to count |
790 | * @metadata: Are these metadata blocks | ||
723 | */ | 791 | */ |
724 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | 792 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
725 | ext4_fsblk_t block, unsigned long count) | 793 | ext4_fsblk_t block, unsigned long count, |
794 | int metadata) | ||
726 | { | 795 | { |
727 | struct super_block * sb; | 796 | struct super_block * sb; |
728 | unsigned long dquot_freed_blocks; | 797 | unsigned long dquot_freed_blocks; |
729 | 798 | ||
799 | /* this isn't the right place to decide whether block is metadata | ||
800 | * inode.c/extents.c knows better, but for safety ... */ | ||
801 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | ||
802 | ext4_should_journal_data(inode)) | ||
803 | metadata = 1; | ||
804 | |||
730 | sb = inode->i_sb; | 805 | sb = inode->i_sb; |
731 | if (!sb) { | 806 | |
732 | printk ("ext4_free_blocks: nonexistent device"); | 807 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) |
733 | return; | 808 | ext4_free_blocks_sb(handle, sb, block, count, |
734 | } | 809 | &dquot_freed_blocks); |
735 | ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); | 810 | else |
811 | ext4_mb_free_blocks(handle, inode, block, count, | ||
812 | metadata, &dquot_freed_blocks); | ||
736 | if (dquot_freed_blocks) | 813 | if (dquot_freed_blocks) |
737 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 814 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
738 | return; | 815 | return; |
@@ -920,9 +997,10 @@ claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | |||
920 | * ext4_journal_release_buffer(), else we'll run out of credits. | 997 | * ext4_journal_release_buffer(), else we'll run out of credits. |
921 | */ | 998 | */ |
922 | static ext4_grpblk_t | 999 | static ext4_grpblk_t |
923 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, | 1000 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, |
924 | struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, | 1001 | ext4_group_t group, struct buffer_head *bitmap_bh, |
925 | unsigned long *count, struct ext4_reserve_window *my_rsv) | 1002 | ext4_grpblk_t grp_goal, unsigned long *count, |
1003 | struct ext4_reserve_window *my_rsv) | ||
926 | { | 1004 | { |
927 | ext4_fsblk_t group_first_block; | 1005 | ext4_fsblk_t group_first_block; |
928 | ext4_grpblk_t start, end; | 1006 | ext4_grpblk_t start, end; |
@@ -1156,7 +1234,7 @@ static int find_next_reservable_window( | |||
1156 | */ | 1234 | */ |
1157 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | 1235 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, |
1158 | ext4_grpblk_t grp_goal, struct super_block *sb, | 1236 | ext4_grpblk_t grp_goal, struct super_block *sb, |
1159 | unsigned int group, struct buffer_head *bitmap_bh) | 1237 | ext4_group_t group, struct buffer_head *bitmap_bh) |
1160 | { | 1238 | { |
1161 | struct ext4_reserve_window_node *search_head; | 1239 | struct ext4_reserve_window_node *search_head; |
1162 | ext4_fsblk_t group_first_block, group_end_block, start_block; | 1240 | ext4_fsblk_t group_first_block, group_end_block, start_block; |
@@ -1354,7 +1432,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | |||
1354 | */ | 1432 | */ |
1355 | static ext4_grpblk_t | 1433 | static ext4_grpblk_t |
1356 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | 1434 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, |
1357 | unsigned int group, struct buffer_head *bitmap_bh, | 1435 | ext4_group_t group, struct buffer_head *bitmap_bh, |
1358 | ext4_grpblk_t grp_goal, | 1436 | ext4_grpblk_t grp_goal, |
1359 | struct ext4_reserve_window_node * my_rsv, | 1437 | struct ext4_reserve_window_node * my_rsv, |
1360 | unsigned long *count, int *errp) | 1438 | unsigned long *count, int *errp) |
@@ -1510,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1510 | } | 1588 | } |
1511 | 1589 | ||
1512 | /** | 1590 | /** |
1513 | * ext4_new_blocks() -- core block(s) allocation function | 1591 | * ext4_new_blocks_old() -- core block(s) allocation function |
1514 | * @handle: handle to this transaction | 1592 | * @handle: handle to this transaction |
1515 | * @inode: file inode | 1593 | * @inode: file inode |
1516 | * @goal: given target block(filesystem wide) | 1594 | * @goal: given target block(filesystem wide) |
@@ -1523,17 +1601,17 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
1523 | * any specific goal block. | 1601 | * any specific goal block. |
1524 | * | 1602 | * |
1525 | */ | 1603 | */ |
1526 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1604 | ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, |
1527 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 1605 | ext4_fsblk_t goal, unsigned long *count, int *errp) |
1528 | { | 1606 | { |
1529 | struct buffer_head *bitmap_bh = NULL; | 1607 | struct buffer_head *bitmap_bh = NULL; |
1530 | struct buffer_head *gdp_bh; | 1608 | struct buffer_head *gdp_bh; |
1531 | unsigned long group_no; | 1609 | ext4_group_t group_no; |
1532 | int goal_group; | 1610 | ext4_group_t goal_group; |
1533 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | 1611 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ |
1534 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | 1612 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ |
1535 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ | 1613 | ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ |
1536 | int bgi; /* blockgroup iteration index */ | 1614 | ext4_group_t bgi; /* blockgroup iteration index */ |
1537 | int fatal = 0, err; | 1615 | int fatal = 0, err; |
1538 | int performed_allocation = 0; | 1616 | int performed_allocation = 0; |
1539 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | 1617 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ |
@@ -1544,10 +1622,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | |||
1544 | struct ext4_reserve_window_node *my_rsv = NULL; | 1622 | struct ext4_reserve_window_node *my_rsv = NULL; |
1545 | struct ext4_block_alloc_info *block_i; | 1623 | struct ext4_block_alloc_info *block_i; |
1546 | unsigned short windowsz = 0; | 1624 | unsigned short windowsz = 0; |
1547 | #ifdef EXT4FS_DEBUG | 1625 | ext4_group_t ngroups; |
1548 | static int goal_hits, goal_attempts; | ||
1549 | #endif | ||
1550 | unsigned long ngroups; | ||
1551 | unsigned long num = *count; | 1626 | unsigned long num = *count; |
1552 | 1627 | ||
1553 | *errp = -ENOSPC; | 1628 | *errp = -ENOSPC; |
@@ -1567,7 +1642,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | |||
1567 | 1642 | ||
1568 | sbi = EXT4_SB(sb); | 1643 | sbi = EXT4_SB(sb); |
1569 | es = EXT4_SB(sb)->s_es; | 1644 | es = EXT4_SB(sb)->s_es; |
1570 | ext4_debug("goal=%lu.\n", goal); | 1645 | ext4_debug("goal=%llu.\n", goal); |
1571 | /* | 1646 | /* |
1572 | * Allocate a block from reservation only when | 1647 | * Allocate a block from reservation only when |
1573 | * filesystem is mounted with reservation(default,-o reservation), and | 1648 | * filesystem is mounted with reservation(default,-o reservation), and |
@@ -1677,7 +1752,7 @@ retry_alloc: | |||
1677 | 1752 | ||
1678 | allocated: | 1753 | allocated: |
1679 | 1754 | ||
1680 | ext4_debug("using block group %d(%d)\n", | 1755 | ext4_debug("using block group %lu(%d)\n", |
1681 | group_no, gdp->bg_free_blocks_count); | 1756 | group_no, gdp->bg_free_blocks_count); |
1682 | 1757 | ||
1683 | BUFFER_TRACE(gdp_bh, "get_write_access"); | 1758 | BUFFER_TRACE(gdp_bh, "get_write_access"); |
@@ -1692,11 +1767,13 @@ allocated: | |||
1692 | in_range(ret_block, ext4_inode_table(sb, gdp), | 1767 | in_range(ret_block, ext4_inode_table(sb, gdp), |
1693 | EXT4_SB(sb)->s_itb_per_group) || | 1768 | EXT4_SB(sb)->s_itb_per_group) || |
1694 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | 1769 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), |
1695 | EXT4_SB(sb)->s_itb_per_group)) | 1770 | EXT4_SB(sb)->s_itb_per_group)) { |
1696 | ext4_error(sb, "ext4_new_block", | 1771 | ext4_error(sb, "ext4_new_block", |
1697 | "Allocating block in system zone - " | 1772 | "Allocating block in system zone - " |
1698 | "blocks from %llu, length %lu", | 1773 | "blocks from %llu, length %lu", |
1699 | ret_block, num); | 1774 | ret_block, num); |
1775 | goto out; | ||
1776 | } | ||
1700 | 1777 | ||
1701 | performed_allocation = 1; | 1778 | performed_allocation = 1; |
1702 | 1779 | ||
@@ -1743,9 +1820,6 @@ allocated: | |||
1743 | * list of some description. We don't know in advance whether | 1820 | * list of some description. We don't know in advance whether |
1744 | * the caller wants to use it as metadata or data. | 1821 | * the caller wants to use it as metadata or data. |
1745 | */ | 1822 | */ |
1746 | ext4_debug("allocating block %lu. Goal hits %d of %d.\n", | ||
1747 | ret_block, goal_hits, goal_attempts); | ||
1748 | |||
1749 | spin_lock(sb_bgl_lock(sbi, group_no)); | 1823 | spin_lock(sb_bgl_lock(sbi, group_no)); |
1750 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | 1824 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) |
1751 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 1825 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
@@ -1787,13 +1861,46 @@ out: | |||
1787 | } | 1861 | } |
1788 | 1862 | ||
1789 | ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, | 1863 | ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, |
1790 | ext4_fsblk_t goal, int *errp) | 1864 | ext4_fsblk_t goal, int *errp) |
1791 | { | 1865 | { |
1792 | unsigned long count = 1; | 1866 | struct ext4_allocation_request ar; |
1867 | ext4_fsblk_t ret; | ||
1793 | 1868 | ||
1794 | return ext4_new_blocks(handle, inode, goal, &count, errp); | 1869 | if (!test_opt(inode->i_sb, MBALLOC)) { |
1870 | unsigned long count = 1; | ||
1871 | ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); | ||
1872 | return ret; | ||
1873 | } | ||
1874 | |||
1875 | memset(&ar, 0, sizeof(ar)); | ||
1876 | ar.inode = inode; | ||
1877 | ar.goal = goal; | ||
1878 | ar.len = 1; | ||
1879 | ret = ext4_mb_new_blocks(handle, &ar, errp); | ||
1880 | return ret; | ||
1881 | } | ||
1882 | |||
1883 | ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | ||
1884 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
1885 | { | ||
1886 | struct ext4_allocation_request ar; | ||
1887 | ext4_fsblk_t ret; | ||
1888 | |||
1889 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
1890 | ret = ext4_new_blocks_old(handle, inode, goal, count, errp); | ||
1891 | return ret; | ||
1892 | } | ||
1893 | |||
1894 | memset(&ar, 0, sizeof(ar)); | ||
1895 | ar.inode = inode; | ||
1896 | ar.goal = goal; | ||
1897 | ar.len = *count; | ||
1898 | ret = ext4_mb_new_blocks(handle, &ar, errp); | ||
1899 | *count = ar.len; | ||
1900 | return ret; | ||
1795 | } | 1901 | } |
1796 | 1902 | ||
1903 | |||
1797 | /** | 1904 | /** |
1798 | * ext4_count_free_blocks() -- count filesystem free blocks | 1905 | * ext4_count_free_blocks() -- count filesystem free blocks |
1799 | * @sb: superblock | 1906 | * @sb: superblock |
@@ -1804,8 +1911,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
1804 | { | 1911 | { |
1805 | ext4_fsblk_t desc_count; | 1912 | ext4_fsblk_t desc_count; |
1806 | struct ext4_group_desc *gdp; | 1913 | struct ext4_group_desc *gdp; |
1807 | int i; | 1914 | ext4_group_t i; |
1808 | unsigned long ngroups = EXT4_SB(sb)->s_groups_count; | 1915 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
1809 | #ifdef EXT4FS_DEBUG | 1916 | #ifdef EXT4FS_DEBUG |
1810 | struct ext4_super_block *es; | 1917 | struct ext4_super_block *es; |
1811 | ext4_fsblk_t bitmap_count; | 1918 | ext4_fsblk_t bitmap_count; |
@@ -1829,14 +1936,14 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
1829 | continue; | 1936 | continue; |
1830 | 1937 | ||
1831 | x = ext4_count_free(bitmap_bh, sb->s_blocksize); | 1938 | x = ext4_count_free(bitmap_bh, sb->s_blocksize); |
1832 | printk("group %d: stored = %d, counted = %lu\n", | 1939 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
1833 | i, le16_to_cpu(gdp->bg_free_blocks_count), x); | 1940 | i, le16_to_cpu(gdp->bg_free_blocks_count), x); |
1834 | bitmap_count += x; | 1941 | bitmap_count += x; |
1835 | } | 1942 | } |
1836 | brelse(bitmap_bh); | 1943 | brelse(bitmap_bh); |
1837 | printk("ext4_count_free_blocks: stored = %llu" | 1944 | printk("ext4_count_free_blocks: stored = %llu" |
1838 | ", computed = %llu, %llu\n", | 1945 | ", computed = %llu, %llu\n", |
1839 | EXT4_FREE_BLOCKS_COUNT(es), | 1946 | ext4_free_blocks_count(es), |
1840 | desc_count, bitmap_count); | 1947 | desc_count, bitmap_count); |
1841 | return bitmap_count; | 1948 | return bitmap_count; |
1842 | #else | 1949 | #else |
@@ -1853,7 +1960,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
1853 | #endif | 1960 | #endif |
1854 | } | 1961 | } |
1855 | 1962 | ||
1856 | static inline int test_root(int a, int b) | 1963 | static inline int test_root(ext4_group_t a, int b) |
1857 | { | 1964 | { |
1858 | int num = b; | 1965 | int num = b; |
1859 | 1966 | ||
@@ -1862,7 +1969,7 @@ static inline int test_root(int a, int b) | |||
1862 | return num == a; | 1969 | return num == a; |
1863 | } | 1970 | } |
1864 | 1971 | ||
1865 | static int ext4_group_sparse(int group) | 1972 | static int ext4_group_sparse(ext4_group_t group) |
1866 | { | 1973 | { |
1867 | if (group <= 1) | 1974 | if (group <= 1) |
1868 | return 1; | 1975 | return 1; |
@@ -1880,7 +1987,7 @@ static int ext4_group_sparse(int group) | |||
1880 | * Return the number of blocks used by the superblock (primary or backup) | 1987 | * Return the number of blocks used by the superblock (primary or backup) |
1881 | * in this group. Currently this will be only 0 or 1. | 1988 | * in this group. Currently this will be only 0 or 1. |
1882 | */ | 1989 | */ |
1883 | int ext4_bg_has_super(struct super_block *sb, int group) | 1990 | int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) |
1884 | { | 1991 | { |
1885 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1992 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
1886 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && | 1993 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && |
@@ -1889,18 +1996,20 @@ int ext4_bg_has_super(struct super_block *sb, int group) | |||
1889 | return 1; | 1996 | return 1; |
1890 | } | 1997 | } |
1891 | 1998 | ||
1892 | static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) | 1999 | static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, |
2000 | ext4_group_t group) | ||
1893 | { | 2001 | { |
1894 | unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); | 2002 | unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); |
1895 | unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); | 2003 | ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb); |
1896 | unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; | 2004 | ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1; |
1897 | 2005 | ||
1898 | if (group == first || group == first + 1 || group == last) | 2006 | if (group == first || group == first + 1 || group == last) |
1899 | return 1; | 2007 | return 1; |
1900 | return 0; | 2008 | return 0; |
1901 | } | 2009 | } |
1902 | 2010 | ||
1903 | static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) | 2011 | static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, |
2012 | ext4_group_t group) | ||
1904 | { | 2013 | { |
1905 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2014 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
1906 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && | 2015 | EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && |
@@ -1918,7 +2027,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) | |||
1918 | * (primary or backup) in this group. In the future there may be a | 2027 | * (primary or backup) in this group. In the future there may be a |
1919 | * different number of descriptor blocks in each group. | 2028 | * different number of descriptor blocks in each group. |
1920 | */ | 2029 | */ |
1921 | unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) | 2030 | unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) |
1922 | { | 2031 | { |
1923 | unsigned long first_meta_bg = | 2032 | unsigned long first_meta_bg = |
1924 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); | 2033 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f612bef98315..33888bb58144 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
67 | unsigned long offset) | 67 | unsigned long offset) |
68 | { | 68 | { |
69 | const char * error_msg = NULL; | 69 | const char * error_msg = NULL; |
70 | const int rlen = le16_to_cpu(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
71 | 71 | ||
72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
73 | error_msg = "rec_len is smaller than minimal"; | 73 | error_msg = "rec_len is smaller than minimal"; |
@@ -124,7 +124,7 @@ static int ext4_readdir(struct file * filp, | |||
124 | offset = filp->f_pos & (sb->s_blocksize - 1); | 124 | offset = filp->f_pos & (sb->s_blocksize - 1); |
125 | 125 | ||
126 | while (!error && !stored && filp->f_pos < inode->i_size) { | 126 | while (!error && !stored && filp->f_pos < inode->i_size) { |
127 | unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | 127 | ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); |
128 | struct buffer_head map_bh; | 128 | struct buffer_head map_bh; |
129 | struct buffer_head *bh = NULL; | 129 | struct buffer_head *bh = NULL; |
130 | 130 | ||
@@ -172,10 +172,10 @@ revalidate: | |||
172 | * least that it is non-zero. A | 172 | * least that it is non-zero. A |
173 | * failure will be detected in the | 173 | * failure will be detected in the |
174 | * dirent test below. */ | 174 | * dirent test below. */ |
175 | if (le16_to_cpu(de->rec_len) < | 175 | if (ext4_rec_len_from_disk(de->rec_len) |
176 | EXT4_DIR_REC_LEN(1)) | 176 | < EXT4_DIR_REC_LEN(1)) |
177 | break; | 177 | break; |
178 | i += le16_to_cpu(de->rec_len); | 178 | i += ext4_rec_len_from_disk(de->rec_len); |
179 | } | 179 | } |
180 | offset = i; | 180 | offset = i; |
181 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | 181 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) |
@@ -197,7 +197,7 @@ revalidate: | |||
197 | ret = stored; | 197 | ret = stored; |
198 | goto out; | 198 | goto out; |
199 | } | 199 | } |
200 | offset += le16_to_cpu(de->rec_len); | 200 | offset += ext4_rec_len_from_disk(de->rec_len); |
201 | if (le32_to_cpu(de->inode)) { | 201 | if (le32_to_cpu(de->inode)) { |
202 | /* We might block in the next section | 202 | /* We might block in the next section |
203 | * if the data destination is | 203 | * if the data destination is |
@@ -219,7 +219,7 @@ revalidate: | |||
219 | goto revalidate; | 219 | goto revalidate; |
220 | stored ++; | 220 | stored ++; |
221 | } | 221 | } |
222 | filp->f_pos += le16_to_cpu(de->rec_len); | 222 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
223 | } | 223 | } |
224 | offset = 0; | 224 | offset = 0; |
225 | brelse (bh); | 225 | brelse (bh); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 85287742f2ae..bc7081f1fbe8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -61,7 +61,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | |||
61 | * idx_pblock: | 61 | * idx_pblock: |
62 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | 62 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t |
63 | */ | 63 | */ |
64 | static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | 64 | ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) |
65 | { | 65 | { |
66 | ext4_fsblk_t block; | 66 | ext4_fsblk_t block; |
67 | 67 | ||
@@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | |||
75 | * stores a large physical block number into an extent struct, | 75 | * stores a large physical block number into an extent struct, |
76 | * breaking it into parts | 76 | * breaking it into parts |
77 | */ | 77 | */ |
78 | static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) | 78 | void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) |
79 | { | 79 | { |
80 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | 80 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); |
81 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 81 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
@@ -144,7 +144,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode, | |||
144 | 144 | ||
145 | static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | 145 | static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, |
146 | struct ext4_ext_path *path, | 146 | struct ext4_ext_path *path, |
147 | ext4_fsblk_t block) | 147 | ext4_lblk_t block) |
148 | { | 148 | { |
149 | struct ext4_inode_info *ei = EXT4_I(inode); | 149 | struct ext4_inode_info *ei = EXT4_I(inode); |
150 | ext4_fsblk_t bg_start; | 150 | ext4_fsblk_t bg_start; |
@@ -367,13 +367,14 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path) | |||
367 | * the header must be checked before calling this | 367 | * the header must be checked before calling this |
368 | */ | 368 | */ |
369 | static void | 369 | static void |
370 | ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) | 370 | ext4_ext_binsearch_idx(struct inode *inode, |
371 | struct ext4_ext_path *path, ext4_lblk_t block) | ||
371 | { | 372 | { |
372 | struct ext4_extent_header *eh = path->p_hdr; | 373 | struct ext4_extent_header *eh = path->p_hdr; |
373 | struct ext4_extent_idx *r, *l, *m; | 374 | struct ext4_extent_idx *r, *l, *m; |
374 | 375 | ||
375 | 376 | ||
376 | ext_debug("binsearch for %d(idx): ", block); | 377 | ext_debug("binsearch for %u(idx): ", block); |
377 | 378 | ||
378 | l = EXT_FIRST_INDEX(eh) + 1; | 379 | l = EXT_FIRST_INDEX(eh) + 1; |
379 | r = EXT_LAST_INDEX(eh); | 380 | r = EXT_LAST_INDEX(eh); |
@@ -425,7 +426,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc | |||
425 | * the header must be checked before calling this | 426 | * the header must be checked before calling this |
426 | */ | 427 | */ |
427 | static void | 428 | static void |
428 | ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) | 429 | ext4_ext_binsearch(struct inode *inode, |
430 | struct ext4_ext_path *path, ext4_lblk_t block) | ||
429 | { | 431 | { |
430 | struct ext4_extent_header *eh = path->p_hdr; | 432 | struct ext4_extent_header *eh = path->p_hdr; |
431 | struct ext4_extent *r, *l, *m; | 433 | struct ext4_extent *r, *l, *m; |
@@ -438,7 +440,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) | |||
438 | return; | 440 | return; |
439 | } | 441 | } |
440 | 442 | ||
441 | ext_debug("binsearch for %d: ", block); | 443 | ext_debug("binsearch for %u: ", block); |
442 | 444 | ||
443 | l = EXT_FIRST_EXTENT(eh) + 1; | 445 | l = EXT_FIRST_EXTENT(eh) + 1; |
444 | r = EXT_LAST_EXTENT(eh); | 446 | r = EXT_LAST_EXTENT(eh); |
@@ -494,7 +496,8 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
494 | } | 496 | } |
495 | 497 | ||
496 | struct ext4_ext_path * | 498 | struct ext4_ext_path * |
497 | ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) | 499 | ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, |
500 | struct ext4_ext_path *path) | ||
498 | { | 501 | { |
499 | struct ext4_extent_header *eh; | 502 | struct ext4_extent_header *eh; |
500 | struct buffer_head *bh; | 503 | struct buffer_head *bh; |
@@ -763,7 +766,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
763 | while (k--) { | 766 | while (k--) { |
764 | oldblock = newblock; | 767 | oldblock = newblock; |
765 | newblock = ablocks[--a]; | 768 | newblock = ablocks[--a]; |
766 | bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); | 769 | bh = sb_getblk(inode->i_sb, newblock); |
767 | if (!bh) { | 770 | if (!bh) { |
768 | err = -EIO; | 771 | err = -EIO; |
769 | goto cleanup; | 772 | goto cleanup; |
@@ -783,9 +786,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
783 | fidx->ei_block = border; | 786 | fidx->ei_block = border; |
784 | ext4_idx_store_pblock(fidx, oldblock); | 787 | ext4_idx_store_pblock(fidx, oldblock); |
785 | 788 | ||
786 | ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, | 789 | ext_debug("int.index at %d (block %llu): %u -> %llu\n", |
787 | newblock, (unsigned long) le32_to_cpu(border), | 790 | i, newblock, le32_to_cpu(border), oldblock); |
788 | oldblock); | ||
789 | /* copy indexes */ | 791 | /* copy indexes */ |
790 | m = 0; | 792 | m = 0; |
791 | path[i].p_idx++; | 793 | path[i].p_idx++; |
@@ -851,7 +853,7 @@ cleanup: | |||
851 | for (i = 0; i < depth; i++) { | 853 | for (i = 0; i < depth; i++) { |
852 | if (!ablocks[i]) | 854 | if (!ablocks[i]) |
853 | continue; | 855 | continue; |
854 | ext4_free_blocks(handle, inode, ablocks[i], 1); | 856 | ext4_free_blocks(handle, inode, ablocks[i], 1, 1); |
855 | } | 857 | } |
856 | } | 858 | } |
857 | kfree(ablocks); | 859 | kfree(ablocks); |
@@ -979,8 +981,8 @@ repeat: | |||
979 | /* refill path */ | 981 | /* refill path */ |
980 | ext4_ext_drop_refs(path); | 982 | ext4_ext_drop_refs(path); |
981 | path = ext4_ext_find_extent(inode, | 983 | path = ext4_ext_find_extent(inode, |
982 | le32_to_cpu(newext->ee_block), | 984 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
983 | path); | 985 | path); |
984 | if (IS_ERR(path)) | 986 | if (IS_ERR(path)) |
985 | err = PTR_ERR(path); | 987 | err = PTR_ERR(path); |
986 | } else { | 988 | } else { |
@@ -992,8 +994,8 @@ repeat: | |||
992 | /* refill path */ | 994 | /* refill path */ |
993 | ext4_ext_drop_refs(path); | 995 | ext4_ext_drop_refs(path); |
994 | path = ext4_ext_find_extent(inode, | 996 | path = ext4_ext_find_extent(inode, |
995 | le32_to_cpu(newext->ee_block), | 997 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
996 | path); | 998 | path); |
997 | if (IS_ERR(path)) { | 999 | if (IS_ERR(path)) { |
998 | err = PTR_ERR(path); | 1000 | err = PTR_ERR(path); |
999 | goto out; | 1001 | goto out; |
@@ -1015,13 +1017,157 @@ out: | |||
1015 | } | 1017 | } |
1016 | 1018 | ||
1017 | /* | 1019 | /* |
1020 | * search the closest allocated block to the left for *logical | ||
1021 | * and returns it at @logical + its physical address at @phys | ||
1022 | * if *logical is the smallest allocated block, the function | ||
1023 | * returns 0 at @phys | ||
1024 | * return value contains 0 (success) or error code | ||
1025 | */ | ||
1026 | int | ||
1027 | ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | ||
1028 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | ||
1029 | { | ||
1030 | struct ext4_extent_idx *ix; | ||
1031 | struct ext4_extent *ex; | ||
1032 | int depth, ee_len; | ||
1033 | |||
1034 | BUG_ON(path == NULL); | ||
1035 | depth = path->p_depth; | ||
1036 | *phys = 0; | ||
1037 | |||
1038 | if (depth == 0 && path->p_ext == NULL) | ||
1039 | return 0; | ||
1040 | |||
1041 | /* usually extent in the path covers blocks smaller | ||
1042 | * than *logical, but it can be that extent is the | ||
1043 | * first one in the file */ | ||
1044 | |||
1045 | ex = path[depth].p_ext; | ||
1046 | ee_len = ext4_ext_get_actual_len(ex); | ||
1047 | if (*logical < le32_to_cpu(ex->ee_block)) { | ||
1048 | BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); | ||
1049 | while (--depth >= 0) { | ||
1050 | ix = path[depth].p_idx; | ||
1051 | BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); | ||
1052 | } | ||
1053 | return 0; | ||
1054 | } | ||
1055 | |||
1056 | BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); | ||
1057 | |||
1058 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | ||
1059 | *phys = ext_pblock(ex) + ee_len - 1; | ||
1060 | return 0; | ||
1061 | } | ||
1062 | |||
1063 | /* | ||
1064 | * search the closest allocated block to the right for *logical | ||
1065 | * and returns it at @logical + its physical address at @phys | ||
1066 | * if *logical is the largest allocated block, the function | ||
1067 | * returns 0 at @phys | ||
1068 | * return value contains 0 (success) or error code | ||
1069 | */ | ||
1070 | int | ||
1071 | ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | ||
1072 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | ||
1073 | { | ||
1074 | struct buffer_head *bh = NULL; | ||
1075 | struct ext4_extent_header *eh; | ||
1076 | struct ext4_extent_idx *ix; | ||
1077 | struct ext4_extent *ex; | ||
1078 | ext4_fsblk_t block; | ||
1079 | int depth, ee_len; | ||
1080 | |||
1081 | BUG_ON(path == NULL); | ||
1082 | depth = path->p_depth; | ||
1083 | *phys = 0; | ||
1084 | |||
1085 | if (depth == 0 && path->p_ext == NULL) | ||
1086 | return 0; | ||
1087 | |||
1088 | /* usually extent in the path covers blocks smaller | ||
1089 | * than *logical, but it can be that extent is the | ||
1090 | * first one in the file */ | ||
1091 | |||
1092 | ex = path[depth].p_ext; | ||
1093 | ee_len = ext4_ext_get_actual_len(ex); | ||
1094 | if (*logical < le32_to_cpu(ex->ee_block)) { | ||
1095 | BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); | ||
1096 | while (--depth >= 0) { | ||
1097 | ix = path[depth].p_idx; | ||
1098 | BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); | ||
1099 | } | ||
1100 | *logical = le32_to_cpu(ex->ee_block); | ||
1101 | *phys = ext_pblock(ex); | ||
1102 | return 0; | ||
1103 | } | ||
1104 | |||
1105 | BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); | ||
1106 | |||
1107 | if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { | ||
1108 | /* next allocated block in this leaf */ | ||
1109 | ex++; | ||
1110 | *logical = le32_to_cpu(ex->ee_block); | ||
1111 | *phys = ext_pblock(ex); | ||
1112 | return 0; | ||
1113 | } | ||
1114 | |||
1115 | /* go up and search for index to the right */ | ||
1116 | while (--depth >= 0) { | ||
1117 | ix = path[depth].p_idx; | ||
1118 | if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) | ||
1119 | break; | ||
1120 | } | ||
1121 | |||
1122 | if (depth < 0) { | ||
1123 | /* we've gone up to the root and | ||
1124 | * found no index to the right */ | ||
1125 | return 0; | ||
1126 | } | ||
1127 | |||
1128 | /* we've found index to the right, let's | ||
1129 | * follow it and find the closest allocated | ||
1130 | * block to the right */ | ||
1131 | ix++; | ||
1132 | block = idx_pblock(ix); | ||
1133 | while (++depth < path->p_depth) { | ||
1134 | bh = sb_bread(inode->i_sb, block); | ||
1135 | if (bh == NULL) | ||
1136 | return -EIO; | ||
1137 | eh = ext_block_hdr(bh); | ||
1138 | if (ext4_ext_check_header(inode, eh, depth)) { | ||
1139 | put_bh(bh); | ||
1140 | return -EIO; | ||
1141 | } | ||
1142 | ix = EXT_FIRST_INDEX(eh); | ||
1143 | block = idx_pblock(ix); | ||
1144 | put_bh(bh); | ||
1145 | } | ||
1146 | |||
1147 | bh = sb_bread(inode->i_sb, block); | ||
1148 | if (bh == NULL) | ||
1149 | return -EIO; | ||
1150 | eh = ext_block_hdr(bh); | ||
1151 | if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { | ||
1152 | put_bh(bh); | ||
1153 | return -EIO; | ||
1154 | } | ||
1155 | ex = EXT_FIRST_EXTENT(eh); | ||
1156 | *logical = le32_to_cpu(ex->ee_block); | ||
1157 | *phys = ext_pblock(ex); | ||
1158 | put_bh(bh); | ||
1159 | return 0; | ||
1160 | |||
1161 | } | ||
1162 | |||
1163 | /* | ||
1018 | * ext4_ext_next_allocated_block: | 1164 | * ext4_ext_next_allocated_block: |
1019 | * returns allocated block in subsequent extent or EXT_MAX_BLOCK. | 1165 | * returns allocated block in subsequent extent or EXT_MAX_BLOCK. |
1020 | * NOTE: it considers block number from index entry as | 1166 | * NOTE: it considers block number from index entry as |
1021 | * allocated block. Thus, index entries have to be consistent | 1167 | * allocated block. Thus, index entries have to be consistent |
1022 | * with leaves. | 1168 | * with leaves. |
1023 | */ | 1169 | */ |
1024 | static unsigned long | 1170 | static ext4_lblk_t |
1025 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) | 1171 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) |
1026 | { | 1172 | { |
1027 | int depth; | 1173 | int depth; |
@@ -1054,7 +1200,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1054 | * ext4_ext_next_leaf_block: | 1200 | * ext4_ext_next_leaf_block: |
1055 | * returns first allocated block from next leaf or EXT_MAX_BLOCK | 1201 | * returns first allocated block from next leaf or EXT_MAX_BLOCK |
1056 | */ | 1202 | */ |
1057 | static unsigned ext4_ext_next_leaf_block(struct inode *inode, | 1203 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, |
1058 | struct ext4_ext_path *path) | 1204 | struct ext4_ext_path *path) |
1059 | { | 1205 | { |
1060 | int depth; | 1206 | int depth; |
@@ -1072,7 +1218,8 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode, | |||
1072 | while (depth >= 0) { | 1218 | while (depth >= 0) { |
1073 | if (path[depth].p_idx != | 1219 | if (path[depth].p_idx != |
1074 | EXT_LAST_INDEX(path[depth].p_hdr)) | 1220 | EXT_LAST_INDEX(path[depth].p_hdr)) |
1075 | return le32_to_cpu(path[depth].p_idx[1].ei_block); | 1221 | return (ext4_lblk_t) |
1222 | le32_to_cpu(path[depth].p_idx[1].ei_block); | ||
1076 | depth--; | 1223 | depth--; |
1077 | } | 1224 | } |
1078 | 1225 | ||
@@ -1085,7 +1232,7 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode, | |||
1085 | * then we have to correct all indexes above. | 1232 | * then we have to correct all indexes above. |
1086 | * TODO: do we need to correct tree in all cases? | 1233 | * TODO: do we need to correct tree in all cases? |
1087 | */ | 1234 | */ |
1088 | int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, | 1235 | static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, |
1089 | struct ext4_ext_path *path) | 1236 | struct ext4_ext_path *path) |
1090 | { | 1237 | { |
1091 | struct ext4_extent_header *eh; | 1238 | struct ext4_extent_header *eh; |
@@ -1171,7 +1318,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1171 | if (ext1_ee_len + ext2_ee_len > max_len) | 1318 | if (ext1_ee_len + ext2_ee_len > max_len) |
1172 | return 0; | 1319 | return 0; |
1173 | #ifdef AGGRESSIVE_TEST | 1320 | #ifdef AGGRESSIVE_TEST |
1174 | if (le16_to_cpu(ex1->ee_len) >= 4) | 1321 | if (ext1_ee_len >= 4) |
1175 | return 0; | 1322 | return 0; |
1176 | #endif | 1323 | #endif |
1177 | 1324 | ||
@@ -1239,7 +1386,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1239 | struct ext4_extent *newext, | 1386 | struct ext4_extent *newext, |
1240 | struct ext4_ext_path *path) | 1387 | struct ext4_ext_path *path) |
1241 | { | 1388 | { |
1242 | unsigned long b1, b2; | 1389 | ext4_lblk_t b1, b2; |
1243 | unsigned int depth, len1; | 1390 | unsigned int depth, len1; |
1244 | unsigned int ret = 0; | 1391 | unsigned int ret = 0; |
1245 | 1392 | ||
@@ -1260,7 +1407,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1260 | goto out; | 1407 | goto out; |
1261 | } | 1408 | } |
1262 | 1409 | ||
1263 | /* check for wrap through zero */ | 1410 | /* check for wrap through zero on extent logical start block*/ |
1264 | if (b1 + len1 < b1) { | 1411 | if (b1 + len1 < b1) { |
1265 | len1 = EXT_MAX_BLOCK - b1; | 1412 | len1 = EXT_MAX_BLOCK - b1; |
1266 | newext->ee_len = cpu_to_le16(len1); | 1413 | newext->ee_len = cpu_to_le16(len1); |
@@ -1290,7 +1437,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1290 | struct ext4_extent *ex, *fex; | 1437 | struct ext4_extent *ex, *fex; |
1291 | struct ext4_extent *nearex; /* nearest extent */ | 1438 | struct ext4_extent *nearex; /* nearest extent */ |
1292 | struct ext4_ext_path *npath = NULL; | 1439 | struct ext4_ext_path *npath = NULL; |
1293 | int depth, len, err, next; | 1440 | int depth, len, err; |
1441 | ext4_lblk_t next; | ||
1294 | unsigned uninitialized = 0; | 1442 | unsigned uninitialized = 0; |
1295 | 1443 | ||
1296 | BUG_ON(ext4_ext_get_actual_len(newext) == 0); | 1444 | BUG_ON(ext4_ext_get_actual_len(newext) == 0); |
@@ -1435,114 +1583,8 @@ cleanup: | |||
1435 | return err; | 1583 | return err; |
1436 | } | 1584 | } |
1437 | 1585 | ||
1438 | int ext4_ext_walk_space(struct inode *inode, unsigned long block, | ||
1439 | unsigned long num, ext_prepare_callback func, | ||
1440 | void *cbdata) | ||
1441 | { | ||
1442 | struct ext4_ext_path *path = NULL; | ||
1443 | struct ext4_ext_cache cbex; | ||
1444 | struct ext4_extent *ex; | ||
1445 | unsigned long next, start = 0, end = 0; | ||
1446 | unsigned long last = block + num; | ||
1447 | int depth, exists, err = 0; | ||
1448 | |||
1449 | BUG_ON(func == NULL); | ||
1450 | BUG_ON(inode == NULL); | ||
1451 | |||
1452 | while (block < last && block != EXT_MAX_BLOCK) { | ||
1453 | num = last - block; | ||
1454 | /* find extent for this block */ | ||
1455 | path = ext4_ext_find_extent(inode, block, path); | ||
1456 | if (IS_ERR(path)) { | ||
1457 | err = PTR_ERR(path); | ||
1458 | path = NULL; | ||
1459 | break; | ||
1460 | } | ||
1461 | |||
1462 | depth = ext_depth(inode); | ||
1463 | BUG_ON(path[depth].p_hdr == NULL); | ||
1464 | ex = path[depth].p_ext; | ||
1465 | next = ext4_ext_next_allocated_block(path); | ||
1466 | |||
1467 | exists = 0; | ||
1468 | if (!ex) { | ||
1469 | /* there is no extent yet, so try to allocate | ||
1470 | * all requested space */ | ||
1471 | start = block; | ||
1472 | end = block + num; | ||
1473 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
1474 | /* need to allocate space before found extent */ | ||
1475 | start = block; | ||
1476 | end = le32_to_cpu(ex->ee_block); | ||
1477 | if (block + num < end) | ||
1478 | end = block + num; | ||
1479 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
1480 | + ext4_ext_get_actual_len(ex)) { | ||
1481 | /* need to allocate space after found extent */ | ||
1482 | start = block; | ||
1483 | end = block + num; | ||
1484 | if (end >= next) | ||
1485 | end = next; | ||
1486 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
1487 | /* | ||
1488 | * some part of requested space is covered | ||
1489 | * by found extent | ||
1490 | */ | ||
1491 | start = block; | ||
1492 | end = le32_to_cpu(ex->ee_block) | ||
1493 | + ext4_ext_get_actual_len(ex); | ||
1494 | if (block + num < end) | ||
1495 | end = block + num; | ||
1496 | exists = 1; | ||
1497 | } else { | ||
1498 | BUG(); | ||
1499 | } | ||
1500 | BUG_ON(end <= start); | ||
1501 | |||
1502 | if (!exists) { | ||
1503 | cbex.ec_block = start; | ||
1504 | cbex.ec_len = end - start; | ||
1505 | cbex.ec_start = 0; | ||
1506 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
1507 | } else { | ||
1508 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
1509 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
1510 | cbex.ec_start = ext_pblock(ex); | ||
1511 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
1512 | } | ||
1513 | |||
1514 | BUG_ON(cbex.ec_len == 0); | ||
1515 | err = func(inode, path, &cbex, cbdata); | ||
1516 | ext4_ext_drop_refs(path); | ||
1517 | |||
1518 | if (err < 0) | ||
1519 | break; | ||
1520 | if (err == EXT_REPEAT) | ||
1521 | continue; | ||
1522 | else if (err == EXT_BREAK) { | ||
1523 | err = 0; | ||
1524 | break; | ||
1525 | } | ||
1526 | |||
1527 | if (ext_depth(inode) != depth) { | ||
1528 | /* depth was changed. we have to realloc path */ | ||
1529 | kfree(path); | ||
1530 | path = NULL; | ||
1531 | } | ||
1532 | |||
1533 | block = cbex.ec_block + cbex.ec_len; | ||
1534 | } | ||
1535 | |||
1536 | if (path) { | ||
1537 | ext4_ext_drop_refs(path); | ||
1538 | kfree(path); | ||
1539 | } | ||
1540 | |||
1541 | return err; | ||
1542 | } | ||
1543 | |||
1544 | static void | 1586 | static void |
1545 | ext4_ext_put_in_cache(struct inode *inode, __u32 block, | 1587 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
1546 | __u32 len, ext4_fsblk_t start, int type) | 1588 | __u32 len, ext4_fsblk_t start, int type) |
1547 | { | 1589 | { |
1548 | struct ext4_ext_cache *cex; | 1590 | struct ext4_ext_cache *cex; |
@@ -1561,10 +1603,11 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block, | |||
1561 | */ | 1603 | */ |
1562 | static void | 1604 | static void |
1563 | ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | 1605 | ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, |
1564 | unsigned long block) | 1606 | ext4_lblk_t block) |
1565 | { | 1607 | { |
1566 | int depth = ext_depth(inode); | 1608 | int depth = ext_depth(inode); |
1567 | unsigned long lblock, len; | 1609 | unsigned long len; |
1610 | ext4_lblk_t lblock; | ||
1568 | struct ext4_extent *ex; | 1611 | struct ext4_extent *ex; |
1569 | 1612 | ||
1570 | ex = path[depth].p_ext; | 1613 | ex = path[depth].p_ext; |
@@ -1576,32 +1619,34 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
1576 | } else if (block < le32_to_cpu(ex->ee_block)) { | 1619 | } else if (block < le32_to_cpu(ex->ee_block)) { |
1577 | lblock = block; | 1620 | lblock = block; |
1578 | len = le32_to_cpu(ex->ee_block) - block; | 1621 | len = le32_to_cpu(ex->ee_block) - block; |
1579 | ext_debug("cache gap(before): %lu [%lu:%lu]", | 1622 | ext_debug("cache gap(before): %u [%u:%u]", |
1580 | (unsigned long) block, | 1623 | block, |
1581 | (unsigned long) le32_to_cpu(ex->ee_block), | 1624 | le32_to_cpu(ex->ee_block), |
1582 | (unsigned long) ext4_ext_get_actual_len(ex)); | 1625 | ext4_ext_get_actual_len(ex)); |
1583 | } else if (block >= le32_to_cpu(ex->ee_block) | 1626 | } else if (block >= le32_to_cpu(ex->ee_block) |
1584 | + ext4_ext_get_actual_len(ex)) { | 1627 | + ext4_ext_get_actual_len(ex)) { |
1628 | ext4_lblk_t next; | ||
1585 | lblock = le32_to_cpu(ex->ee_block) | 1629 | lblock = le32_to_cpu(ex->ee_block) |
1586 | + ext4_ext_get_actual_len(ex); | 1630 | + ext4_ext_get_actual_len(ex); |
1587 | len = ext4_ext_next_allocated_block(path); | 1631 | |
1588 | ext_debug("cache gap(after): [%lu:%lu] %lu", | 1632 | next = ext4_ext_next_allocated_block(path); |
1589 | (unsigned long) le32_to_cpu(ex->ee_block), | 1633 | ext_debug("cache gap(after): [%u:%u] %u", |
1590 | (unsigned long) ext4_ext_get_actual_len(ex), | 1634 | le32_to_cpu(ex->ee_block), |
1591 | (unsigned long) block); | 1635 | ext4_ext_get_actual_len(ex), |
1592 | BUG_ON(len == lblock); | 1636 | block); |
1593 | len = len - lblock; | 1637 | BUG_ON(next == lblock); |
1638 | len = next - lblock; | ||
1594 | } else { | 1639 | } else { |
1595 | lblock = len = 0; | 1640 | lblock = len = 0; |
1596 | BUG(); | 1641 | BUG(); |
1597 | } | 1642 | } |
1598 | 1643 | ||
1599 | ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); | 1644 | ext_debug(" -> %u:%lu\n", lblock, len); |
1600 | ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); | 1645 | ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); |
1601 | } | 1646 | } |
1602 | 1647 | ||
1603 | static int | 1648 | static int |
1604 | ext4_ext_in_cache(struct inode *inode, unsigned long block, | 1649 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, |
1605 | struct ext4_extent *ex) | 1650 | struct ext4_extent *ex) |
1606 | { | 1651 | { |
1607 | struct ext4_ext_cache *cex; | 1652 | struct ext4_ext_cache *cex; |
@@ -1618,11 +1663,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, | |||
1618 | ex->ee_block = cpu_to_le32(cex->ec_block); | 1663 | ex->ee_block = cpu_to_le32(cex->ec_block); |
1619 | ext4_ext_store_pblock(ex, cex->ec_start); | 1664 | ext4_ext_store_pblock(ex, cex->ec_start); |
1620 | ex->ee_len = cpu_to_le16(cex->ec_len); | 1665 | ex->ee_len = cpu_to_le16(cex->ec_len); |
1621 | ext_debug("%lu cached by %lu:%lu:%llu\n", | 1666 | ext_debug("%u cached by %u:%u:%llu\n", |
1622 | (unsigned long) block, | 1667 | block, |
1623 | (unsigned long) cex->ec_block, | 1668 | cex->ec_block, cex->ec_len, cex->ec_start); |
1624 | (unsigned long) cex->ec_len, | ||
1625 | cex->ec_start); | ||
1626 | return cex->ec_type; | 1669 | return cex->ec_type; |
1627 | } | 1670 | } |
1628 | 1671 | ||
@@ -1636,7 +1679,7 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, | |||
1636 | * It's used in truncate case only, thus all requests are for | 1679 | * It's used in truncate case only, thus all requests are for |
1637 | * last index in the block only. | 1680 | * last index in the block only. |
1638 | */ | 1681 | */ |
1639 | int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | 1682 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, |
1640 | struct ext4_ext_path *path) | 1683 | struct ext4_ext_path *path) |
1641 | { | 1684 | { |
1642 | struct buffer_head *bh; | 1685 | struct buffer_head *bh; |
@@ -1657,7 +1700,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1657 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 1700 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
1658 | bh = sb_find_get_block(inode->i_sb, leaf); | 1701 | bh = sb_find_get_block(inode->i_sb, leaf); |
1659 | ext4_forget(handle, 1, inode, bh, leaf); | 1702 | ext4_forget(handle, 1, inode, bh, leaf); |
1660 | ext4_free_blocks(handle, inode, leaf, 1); | 1703 | ext4_free_blocks(handle, inode, leaf, 1, 1); |
1661 | return err; | 1704 | return err; |
1662 | } | 1705 | } |
1663 | 1706 | ||
@@ -1666,7 +1709,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1666 | * This routine returns max. credits that the extent tree can consume. | 1709 | * This routine returns max. credits that the extent tree can consume. |
1667 | * It should be OK for low-performance paths like ->writepage() | 1710 | * It should be OK for low-performance paths like ->writepage() |
1668 | * To allow many writing processes to fit into a single transaction, | 1711 | * To allow many writing processes to fit into a single transaction, |
1669 | * the caller should calculate credits under truncate_mutex and | 1712 | * the caller should calculate credits under i_data_sem and |
1670 | * pass the actual path. | 1713 | * pass the actual path. |
1671 | */ | 1714 | */ |
1672 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1715 | int ext4_ext_calc_credits_for_insert(struct inode *inode, |
@@ -1714,12 +1757,14 @@ int ext4_ext_calc_credits_for_insert(struct inode *inode, | |||
1714 | 1757 | ||
1715 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1758 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
1716 | struct ext4_extent *ex, | 1759 | struct ext4_extent *ex, |
1717 | unsigned long from, unsigned long to) | 1760 | ext4_lblk_t from, ext4_lblk_t to) |
1718 | { | 1761 | { |
1719 | struct buffer_head *bh; | 1762 | struct buffer_head *bh; |
1720 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 1763 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
1721 | int i; | 1764 | int i, metadata = 0; |
1722 | 1765 | ||
1766 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
1767 | metadata = 1; | ||
1723 | #ifdef EXTENTS_STATS | 1768 | #ifdef EXTENTS_STATS |
1724 | { | 1769 | { |
1725 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1770 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -1738,42 +1783,45 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
1738 | if (from >= le32_to_cpu(ex->ee_block) | 1783 | if (from >= le32_to_cpu(ex->ee_block) |
1739 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 1784 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
1740 | /* tail removal */ | 1785 | /* tail removal */ |
1741 | unsigned long num; | 1786 | ext4_lblk_t num; |
1742 | ext4_fsblk_t start; | 1787 | ext4_fsblk_t start; |
1788 | |||
1743 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 1789 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
1744 | start = ext_pblock(ex) + ee_len - num; | 1790 | start = ext_pblock(ex) + ee_len - num; |
1745 | ext_debug("free last %lu blocks starting %llu\n", num, start); | 1791 | ext_debug("free last %u blocks starting %llu\n", num, start); |
1746 | for (i = 0; i < num; i++) { | 1792 | for (i = 0; i < num; i++) { |
1747 | bh = sb_find_get_block(inode->i_sb, start + i); | 1793 | bh = sb_find_get_block(inode->i_sb, start + i); |
1748 | ext4_forget(handle, 0, inode, bh, start + i); | 1794 | ext4_forget(handle, 0, inode, bh, start + i); |
1749 | } | 1795 | } |
1750 | ext4_free_blocks(handle, inode, start, num); | 1796 | ext4_free_blocks(handle, inode, start, num, metadata); |
1751 | } else if (from == le32_to_cpu(ex->ee_block) | 1797 | } else if (from == le32_to_cpu(ex->ee_block) |
1752 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 1798 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
1753 | printk("strange request: removal %lu-%lu from %u:%u\n", | 1799 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", |
1754 | from, to, le32_to_cpu(ex->ee_block), ee_len); | 1800 | from, to, le32_to_cpu(ex->ee_block), ee_len); |
1755 | } else { | 1801 | } else { |
1756 | printk("strange request: removal(2) %lu-%lu from %u:%u\n", | 1802 | printk(KERN_INFO "strange request: removal(2) " |
1757 | from, to, le32_to_cpu(ex->ee_block), ee_len); | 1803 | "%u-%u from %u:%u\n", |
1804 | from, to, le32_to_cpu(ex->ee_block), ee_len); | ||
1758 | } | 1805 | } |
1759 | return 0; | 1806 | return 0; |
1760 | } | 1807 | } |
1761 | 1808 | ||
1762 | static int | 1809 | static int |
1763 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 1810 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
1764 | struct ext4_ext_path *path, unsigned long start) | 1811 | struct ext4_ext_path *path, ext4_lblk_t start) |
1765 | { | 1812 | { |
1766 | int err = 0, correct_index = 0; | 1813 | int err = 0, correct_index = 0; |
1767 | int depth = ext_depth(inode), credits; | 1814 | int depth = ext_depth(inode), credits; |
1768 | struct ext4_extent_header *eh; | 1815 | struct ext4_extent_header *eh; |
1769 | unsigned a, b, block, num; | 1816 | ext4_lblk_t a, b, block; |
1770 | unsigned long ex_ee_block; | 1817 | unsigned num; |
1818 | ext4_lblk_t ex_ee_block; | ||
1771 | unsigned short ex_ee_len; | 1819 | unsigned short ex_ee_len; |
1772 | unsigned uninitialized = 0; | 1820 | unsigned uninitialized = 0; |
1773 | struct ext4_extent *ex; | 1821 | struct ext4_extent *ex; |
1774 | 1822 | ||
1775 | /* the header must be checked already in ext4_ext_remove_space() */ | 1823 | /* the header must be checked already in ext4_ext_remove_space() */ |
1776 | ext_debug("truncate since %lu in leaf\n", start); | 1824 | ext_debug("truncate since %u in leaf\n", start); |
1777 | if (!path[depth].p_hdr) | 1825 | if (!path[depth].p_hdr) |
1778 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); | 1826 | path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); |
1779 | eh = path[depth].p_hdr; | 1827 | eh = path[depth].p_hdr; |
@@ -1904,7 +1952,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
1904 | return 1; | 1952 | return 1; |
1905 | } | 1953 | } |
1906 | 1954 | ||
1907 | int ext4_ext_remove_space(struct inode *inode, unsigned long start) | 1955 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) |
1908 | { | 1956 | { |
1909 | struct super_block *sb = inode->i_sb; | 1957 | struct super_block *sb = inode->i_sb; |
1910 | int depth = ext_depth(inode); | 1958 | int depth = ext_depth(inode); |
@@ -1912,7 +1960,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) | |||
1912 | handle_t *handle; | 1960 | handle_t *handle; |
1913 | int i = 0, err = 0; | 1961 | int i = 0, err = 0; |
1914 | 1962 | ||
1915 | ext_debug("truncate since %lu\n", start); | 1963 | ext_debug("truncate since %u\n", start); |
1916 | 1964 | ||
1917 | /* probably first extent we're gonna free will be last in block */ | 1965 | /* probably first extent we're gonna free will be last in block */ |
1918 | handle = ext4_journal_start(inode, depth + 1); | 1966 | handle = ext4_journal_start(inode, depth + 1); |
@@ -2094,17 +2142,19 @@ void ext4_ext_release(struct super_block *sb) | |||
2094 | * b> Splits in two extents: Write is happening at either end of the extent | 2142 | * b> Splits in two extents: Write is happening at either end of the extent |
2095 | * c> Splits in three extents: Somone is writing in middle of the extent | 2143 | * c> Splits in three extents: Somone is writing in middle of the extent |
2096 | */ | 2144 | */ |
2097 | int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, | 2145 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
2098 | struct ext4_ext_path *path, | 2146 | struct inode *inode, |
2099 | ext4_fsblk_t iblock, | 2147 | struct ext4_ext_path *path, |
2100 | unsigned long max_blocks) | 2148 | ext4_lblk_t iblock, |
2149 | unsigned long max_blocks) | ||
2101 | { | 2150 | { |
2102 | struct ext4_extent *ex, newex; | 2151 | struct ext4_extent *ex, newex; |
2103 | struct ext4_extent *ex1 = NULL; | 2152 | struct ext4_extent *ex1 = NULL; |
2104 | struct ext4_extent *ex2 = NULL; | 2153 | struct ext4_extent *ex2 = NULL; |
2105 | struct ext4_extent *ex3 = NULL; | 2154 | struct ext4_extent *ex3 = NULL; |
2106 | struct ext4_extent_header *eh; | 2155 | struct ext4_extent_header *eh; |
2107 | unsigned int allocated, ee_block, ee_len, depth; | 2156 | ext4_lblk_t ee_block; |
2157 | unsigned int allocated, ee_len, depth; | ||
2108 | ext4_fsblk_t newblock; | 2158 | ext4_fsblk_t newblock; |
2109 | int err = 0; | 2159 | int err = 0; |
2110 | int ret = 0; | 2160 | int ret = 0; |
@@ -2225,8 +2275,13 @@ out: | |||
2225 | return err ? err : allocated; | 2275 | return err ? err : allocated; |
2226 | } | 2276 | } |
2227 | 2277 | ||
2278 | /* | ||
2279 | * Need to be called with | ||
2280 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block | ||
2281 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | ||
2282 | */ | ||
2228 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2283 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
2229 | ext4_fsblk_t iblock, | 2284 | ext4_lblk_t iblock, |
2230 | unsigned long max_blocks, struct buffer_head *bh_result, | 2285 | unsigned long max_blocks, struct buffer_head *bh_result, |
2231 | int create, int extend_disksize) | 2286 | int create, int extend_disksize) |
2232 | { | 2287 | { |
@@ -2236,11 +2291,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2236 | ext4_fsblk_t goal, newblock; | 2291 | ext4_fsblk_t goal, newblock; |
2237 | int err = 0, depth, ret; | 2292 | int err = 0, depth, ret; |
2238 | unsigned long allocated = 0; | 2293 | unsigned long allocated = 0; |
2294 | struct ext4_allocation_request ar; | ||
2239 | 2295 | ||
2240 | __clear_bit(BH_New, &bh_result->b_state); | 2296 | __clear_bit(BH_New, &bh_result->b_state); |
2241 | ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, | 2297 | ext_debug("blocks %u/%lu requested for inode %u\n", |
2242 | max_blocks, (unsigned) inode->i_ino); | 2298 | iblock, max_blocks, inode->i_ino); |
2243 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | ||
2244 | 2299 | ||
2245 | /* check in cache */ | 2300 | /* check in cache */ |
2246 | goal = ext4_ext_in_cache(inode, iblock, &newex); | 2301 | goal = ext4_ext_in_cache(inode, iblock, &newex); |
@@ -2260,7 +2315,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2260 | - le32_to_cpu(newex.ee_block) | 2315 | - le32_to_cpu(newex.ee_block) |
2261 | + ext_pblock(&newex); | 2316 | + ext_pblock(&newex); |
2262 | /* number of remaining blocks in the extent */ | 2317 | /* number of remaining blocks in the extent */ |
2263 | allocated = le16_to_cpu(newex.ee_len) - | 2318 | allocated = ext4_ext_get_actual_len(&newex) - |
2264 | (iblock - le32_to_cpu(newex.ee_block)); | 2319 | (iblock - le32_to_cpu(newex.ee_block)); |
2265 | goto out; | 2320 | goto out; |
2266 | } else { | 2321 | } else { |
@@ -2288,7 +2343,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2288 | 2343 | ||
2289 | ex = path[depth].p_ext; | 2344 | ex = path[depth].p_ext; |
2290 | if (ex) { | 2345 | if (ex) { |
2291 | unsigned long ee_block = le32_to_cpu(ex->ee_block); | 2346 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
2292 | ext4_fsblk_t ee_start = ext_pblock(ex); | 2347 | ext4_fsblk_t ee_start = ext_pblock(ex); |
2293 | unsigned short ee_len; | 2348 | unsigned short ee_len; |
2294 | 2349 | ||
@@ -2302,7 +2357,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2302 | newblock = iblock - ee_block + ee_start; | 2357 | newblock = iblock - ee_block + ee_start; |
2303 | /* number of remaining blocks in the extent */ | 2358 | /* number of remaining blocks in the extent */ |
2304 | allocated = ee_len - (iblock - ee_block); | 2359 | allocated = ee_len - (iblock - ee_block); |
2305 | ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, | 2360 | ext_debug("%u fit into %lu:%d -> %llu\n", iblock, |
2306 | ee_block, ee_len, newblock); | 2361 | ee_block, ee_len, newblock); |
2307 | 2362 | ||
2308 | /* Do not put uninitialized extent in the cache */ | 2363 | /* Do not put uninitialized extent in the cache */ |
@@ -2320,9 +2375,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2320 | ret = ext4_ext_convert_to_initialized(handle, inode, | 2375 | ret = ext4_ext_convert_to_initialized(handle, inode, |
2321 | path, iblock, | 2376 | path, iblock, |
2322 | max_blocks); | 2377 | max_blocks); |
2323 | if (ret <= 0) | 2378 | if (ret <= 0) { |
2379 | err = ret; | ||
2324 | goto out2; | 2380 | goto out2; |
2325 | else | 2381 | } else |
2326 | allocated = ret; | 2382 | allocated = ret; |
2327 | goto outnew; | 2383 | goto outnew; |
2328 | } | 2384 | } |
@@ -2347,8 +2403,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2347 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | 2403 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) |
2348 | ext4_init_block_alloc_info(inode); | 2404 | ext4_init_block_alloc_info(inode); |
2349 | 2405 | ||
2350 | /* allocate new block */ | 2406 | /* find neighbour allocated blocks */ |
2351 | goal = ext4_ext_find_goal(inode, path, iblock); | 2407 | ar.lleft = iblock; |
2408 | err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); | ||
2409 | if (err) | ||
2410 | goto out2; | ||
2411 | ar.lright = iblock; | ||
2412 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | ||
2413 | if (err) | ||
2414 | goto out2; | ||
2352 | 2415 | ||
2353 | /* | 2416 | /* |
2354 | * See if request is beyond maximum number of blocks we can have in | 2417 | * See if request is beyond maximum number of blocks we can have in |
@@ -2368,10 +2431,21 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2368 | newex.ee_len = cpu_to_le16(max_blocks); | 2431 | newex.ee_len = cpu_to_le16(max_blocks); |
2369 | err = ext4_ext_check_overlap(inode, &newex, path); | 2432 | err = ext4_ext_check_overlap(inode, &newex, path); |
2370 | if (err) | 2433 | if (err) |
2371 | allocated = le16_to_cpu(newex.ee_len); | 2434 | allocated = ext4_ext_get_actual_len(&newex); |
2372 | else | 2435 | else |
2373 | allocated = max_blocks; | 2436 | allocated = max_blocks; |
2374 | newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); | 2437 | |
2438 | /* allocate new block */ | ||
2439 | ar.inode = inode; | ||
2440 | ar.goal = ext4_ext_find_goal(inode, path, iblock); | ||
2441 | ar.logical = iblock; | ||
2442 | ar.len = allocated; | ||
2443 | if (S_ISREG(inode->i_mode)) | ||
2444 | ar.flags = EXT4_MB_HINT_DATA; | ||
2445 | else | ||
2446 | /* disable in-core preallocation for non-regular files */ | ||
2447 | ar.flags = 0; | ||
2448 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | ||
2375 | if (!newblock) | 2449 | if (!newblock) |
2376 | goto out2; | 2450 | goto out2; |
2377 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | 2451 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", |
@@ -2379,14 +2453,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2379 | 2453 | ||
2380 | /* try to insert new extent into found leaf and return */ | 2454 | /* try to insert new extent into found leaf and return */ |
2381 | ext4_ext_store_pblock(&newex, newblock); | 2455 | ext4_ext_store_pblock(&newex, newblock); |
2382 | newex.ee_len = cpu_to_le16(allocated); | 2456 | newex.ee_len = cpu_to_le16(ar.len); |
2383 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ | 2457 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ |
2384 | ext4_ext_mark_uninitialized(&newex); | 2458 | ext4_ext_mark_uninitialized(&newex); |
2385 | err = ext4_ext_insert_extent(handle, inode, path, &newex); | 2459 | err = ext4_ext_insert_extent(handle, inode, path, &newex); |
2386 | if (err) { | 2460 | if (err) { |
2387 | /* free data blocks we just allocated */ | 2461 | /* free data blocks we just allocated */ |
2462 | /* not a good idea to call discard here directly, | ||
2463 | * but otherwise we'd need to call it every free() */ | ||
2464 | ext4_mb_discard_inode_preallocations(inode); | ||
2388 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2465 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
2389 | le16_to_cpu(newex.ee_len)); | 2466 | ext4_ext_get_actual_len(&newex), 0); |
2390 | goto out2; | 2467 | goto out2; |
2391 | } | 2468 | } |
2392 | 2469 | ||
@@ -2395,6 +2472,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2395 | 2472 | ||
2396 | /* previous routine could use block we allocated */ | 2473 | /* previous routine could use block we allocated */ |
2397 | newblock = ext_pblock(&newex); | 2474 | newblock = ext_pblock(&newex); |
2475 | allocated = ext4_ext_get_actual_len(&newex); | ||
2398 | outnew: | 2476 | outnew: |
2399 | __set_bit(BH_New, &bh_result->b_state); | 2477 | __set_bit(BH_New, &bh_result->b_state); |
2400 | 2478 | ||
@@ -2414,8 +2492,6 @@ out2: | |||
2414 | ext4_ext_drop_refs(path); | 2492 | ext4_ext_drop_refs(path); |
2415 | kfree(path); | 2493 | kfree(path); |
2416 | } | 2494 | } |
2417 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | ||
2418 | |||
2419 | return err ? err : allocated; | 2495 | return err ? err : allocated; |
2420 | } | 2496 | } |
2421 | 2497 | ||
@@ -2423,7 +2499,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
2423 | { | 2499 | { |
2424 | struct address_space *mapping = inode->i_mapping; | 2500 | struct address_space *mapping = inode->i_mapping; |
2425 | struct super_block *sb = inode->i_sb; | 2501 | struct super_block *sb = inode->i_sb; |
2426 | unsigned long last_block; | 2502 | ext4_lblk_t last_block; |
2427 | handle_t *handle; | 2503 | handle_t *handle; |
2428 | int err = 0; | 2504 | int err = 0; |
2429 | 2505 | ||
@@ -2445,9 +2521,11 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
2445 | if (page) | 2521 | if (page) |
2446 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 2522 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); |
2447 | 2523 | ||
2448 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | 2524 | down_write(&EXT4_I(inode)->i_data_sem); |
2449 | ext4_ext_invalidate_cache(inode); | 2525 | ext4_ext_invalidate_cache(inode); |
2450 | 2526 | ||
2527 | ext4_mb_discard_inode_preallocations(inode); | ||
2528 | |||
2451 | /* | 2529 | /* |
2452 | * TODO: optimization is possible here. | 2530 | * TODO: optimization is possible here. |
2453 | * Probably we need not scan at all, | 2531 | * Probably we need not scan at all, |
@@ -2481,7 +2559,7 @@ out_stop: | |||
2481 | if (inode->i_nlink) | 2559 | if (inode->i_nlink) |
2482 | ext4_orphan_del(handle, inode); | 2560 | ext4_orphan_del(handle, inode); |
2483 | 2561 | ||
2484 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | 2562 | up_write(&EXT4_I(inode)->i_data_sem); |
2485 | ext4_journal_stop(handle); | 2563 | ext4_journal_stop(handle); |
2486 | } | 2564 | } |
2487 | 2565 | ||
@@ -2516,7 +2594,8 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | |||
2516 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | 2594 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) |
2517 | { | 2595 | { |
2518 | handle_t *handle; | 2596 | handle_t *handle; |
2519 | ext4_fsblk_t block, max_blocks; | 2597 | ext4_lblk_t block; |
2598 | unsigned long max_blocks; | ||
2520 | ext4_fsblk_t nblocks = 0; | 2599 | ext4_fsblk_t nblocks = 0; |
2521 | int ret = 0; | 2600 | int ret = 0; |
2522 | int ret2 = 0; | 2601 | int ret2 = 0; |
@@ -2544,6 +2623,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2544 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | 2623 | * modify 1 super block, 1 block bitmap and 1 group descriptor. |
2545 | */ | 2624 | */ |
2546 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2625 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; |
2626 | down_write((&EXT4_I(inode)->i_data_sem)); | ||
2547 | retry: | 2627 | retry: |
2548 | while (ret >= 0 && ret < max_blocks) { | 2628 | while (ret >= 0 && ret < max_blocks) { |
2549 | block = block + ret; | 2629 | block = block + ret; |
@@ -2557,12 +2637,12 @@ retry: | |||
2557 | ret = ext4_ext_get_blocks(handle, inode, block, | 2637 | ret = ext4_ext_get_blocks(handle, inode, block, |
2558 | max_blocks, &map_bh, | 2638 | max_blocks, &map_bh, |
2559 | EXT4_CREATE_UNINITIALIZED_EXT, 0); | 2639 | EXT4_CREATE_UNINITIALIZED_EXT, 0); |
2560 | WARN_ON(!ret); | 2640 | WARN_ON(ret <= 0); |
2561 | if (!ret) { | 2641 | if (ret <= 0) { |
2562 | ext4_error(inode->i_sb, "ext4_fallocate", | 2642 | ext4_error(inode->i_sb, "ext4_fallocate", |
2563 | "ext4_ext_get_blocks returned 0! inode#%lu" | 2643 | "ext4_ext_get_blocks returned error: " |
2564 | ", block=%llu, max_blocks=%llu", | 2644 | "inode#%lu, block=%u, max_blocks=%lu", |
2565 | inode->i_ino, block, max_blocks); | 2645 | inode->i_ino, block, max_blocks); |
2566 | ret = -EIO; | 2646 | ret = -EIO; |
2567 | ext4_mark_inode_dirty(handle, inode); | 2647 | ext4_mark_inode_dirty(handle, inode); |
2568 | ret2 = ext4_journal_stop(handle); | 2648 | ret2 = ext4_journal_stop(handle); |
@@ -2600,6 +2680,7 @@ retry: | |||
2600 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2680 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
2601 | goto retry; | 2681 | goto retry; |
2602 | 2682 | ||
2683 | up_write((&EXT4_I(inode)->i_data_sem)); | ||
2603 | /* | 2684 | /* |
2604 | * Time to update the file size. | 2685 | * Time to update the file size. |
2605 | * Update only when preallocation was requested beyond the file size. | 2686 | * Update only when preallocation was requested beyond the file size. |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1a81cd66d63b..ac35ec58db55 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp) | |||
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_reservation(inode); |
42 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
45 | ext4_htree_free_dir_info(filp->private_data); | 45 | ext4_htree_free_dir_info(filp->private_data); |
@@ -56,8 +56,25 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
56 | ssize_t ret; | 56 | ssize_t ret; |
57 | int err; | 57 | int err; |
58 | 58 | ||
59 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 59 | /* |
60 | * If we have encountered a bitmap-format file, the size limit | ||
61 | * is smaller than s_maxbytes, which is for extent-mapped files. | ||
62 | */ | ||
63 | |||
64 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | ||
65 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
66 | size_t length = iov_length(iov, nr_segs); | ||
60 | 67 | ||
68 | if (pos > sbi->s_bitmap_maxbytes) | ||
69 | return -EFBIG; | ||
70 | |||
71 | if (pos + length > sbi->s_bitmap_maxbytes) { | ||
72 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, | ||
73 | sbi->s_bitmap_maxbytes - pos); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
61 | /* | 78 | /* |
62 | * Skip flushing if there was an error, or if nothing was written. | 79 | * Skip flushing if there was an error, or if nothing was written. |
63 | */ | 80 | */ |
diff --git a/fs/ext4/group.h b/fs/ext4/group.h index 1577910bb58b..7eb0604e7eea 100644 --- a/fs/ext4/group.h +++ b/fs/ext4/group.h | |||
@@ -14,14 +14,16 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | |||
14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | 14 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, |
15 | struct ext4_group_desc *gdp); | 15 | struct ext4_group_desc *gdp); |
16 | struct buffer_head *read_block_bitmap(struct super_block *sb, | 16 | struct buffer_head *read_block_bitmap(struct super_block *sb, |
17 | unsigned int block_group); | 17 | ext4_group_t block_group); |
18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, | 18 | extern unsigned ext4_init_block_bitmap(struct super_block *sb, |
19 | struct buffer_head *bh, int group, | 19 | struct buffer_head *bh, |
20 | ext4_group_t group, | ||
20 | struct ext4_group_desc *desc); | 21 | struct ext4_group_desc *desc); |
21 | #define ext4_free_blocks_after_init(sb, group, desc) \ | 22 | #define ext4_free_blocks_after_init(sb, group, desc) \ |
22 | ext4_init_block_bitmap(sb, NULL, group, desc) | 23 | ext4_init_block_bitmap(sb, NULL, group, desc) |
23 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | 24 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, |
24 | struct buffer_head *bh, int group, | 25 | struct buffer_head *bh, |
26 | ext4_group_t group, | ||
25 | struct ext4_group_desc *desc); | 27 | struct ext4_group_desc *desc); |
26 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | 28 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
27 | #endif /* _LINUX_EXT4_GROUP_H */ | 29 | #endif /* _LINUX_EXT4_GROUP_H */ |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c61f37fd3f05..575b5215c808 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -64,8 +64,8 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
64 | } | 64 | } |
65 | 65 | ||
66 | /* Initializes an uninitialized inode bitmap */ | 66 | /* Initializes an uninitialized inode bitmap */ |
67 | unsigned ext4_init_inode_bitmap(struct super_block *sb, | 67 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, |
68 | struct buffer_head *bh, int block_group, | 68 | ext4_group_t block_group, |
69 | struct ext4_group_desc *gdp) | 69 | struct ext4_group_desc *gdp) |
70 | { | 70 | { |
71 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 71 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
75 | /* If checksum is bad mark all blocks and inodes use to prevent | 75 | /* If checksum is bad mark all blocks and inodes use to prevent |
76 | * allocation, essentially implementing a per-group read-only flag. */ | 76 | * allocation, essentially implementing a per-group read-only flag. */ |
77 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 77 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
78 | ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", | 78 | ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", |
79 | block_group); | 79 | block_group); |
80 | gdp->bg_free_blocks_count = 0; | 80 | gdp->bg_free_blocks_count = 0; |
81 | gdp->bg_free_inodes_count = 0; | 81 | gdp->bg_free_inodes_count = 0; |
@@ -98,7 +98,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
98 | * Return buffer_head of bitmap on success or NULL. | 98 | * Return buffer_head of bitmap on success or NULL. |
99 | */ | 99 | */ |
100 | static struct buffer_head * | 100 | static struct buffer_head * |
101 | read_inode_bitmap(struct super_block * sb, unsigned long block_group) | 101 | read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) |
102 | { | 102 | { |
103 | struct ext4_group_desc *desc; | 103 | struct ext4_group_desc *desc; |
104 | struct buffer_head *bh = NULL; | 104 | struct buffer_head *bh = NULL; |
@@ -152,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
152 | unsigned long ino; | 152 | unsigned long ino; |
153 | struct buffer_head *bitmap_bh = NULL; | 153 | struct buffer_head *bitmap_bh = NULL; |
154 | struct buffer_head *bh2; | 154 | struct buffer_head *bh2; |
155 | unsigned long block_group; | 155 | ext4_group_t block_group; |
156 | unsigned long bit; | 156 | unsigned long bit; |
157 | struct ext4_group_desc * gdp; | 157 | struct ext4_group_desc * gdp; |
158 | struct ext4_super_block * es; | 158 | struct ext4_super_block * es; |
@@ -260,12 +260,14 @@ error_return: | |||
260 | * For other inodes, search forward from the parent directory\'s block | 260 | * For other inodes, search forward from the parent directory\'s block |
261 | * group to find a free inode. | 261 | * group to find a free inode. |
262 | */ | 262 | */ |
263 | static int find_group_dir(struct super_block *sb, struct inode *parent) | 263 | static int find_group_dir(struct super_block *sb, struct inode *parent, |
264 | ext4_group_t *best_group) | ||
264 | { | 265 | { |
265 | int ngroups = EXT4_SB(sb)->s_groups_count; | 266 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
266 | unsigned int freei, avefreei; | 267 | unsigned int freei, avefreei; |
267 | struct ext4_group_desc *desc, *best_desc = NULL; | 268 | struct ext4_group_desc *desc, *best_desc = NULL; |
268 | int group, best_group = -1; | 269 | ext4_group_t group; |
270 | int ret = -1; | ||
269 | 271 | ||
270 | freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); | 272 | freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); |
271 | avefreei = freei / ngroups; | 273 | avefreei = freei / ngroups; |
@@ -279,11 +281,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) | |||
279 | if (!best_desc || | 281 | if (!best_desc || |
280 | (le16_to_cpu(desc->bg_free_blocks_count) > | 282 | (le16_to_cpu(desc->bg_free_blocks_count) > |
281 | le16_to_cpu(best_desc->bg_free_blocks_count))) { | 283 | le16_to_cpu(best_desc->bg_free_blocks_count))) { |
282 | best_group = group; | 284 | *best_group = group; |
283 | best_desc = desc; | 285 | best_desc = desc; |
286 | ret = 0; | ||
284 | } | 287 | } |
285 | } | 288 | } |
286 | return best_group; | 289 | return ret; |
287 | } | 290 | } |
288 | 291 | ||
289 | /* | 292 | /* |
@@ -314,12 +317,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) | |||
314 | #define INODE_COST 64 | 317 | #define INODE_COST 64 |
315 | #define BLOCK_COST 256 | 318 | #define BLOCK_COST 256 |
316 | 319 | ||
317 | static int find_group_orlov(struct super_block *sb, struct inode *parent) | 320 | static int find_group_orlov(struct super_block *sb, struct inode *parent, |
321 | ext4_group_t *group) | ||
318 | { | 322 | { |
319 | int parent_group = EXT4_I(parent)->i_block_group; | 323 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
320 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 324 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
321 | struct ext4_super_block *es = sbi->s_es; | 325 | struct ext4_super_block *es = sbi->s_es; |
322 | int ngroups = sbi->s_groups_count; | 326 | ext4_group_t ngroups = sbi->s_groups_count; |
323 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 327 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
324 | unsigned int freei, avefreei; | 328 | unsigned int freei, avefreei; |
325 | ext4_fsblk_t freeb, avefreeb; | 329 | ext4_fsblk_t freeb, avefreeb; |
@@ -327,7 +331,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
327 | unsigned int ndirs; | 331 | unsigned int ndirs; |
328 | int max_debt, max_dirs, min_inodes; | 332 | int max_debt, max_dirs, min_inodes; |
329 | ext4_grpblk_t min_blocks; | 333 | ext4_grpblk_t min_blocks; |
330 | int group = -1, i; | 334 | ext4_group_t i; |
331 | struct ext4_group_desc *desc; | 335 | struct ext4_group_desc *desc; |
332 | 336 | ||
333 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 337 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
@@ -340,13 +344,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
340 | if ((parent == sb->s_root->d_inode) || | 344 | if ((parent == sb->s_root->d_inode) || |
341 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { | 345 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { |
342 | int best_ndir = inodes_per_group; | 346 | int best_ndir = inodes_per_group; |
343 | int best_group = -1; | 347 | ext4_group_t grp; |
348 | int ret = -1; | ||
344 | 349 | ||
345 | get_random_bytes(&group, sizeof(group)); | 350 | get_random_bytes(&grp, sizeof(grp)); |
346 | parent_group = (unsigned)group % ngroups; | 351 | parent_group = (unsigned)grp % ngroups; |
347 | for (i = 0; i < ngroups; i++) { | 352 | for (i = 0; i < ngroups; i++) { |
348 | group = (parent_group + i) % ngroups; | 353 | grp = (parent_group + i) % ngroups; |
349 | desc = ext4_get_group_desc (sb, group, NULL); | 354 | desc = ext4_get_group_desc(sb, grp, NULL); |
350 | if (!desc || !desc->bg_free_inodes_count) | 355 | if (!desc || !desc->bg_free_inodes_count) |
351 | continue; | 356 | continue; |
352 | if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) | 357 | if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) |
@@ -355,11 +360,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
355 | continue; | 360 | continue; |
356 | if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) | 361 | if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) |
357 | continue; | 362 | continue; |
358 | best_group = group; | 363 | *group = grp; |
364 | ret = 0; | ||
359 | best_ndir = le16_to_cpu(desc->bg_used_dirs_count); | 365 | best_ndir = le16_to_cpu(desc->bg_used_dirs_count); |
360 | } | 366 | } |
361 | if (best_group >= 0) | 367 | if (ret == 0) |
362 | return best_group; | 368 | return ret; |
363 | goto fallback; | 369 | goto fallback; |
364 | } | 370 | } |
365 | 371 | ||
@@ -380,8 +386,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
380 | max_debt = 1; | 386 | max_debt = 1; |
381 | 387 | ||
382 | for (i = 0; i < ngroups; i++) { | 388 | for (i = 0; i < ngroups; i++) { |
383 | group = (parent_group + i) % ngroups; | 389 | *group = (parent_group + i) % ngroups; |
384 | desc = ext4_get_group_desc (sb, group, NULL); | 390 | desc = ext4_get_group_desc(sb, *group, NULL); |
385 | if (!desc || !desc->bg_free_inodes_count) | 391 | if (!desc || !desc->bg_free_inodes_count) |
386 | continue; | 392 | continue; |
387 | if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) | 393 | if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) |
@@ -390,17 +396,16 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
390 | continue; | 396 | continue; |
391 | if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) | 397 | if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) |
392 | continue; | 398 | continue; |
393 | return group; | 399 | return 0; |
394 | } | 400 | } |
395 | 401 | ||
396 | fallback: | 402 | fallback: |
397 | for (i = 0; i < ngroups; i++) { | 403 | for (i = 0; i < ngroups; i++) { |
398 | group = (parent_group + i) % ngroups; | 404 | *group = (parent_group + i) % ngroups; |
399 | desc = ext4_get_group_desc (sb, group, NULL); | 405 | desc = ext4_get_group_desc(sb, *group, NULL); |
400 | if (!desc || !desc->bg_free_inodes_count) | 406 | if (desc && desc->bg_free_inodes_count && |
401 | continue; | 407 | le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) |
402 | if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) | 408 | return 0; |
403 | return group; | ||
404 | } | 409 | } |
405 | 410 | ||
406 | if (avefreei) { | 411 | if (avefreei) { |
@@ -415,21 +420,22 @@ fallback: | |||
415 | return -1; | 420 | return -1; |
416 | } | 421 | } |
417 | 422 | ||
418 | static int find_group_other(struct super_block *sb, struct inode *parent) | 423 | static int find_group_other(struct super_block *sb, struct inode *parent, |
424 | ext4_group_t *group) | ||
419 | { | 425 | { |
420 | int parent_group = EXT4_I(parent)->i_block_group; | 426 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
421 | int ngroups = EXT4_SB(sb)->s_groups_count; | 427 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
422 | struct ext4_group_desc *desc; | 428 | struct ext4_group_desc *desc; |
423 | int group, i; | 429 | ext4_group_t i; |
424 | 430 | ||
425 | /* | 431 | /* |
426 | * Try to place the inode in its parent directory | 432 | * Try to place the inode in its parent directory |
427 | */ | 433 | */ |
428 | group = parent_group; | 434 | *group = parent_group; |
429 | desc = ext4_get_group_desc (sb, group, NULL); | 435 | desc = ext4_get_group_desc(sb, *group, NULL); |
430 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && | 436 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && |
431 | le16_to_cpu(desc->bg_free_blocks_count)) | 437 | le16_to_cpu(desc->bg_free_blocks_count)) |
432 | return group; | 438 | return 0; |
433 | 439 | ||
434 | /* | 440 | /* |
435 | * We're going to place this inode in a different blockgroup from its | 441 | * We're going to place this inode in a different blockgroup from its |
@@ -440,33 +446,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent) | |||
440 | * | 446 | * |
441 | * So add our directory's i_ino into the starting point for the hash. | 447 | * So add our directory's i_ino into the starting point for the hash. |
442 | */ | 448 | */ |
443 | group = (group + parent->i_ino) % ngroups; | 449 | *group = (*group + parent->i_ino) % ngroups; |
444 | 450 | ||
445 | /* | 451 | /* |
446 | * Use a quadratic hash to find a group with a free inode and some free | 452 | * Use a quadratic hash to find a group with a free inode and some free |
447 | * blocks. | 453 | * blocks. |
448 | */ | 454 | */ |
449 | for (i = 1; i < ngroups; i <<= 1) { | 455 | for (i = 1; i < ngroups; i <<= 1) { |
450 | group += i; | 456 | *group += i; |
451 | if (group >= ngroups) | 457 | if (*group >= ngroups) |
452 | group -= ngroups; | 458 | *group -= ngroups; |
453 | desc = ext4_get_group_desc (sb, group, NULL); | 459 | desc = ext4_get_group_desc(sb, *group, NULL); |
454 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && | 460 | if (desc && le16_to_cpu(desc->bg_free_inodes_count) && |
455 | le16_to_cpu(desc->bg_free_blocks_count)) | 461 | le16_to_cpu(desc->bg_free_blocks_count)) |
456 | return group; | 462 | return 0; |
457 | } | 463 | } |
458 | 464 | ||
459 | /* | 465 | /* |
460 | * That failed: try linear search for a free inode, even if that group | 466 | * That failed: try linear search for a free inode, even if that group |
461 | * has no free blocks. | 467 | * has no free blocks. |
462 | */ | 468 | */ |
463 | group = parent_group; | 469 | *group = parent_group; |
464 | for (i = 0; i < ngroups; i++) { | 470 | for (i = 0; i < ngroups; i++) { |
465 | if (++group >= ngroups) | 471 | if (++*group >= ngroups) |
466 | group = 0; | 472 | *group = 0; |
467 | desc = ext4_get_group_desc (sb, group, NULL); | 473 | desc = ext4_get_group_desc(sb, *group, NULL); |
468 | if (desc && le16_to_cpu(desc->bg_free_inodes_count)) | 474 | if (desc && le16_to_cpu(desc->bg_free_inodes_count)) |
469 | return group; | 475 | return 0; |
470 | } | 476 | } |
471 | 477 | ||
472 | return -1; | 478 | return -1; |
@@ -487,16 +493,17 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
487 | struct super_block *sb; | 493 | struct super_block *sb; |
488 | struct buffer_head *bitmap_bh = NULL; | 494 | struct buffer_head *bitmap_bh = NULL; |
489 | struct buffer_head *bh2; | 495 | struct buffer_head *bh2; |
490 | int group; | 496 | ext4_group_t group = 0; |
491 | unsigned long ino = 0; | 497 | unsigned long ino = 0; |
492 | struct inode * inode; | 498 | struct inode * inode; |
493 | struct ext4_group_desc * gdp = NULL; | 499 | struct ext4_group_desc * gdp = NULL; |
494 | struct ext4_super_block * es; | 500 | struct ext4_super_block * es; |
495 | struct ext4_inode_info *ei; | 501 | struct ext4_inode_info *ei; |
496 | struct ext4_sb_info *sbi; | 502 | struct ext4_sb_info *sbi; |
497 | int err = 0; | 503 | int ret2, err = 0; |
498 | struct inode *ret; | 504 | struct inode *ret; |
499 | int i, free = 0; | 505 | ext4_group_t i; |
506 | int free = 0; | ||
500 | 507 | ||
501 | /* Cannot create files in a deleted directory */ | 508 | /* Cannot create files in a deleted directory */ |
502 | if (!dir || !dir->i_nlink) | 509 | if (!dir || !dir->i_nlink) |
@@ -512,14 +519,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
512 | es = sbi->s_es; | 519 | es = sbi->s_es; |
513 | if (S_ISDIR(mode)) { | 520 | if (S_ISDIR(mode)) { |
514 | if (test_opt (sb, OLDALLOC)) | 521 | if (test_opt (sb, OLDALLOC)) |
515 | group = find_group_dir(sb, dir); | 522 | ret2 = find_group_dir(sb, dir, &group); |
516 | else | 523 | else |
517 | group = find_group_orlov(sb, dir); | 524 | ret2 = find_group_orlov(sb, dir, &group); |
518 | } else | 525 | } else |
519 | group = find_group_other(sb, dir); | 526 | ret2 = find_group_other(sb, dir, &group); |
520 | 527 | ||
521 | err = -ENOSPC; | 528 | err = -ENOSPC; |
522 | if (group == -1) | 529 | if (ret2 == -1) |
523 | goto out; | 530 | goto out; |
524 | 531 | ||
525 | for (i = 0; i < sbi->s_groups_count; i++) { | 532 | for (i = 0; i < sbi->s_groups_count; i++) { |
@@ -583,7 +590,7 @@ got: | |||
583 | ino > EXT4_INODES_PER_GROUP(sb)) { | 590 | ino > EXT4_INODES_PER_GROUP(sb)) { |
584 | ext4_error(sb, __FUNCTION__, | 591 | ext4_error(sb, __FUNCTION__, |
585 | "reserved inode or inode > inodes count - " | 592 | "reserved inode or inode > inodes count - " |
586 | "block_group = %d, inode=%lu", group, | 593 | "block_group = %lu, inode=%lu", group, |
587 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 594 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
588 | err = -EIO; | 595 | err = -EIO; |
589 | goto fail; | 596 | goto fail; |
@@ -702,7 +709,6 @@ got: | |||
702 | if (!S_ISDIR(mode)) | 709 | if (!S_ISDIR(mode)) |
703 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 710 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
704 | ei->i_file_acl = 0; | 711 | ei->i_file_acl = 0; |
705 | ei->i_dir_acl = 0; | ||
706 | ei->i_dtime = 0; | 712 | ei->i_dtime = 0; |
707 | ei->i_block_alloc_info = NULL; | 713 | ei->i_block_alloc_info = NULL; |
708 | ei->i_block_group = group; | 714 | ei->i_block_group = group; |
@@ -741,13 +747,10 @@ got: | |||
741 | if (test_opt(sb, EXTENTS)) { | 747 | if (test_opt(sb, EXTENTS)) { |
742 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; | 748 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; |
743 | ext4_ext_tree_init(handle, inode); | 749 | ext4_ext_tree_init(handle, inode); |
744 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 750 | err = ext4_update_incompat_feature(handle, sb, |
745 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 751 | EXT4_FEATURE_INCOMPAT_EXTENTS); |
746 | if (err) goto fail; | 752 | if (err) |
747 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); | 753 | goto fail; |
748 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata"); | ||
749 | err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); | ||
750 | } | ||
751 | } | 754 | } |
752 | 755 | ||
753 | ext4_debug("allocating inode %lu\n", inode->i_ino); | 756 | ext4_debug("allocating inode %lu\n", inode->i_ino); |
@@ -777,7 +780,7 @@ fail_drop: | |||
777 | struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) | 780 | struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) |
778 | { | 781 | { |
779 | unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); | 782 | unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); |
780 | unsigned long block_group; | 783 | ext4_group_t block_group; |
781 | int bit; | 784 | int bit; |
782 | struct buffer_head *bitmap_bh = NULL; | 785 | struct buffer_head *bitmap_bh = NULL; |
783 | struct inode *inode = NULL; | 786 | struct inode *inode = NULL; |
@@ -833,7 +836,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
833 | { | 836 | { |
834 | unsigned long desc_count; | 837 | unsigned long desc_count; |
835 | struct ext4_group_desc *gdp; | 838 | struct ext4_group_desc *gdp; |
836 | int i; | 839 | ext4_group_t i; |
837 | #ifdef EXT4FS_DEBUG | 840 | #ifdef EXT4FS_DEBUG |
838 | struct ext4_super_block *es; | 841 | struct ext4_super_block *es; |
839 | unsigned long bitmap_count, x; | 842 | unsigned long bitmap_count, x; |
@@ -854,7 +857,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
854 | continue; | 857 | continue; |
855 | 858 | ||
856 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); | 859 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
857 | printk("group %d: stored = %d, counted = %lu\n", | 860 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
858 | i, le16_to_cpu(gdp->bg_free_inodes_count), x); | 861 | i, le16_to_cpu(gdp->bg_free_inodes_count), x); |
859 | bitmap_count += x; | 862 | bitmap_count += x; |
860 | } | 863 | } |
@@ -879,7 +882,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
879 | unsigned long ext4_count_dirs (struct super_block * sb) | 882 | unsigned long ext4_count_dirs (struct super_block * sb) |
880 | { | 883 | { |
881 | unsigned long count = 0; | 884 | unsigned long count = 0; |
882 | int i; | 885 | ext4_group_t i; |
883 | 886 | ||
884 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 887 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
885 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 888 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5489703d9573..bb717cbb749c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -105,7 +105,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | |||
105 | */ | 105 | */ |
106 | static unsigned long blocks_for_truncate(struct inode *inode) | 106 | static unsigned long blocks_for_truncate(struct inode *inode) |
107 | { | 107 | { |
108 | unsigned long needed; | 108 | ext4_lblk_t needed; |
109 | 109 | ||
110 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); | 110 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); |
111 | 111 | ||
@@ -243,13 +243,6 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | |||
243 | p->bh = bh; | 243 | p->bh = bh; |
244 | } | 244 | } |
245 | 245 | ||
246 | static int verify_chain(Indirect *from, Indirect *to) | ||
247 | { | ||
248 | while (from <= to && from->key == *from->p) | ||
249 | from++; | ||
250 | return (from > to); | ||
251 | } | ||
252 | |||
253 | /** | 246 | /** |
254 | * ext4_block_to_path - parse the block number into array of offsets | 247 | * ext4_block_to_path - parse the block number into array of offsets |
255 | * @inode: inode in question (we are only interested in its superblock) | 248 | * @inode: inode in question (we are only interested in its superblock) |
@@ -282,7 +275,8 @@ static int verify_chain(Indirect *from, Indirect *to) | |||
282 | */ | 275 | */ |
283 | 276 | ||
284 | static int ext4_block_to_path(struct inode *inode, | 277 | static int ext4_block_to_path(struct inode *inode, |
285 | long i_block, int offsets[4], int *boundary) | 278 | ext4_lblk_t i_block, |
279 | ext4_lblk_t offsets[4], int *boundary) | ||
286 | { | 280 | { |
287 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 281 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
288 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | 282 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); |
@@ -313,7 +307,10 @@ static int ext4_block_to_path(struct inode *inode, | |||
313 | offsets[n++] = i_block & (ptrs - 1); | 307 | offsets[n++] = i_block & (ptrs - 1); |
314 | final = ptrs; | 308 | final = ptrs; |
315 | } else { | 309 | } else { |
316 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); | 310 | ext4_warning(inode->i_sb, "ext4_block_to_path", |
311 | "block %lu > max", | ||
312 | i_block + direct_blocks + | ||
313 | indirect_blocks + double_blocks); | ||
317 | } | 314 | } |
318 | if (boundary) | 315 | if (boundary) |
319 | *boundary = final - 1 - (i_block & (ptrs - 1)); | 316 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
@@ -344,12 +341,14 @@ static int ext4_block_to_path(struct inode *inode, | |||
344 | * (pointer to last triple returned, *@err == 0) | 341 | * (pointer to last triple returned, *@err == 0) |
345 | * or when it gets an IO error reading an indirect block | 342 | * or when it gets an IO error reading an indirect block |
346 | * (ditto, *@err == -EIO) | 343 | * (ditto, *@err == -EIO) |
347 | * or when it notices that chain had been changed while it was reading | ||
348 | * (ditto, *@err == -EAGAIN) | ||
349 | * or when it reads all @depth-1 indirect blocks successfully and finds | 344 | * or when it reads all @depth-1 indirect blocks successfully and finds |
350 | * the whole chain, all way to the data (returns %NULL, *err == 0). | 345 | * the whole chain, all way to the data (returns %NULL, *err == 0). |
346 | * | ||
347 | * Need to be called with | ||
348 | * down_read(&EXT4_I(inode)->i_data_sem) | ||
351 | */ | 349 | */ |
352 | static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, | 350 | static Indirect *ext4_get_branch(struct inode *inode, int depth, |
351 | ext4_lblk_t *offsets, | ||
353 | Indirect chain[4], int *err) | 352 | Indirect chain[4], int *err) |
354 | { | 353 | { |
355 | struct super_block *sb = inode->i_sb; | 354 | struct super_block *sb = inode->i_sb; |
@@ -365,9 +364,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, | |||
365 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 364 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
366 | if (!bh) | 365 | if (!bh) |
367 | goto failure; | 366 | goto failure; |
368 | /* Reader: pointers */ | ||
369 | if (!verify_chain(chain, p)) | ||
370 | goto changed; | ||
371 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 367 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); |
372 | /* Reader: end */ | 368 | /* Reader: end */ |
373 | if (!p->key) | 369 | if (!p->key) |
@@ -375,10 +371,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, | |||
375 | } | 371 | } |
376 | return NULL; | 372 | return NULL; |
377 | 373 | ||
378 | changed: | ||
379 | brelse(bh); | ||
380 | *err = -EAGAIN; | ||
381 | goto no_block; | ||
382 | failure: | 374 | failure: |
383 | *err = -EIO; | 375 | *err = -EIO; |
384 | no_block: | 376 | no_block: |
@@ -445,7 +437,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
445 | * stores it in *@goal and returns zero. | 437 | * stores it in *@goal and returns zero. |
446 | */ | 438 | */ |
447 | 439 | ||
448 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, | 440 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
449 | Indirect chain[4], Indirect *partial) | 441 | Indirect chain[4], Indirect *partial) |
450 | { | 442 | { |
451 | struct ext4_block_alloc_info *block_i; | 443 | struct ext4_block_alloc_info *block_i; |
@@ -559,7 +551,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
559 | return ret; | 551 | return ret; |
560 | failed_out: | 552 | failed_out: |
561 | for (i = 0; i <index; i++) | 553 | for (i = 0; i <index; i++) |
562 | ext4_free_blocks(handle, inode, new_blocks[i], 1); | 554 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
563 | return ret; | 555 | return ret; |
564 | } | 556 | } |
565 | 557 | ||
@@ -590,7 +582,7 @@ failed_out: | |||
590 | */ | 582 | */ |
591 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 583 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, |
592 | int indirect_blks, int *blks, ext4_fsblk_t goal, | 584 | int indirect_blks, int *blks, ext4_fsblk_t goal, |
593 | int *offsets, Indirect *branch) | 585 | ext4_lblk_t *offsets, Indirect *branch) |
594 | { | 586 | { |
595 | int blocksize = inode->i_sb->s_blocksize; | 587 | int blocksize = inode->i_sb->s_blocksize; |
596 | int i, n = 0; | 588 | int i, n = 0; |
@@ -658,9 +650,9 @@ failed: | |||
658 | ext4_journal_forget(handle, branch[i].bh); | 650 | ext4_journal_forget(handle, branch[i].bh); |
659 | } | 651 | } |
660 | for (i = 0; i <indirect_blks; i++) | 652 | for (i = 0; i <indirect_blks; i++) |
661 | ext4_free_blocks(handle, inode, new_blocks[i], 1); | 653 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
662 | 654 | ||
663 | ext4_free_blocks(handle, inode, new_blocks[i], num); | 655 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
664 | 656 | ||
665 | return err; | 657 | return err; |
666 | } | 658 | } |
@@ -680,7 +672,7 @@ failed: | |||
680 | * chain to new block and return 0. | 672 | * chain to new block and return 0. |
681 | */ | 673 | */ |
682 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | 674 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, |
683 | long block, Indirect *where, int num, int blks) | 675 | ext4_lblk_t block, Indirect *where, int num, int blks) |
684 | { | 676 | { |
685 | int i; | 677 | int i; |
686 | int err = 0; | 678 | int err = 0; |
@@ -757,9 +749,10 @@ err_out: | |||
757 | for (i = 1; i <= num; i++) { | 749 | for (i = 1; i <= num; i++) { |
758 | BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); | 750 | BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); |
759 | ext4_journal_forget(handle, where[i].bh); | 751 | ext4_journal_forget(handle, where[i].bh); |
760 | ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); | 752 | ext4_free_blocks(handle, inode, |
753 | le32_to_cpu(where[i-1].key), 1, 0); | ||
761 | } | 754 | } |
762 | ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); | 755 | ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); |
763 | 756 | ||
764 | return err; | 757 | return err; |
765 | } | 758 | } |
@@ -782,14 +775,19 @@ err_out: | |||
782 | * return > 0, # of blocks mapped or allocated. | 775 | * return > 0, # of blocks mapped or allocated. |
783 | * return = 0, if plain lookup failed. | 776 | * return = 0, if plain lookup failed. |
784 | * return < 0, error case. | 777 | * return < 0, error case. |
778 | * | ||
779 | * | ||
780 | * Need to be called with | ||
781 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block | ||
782 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | ||
785 | */ | 783 | */ |
786 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 784 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
787 | sector_t iblock, unsigned long maxblocks, | 785 | ext4_lblk_t iblock, unsigned long maxblocks, |
788 | struct buffer_head *bh_result, | 786 | struct buffer_head *bh_result, |
789 | int create, int extend_disksize) | 787 | int create, int extend_disksize) |
790 | { | 788 | { |
791 | int err = -EIO; | 789 | int err = -EIO; |
792 | int offsets[4]; | 790 | ext4_lblk_t offsets[4]; |
793 | Indirect chain[4]; | 791 | Indirect chain[4]; |
794 | Indirect *partial; | 792 | Indirect *partial; |
795 | ext4_fsblk_t goal; | 793 | ext4_fsblk_t goal; |
@@ -803,7 +801,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
803 | 801 | ||
804 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 802 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
805 | J_ASSERT(handle != NULL || create == 0); | 803 | J_ASSERT(handle != NULL || create == 0); |
806 | depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); | 804 | depth = ext4_block_to_path(inode, iblock, offsets, |
805 | &blocks_to_boundary); | ||
807 | 806 | ||
808 | if (depth == 0) | 807 | if (depth == 0) |
809 | goto out; | 808 | goto out; |
@@ -819,18 +818,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
819 | while (count < maxblocks && count <= blocks_to_boundary) { | 818 | while (count < maxblocks && count <= blocks_to_boundary) { |
820 | ext4_fsblk_t blk; | 819 | ext4_fsblk_t blk; |
821 | 820 | ||
822 | if (!verify_chain(chain, partial)) { | ||
823 | /* | ||
824 | * Indirect block might be removed by | ||
825 | * truncate while we were reading it. | ||
826 | * Handling of that case: forget what we've | ||
827 | * got now. Flag the err as EAGAIN, so it | ||
828 | * will reread. | ||
829 | */ | ||
830 | err = -EAGAIN; | ||
831 | count = 0; | ||
832 | break; | ||
833 | } | ||
834 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 821 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
835 | 822 | ||
836 | if (blk == first_block + count) | 823 | if (blk == first_block + count) |
@@ -838,44 +825,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
838 | else | 825 | else |
839 | break; | 826 | break; |
840 | } | 827 | } |
841 | if (err != -EAGAIN) | 828 | goto got_it; |
842 | goto got_it; | ||
843 | } | 829 | } |
844 | 830 | ||
845 | /* Next simple case - plain lookup or failed read of indirect block */ | 831 | /* Next simple case - plain lookup or failed read of indirect block */ |
846 | if (!create || err == -EIO) | 832 | if (!create || err == -EIO) |
847 | goto cleanup; | 833 | goto cleanup; |
848 | 834 | ||
849 | mutex_lock(&ei->truncate_mutex); | ||
850 | |||
851 | /* | ||
852 | * If the indirect block is missing while we are reading | ||
853 | * the chain(ext4_get_branch() returns -EAGAIN err), or | ||
854 | * if the chain has been changed after we grab the semaphore, | ||
855 | * (either because another process truncated this branch, or | ||
856 | * another get_block allocated this branch) re-grab the chain to see if | ||
857 | * the request block has been allocated or not. | ||
858 | * | ||
859 | * Since we already block the truncate/other get_block | ||
860 | * at this point, we will have the current copy of the chain when we | ||
861 | * splice the branch into the tree. | ||
862 | */ | ||
863 | if (err == -EAGAIN || !verify_chain(chain, partial)) { | ||
864 | while (partial > chain) { | ||
865 | brelse(partial->bh); | ||
866 | partial--; | ||
867 | } | ||
868 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | ||
869 | if (!partial) { | ||
870 | count++; | ||
871 | mutex_unlock(&ei->truncate_mutex); | ||
872 | if (err) | ||
873 | goto cleanup; | ||
874 | clear_buffer_new(bh_result); | ||
875 | goto got_it; | ||
876 | } | ||
877 | } | ||
878 | |||
879 | /* | 835 | /* |
880 | * Okay, we need to do block allocation. Lazily initialize the block | 836 | * Okay, we need to do block allocation. Lazily initialize the block |
881 | * allocation info here if necessary | 837 | * allocation info here if necessary |
@@ -911,13 +867,12 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
911 | err = ext4_splice_branch(handle, inode, iblock, | 867 | err = ext4_splice_branch(handle, inode, iblock, |
912 | partial, indirect_blks, count); | 868 | partial, indirect_blks, count); |
913 | /* | 869 | /* |
914 | * i_disksize growing is protected by truncate_mutex. Don't forget to | 870 | * i_disksize growing is protected by i_data_sem. Don't forget to |
915 | * protect it if you're about to implement concurrent | 871 | * protect it if you're about to implement concurrent |
916 | * ext4_get_block() -bzzz | 872 | * ext4_get_block() -bzzz |
917 | */ | 873 | */ |
918 | if (!err && extend_disksize && inode->i_size > ei->i_disksize) | 874 | if (!err && extend_disksize && inode->i_size > ei->i_disksize) |
919 | ei->i_disksize = inode->i_size; | 875 | ei->i_disksize = inode->i_size; |
920 | mutex_unlock(&ei->truncate_mutex); | ||
921 | if (err) | 876 | if (err) |
922 | goto cleanup; | 877 | goto cleanup; |
923 | 878 | ||
@@ -942,6 +897,47 @@ out: | |||
942 | 897 | ||
943 | #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) | 898 | #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) |
944 | 899 | ||
900 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | ||
901 | unsigned long max_blocks, struct buffer_head *bh, | ||
902 | int create, int extend_disksize) | ||
903 | { | ||
904 | int retval; | ||
905 | /* | ||
906 | * Try to see if we can get the block without requesting | ||
907 | * a new file system block. | ||
908 | */ | ||
909 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
910 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | ||
911 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | ||
912 | bh, 0, 0); | ||
913 | } else { | ||
914 | retval = ext4_get_blocks_handle(handle, | ||
915 | inode, block, max_blocks, bh, 0, 0); | ||
916 | } | ||
917 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
918 | if (!create || (retval > 0)) | ||
919 | return retval; | ||
920 | |||
921 | /* | ||
922 | * We need to allocate new blocks which will result | ||
923 | * in i_data update | ||
924 | */ | ||
925 | down_write((&EXT4_I(inode)->i_data_sem)); | ||
926 | /* | ||
927 | * We need to re-check EXT4_EXTENTS_FL here because migrate | ||
928 | * could have changed the inode type in between | ||
929 | */ | ||
930 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | ||
931 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | ||
932 | bh, create, extend_disksize); | ||
933 | } else { | ||
934 | retval = ext4_get_blocks_handle(handle, inode, block, | ||
935 | max_blocks, bh, create, extend_disksize); | ||
936 | } | ||
937 | up_write((&EXT4_I(inode)->i_data_sem)); | ||
938 | return retval; | ||
939 | } | ||
940 | |||
945 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 941 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
946 | struct buffer_head *bh_result, int create) | 942 | struct buffer_head *bh_result, int create) |
947 | { | 943 | { |
@@ -996,7 +992,7 @@ get_block: | |||
996 | * `handle' can be NULL if create is zero | 992 | * `handle' can be NULL if create is zero |
997 | */ | 993 | */ |
998 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 994 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
999 | long block, int create, int *errp) | 995 | ext4_lblk_t block, int create, int *errp) |
1000 | { | 996 | { |
1001 | struct buffer_head dummy; | 997 | struct buffer_head dummy; |
1002 | int fatal = 0, err; | 998 | int fatal = 0, err; |
@@ -1063,7 +1059,7 @@ err: | |||
1063 | } | 1059 | } |
1064 | 1060 | ||
1065 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1061 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1066 | int block, int create, int *err) | 1062 | ext4_lblk_t block, int create, int *err) |
1067 | { | 1063 | { |
1068 | struct buffer_head * bh; | 1064 | struct buffer_head * bh; |
1069 | 1065 | ||
@@ -1446,7 +1442,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
1446 | * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... | 1442 | * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... |
1447 | * | 1443 | * |
1448 | * Same applies to ext4_get_block(). We will deadlock on various things like | 1444 | * Same applies to ext4_get_block(). We will deadlock on various things like |
1449 | * lock_journal and i_truncate_mutex. | 1445 | * lock_journal and i_data_sem |
1450 | * | 1446 | * |
1451 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | 1447 | * Setting PF_MEMALLOC here doesn't work - too many internal memory |
1452 | * allocations fail. | 1448 | * allocations fail. |
@@ -1828,7 +1824,8 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1828 | { | 1824 | { |
1829 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 1825 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
1830 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 1826 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
1831 | unsigned blocksize, iblock, length, pos; | 1827 | unsigned blocksize, length, pos; |
1828 | ext4_lblk_t iblock; | ||
1832 | struct inode *inode = mapping->host; | 1829 | struct inode *inode = mapping->host; |
1833 | struct buffer_head *bh; | 1830 | struct buffer_head *bh; |
1834 | int err = 0; | 1831 | int err = 0; |
@@ -1964,7 +1961,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) | |||
1964 | * (no partially truncated stuff there). */ | 1961 | * (no partially truncated stuff there). */ |
1965 | 1962 | ||
1966 | static Indirect *ext4_find_shared(struct inode *inode, int depth, | 1963 | static Indirect *ext4_find_shared(struct inode *inode, int depth, |
1967 | int offsets[4], Indirect chain[4], __le32 *top) | 1964 | ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) |
1968 | { | 1965 | { |
1969 | Indirect *partial, *p; | 1966 | Indirect *partial, *p; |
1970 | int k, err; | 1967 | int k, err; |
@@ -2048,15 +2045,15 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
2048 | for (p = first; p < last; p++) { | 2045 | for (p = first; p < last; p++) { |
2049 | u32 nr = le32_to_cpu(*p); | 2046 | u32 nr = le32_to_cpu(*p); |
2050 | if (nr) { | 2047 | if (nr) { |
2051 | struct buffer_head *bh; | 2048 | struct buffer_head *tbh; |
2052 | 2049 | ||
2053 | *p = 0; | 2050 | *p = 0; |
2054 | bh = sb_find_get_block(inode->i_sb, nr); | 2051 | tbh = sb_find_get_block(inode->i_sb, nr); |
2055 | ext4_forget(handle, 0, inode, bh, nr); | 2052 | ext4_forget(handle, 0, inode, tbh, nr); |
2056 | } | 2053 | } |
2057 | } | 2054 | } |
2058 | 2055 | ||
2059 | ext4_free_blocks(handle, inode, block_to_free, count); | 2056 | ext4_free_blocks(handle, inode, block_to_free, count, 0); |
2060 | } | 2057 | } |
2061 | 2058 | ||
2062 | /** | 2059 | /** |
@@ -2229,7 +2226,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
2229 | ext4_journal_test_restart(handle, inode); | 2226 | ext4_journal_test_restart(handle, inode); |
2230 | } | 2227 | } |
2231 | 2228 | ||
2232 | ext4_free_blocks(handle, inode, nr, 1); | 2229 | ext4_free_blocks(handle, inode, nr, 1, 1); |
2233 | 2230 | ||
2234 | if (parent_bh) { | 2231 | if (parent_bh) { |
2235 | /* | 2232 | /* |
@@ -2289,12 +2286,12 @@ void ext4_truncate(struct inode *inode) | |||
2289 | __le32 *i_data = ei->i_data; | 2286 | __le32 *i_data = ei->i_data; |
2290 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 2287 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
2291 | struct address_space *mapping = inode->i_mapping; | 2288 | struct address_space *mapping = inode->i_mapping; |
2292 | int offsets[4]; | 2289 | ext4_lblk_t offsets[4]; |
2293 | Indirect chain[4]; | 2290 | Indirect chain[4]; |
2294 | Indirect *partial; | 2291 | Indirect *partial; |
2295 | __le32 nr = 0; | 2292 | __le32 nr = 0; |
2296 | int n; | 2293 | int n; |
2297 | long last_block; | 2294 | ext4_lblk_t last_block; |
2298 | unsigned blocksize = inode->i_sb->s_blocksize; | 2295 | unsigned blocksize = inode->i_sb->s_blocksize; |
2299 | struct page *page; | 2296 | struct page *page; |
2300 | 2297 | ||
@@ -2320,8 +2317,10 @@ void ext4_truncate(struct inode *inode) | |||
2320 | return; | 2317 | return; |
2321 | } | 2318 | } |
2322 | 2319 | ||
2323 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 2320 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
2324 | return ext4_ext_truncate(inode, page); | 2321 | ext4_ext_truncate(inode, page); |
2322 | return; | ||
2323 | } | ||
2325 | 2324 | ||
2326 | handle = start_transaction(inode); | 2325 | handle = start_transaction(inode); |
2327 | if (IS_ERR(handle)) { | 2326 | if (IS_ERR(handle)) { |
@@ -2369,7 +2368,7 @@ void ext4_truncate(struct inode *inode) | |||
2369 | * From here we block out all ext4_get_block() callers who want to | 2368 | * From here we block out all ext4_get_block() callers who want to |
2370 | * modify the block allocation tree. | 2369 | * modify the block allocation tree. |
2371 | */ | 2370 | */ |
2372 | mutex_lock(&ei->truncate_mutex); | 2371 | down_write(&ei->i_data_sem); |
2373 | 2372 | ||
2374 | if (n == 1) { /* direct blocks */ | 2373 | if (n == 1) { /* direct blocks */ |
2375 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 2374 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
@@ -2433,7 +2432,7 @@ do_indirects: | |||
2433 | 2432 | ||
2434 | ext4_discard_reservation(inode); | 2433 | ext4_discard_reservation(inode); |
2435 | 2434 | ||
2436 | mutex_unlock(&ei->truncate_mutex); | 2435 | up_write(&ei->i_data_sem); |
2437 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 2436 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
2438 | ext4_mark_inode_dirty(handle, inode); | 2437 | ext4_mark_inode_dirty(handle, inode); |
2439 | 2438 | ||
@@ -2460,7 +2459,8 @@ out_stop: | |||
2460 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | 2459 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, |
2461 | unsigned long ino, struct ext4_iloc *iloc) | 2460 | unsigned long ino, struct ext4_iloc *iloc) |
2462 | { | 2461 | { |
2463 | unsigned long desc, group_desc, block_group; | 2462 | unsigned long desc, group_desc; |
2463 | ext4_group_t block_group; | ||
2464 | unsigned long offset; | 2464 | unsigned long offset; |
2465 | ext4_fsblk_t block; | 2465 | ext4_fsblk_t block; |
2466 | struct buffer_head *bh; | 2466 | struct buffer_head *bh; |
@@ -2547,7 +2547,7 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
2547 | struct ext4_group_desc *desc; | 2547 | struct ext4_group_desc *desc; |
2548 | int inodes_per_buffer; | 2548 | int inodes_per_buffer; |
2549 | int inode_offset, i; | 2549 | int inode_offset, i; |
2550 | int block_group; | 2550 | ext4_group_t block_group; |
2551 | int start; | 2551 | int start; |
2552 | 2552 | ||
2553 | block_group = (inode->i_ino - 1) / | 2553 | block_group = (inode->i_ino - 1) / |
@@ -2660,6 +2660,28 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei) | |||
2660 | if (flags & S_DIRSYNC) | 2660 | if (flags & S_DIRSYNC) |
2661 | ei->i_flags |= EXT4_DIRSYNC_FL; | 2661 | ei->i_flags |= EXT4_DIRSYNC_FL; |
2662 | } | 2662 | } |
2663 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | ||
2664 | struct ext4_inode_info *ei) | ||
2665 | { | ||
2666 | blkcnt_t i_blocks ; | ||
2667 | struct inode *inode = &(ei->vfs_inode); | ||
2668 | struct super_block *sb = inode->i_sb; | ||
2669 | |||
2670 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2671 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
2672 | /* we are using combined 48 bit field */ | ||
2673 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | ||
2674 | le32_to_cpu(raw_inode->i_blocks_lo); | ||
2675 | if (ei->i_flags & EXT4_HUGE_FILE_FL) { | ||
2676 | /* i_blocks is in units of the file system block size */ | ||
2677 | return i_blocks << (inode->i_blkbits - 9); | ||
2678 | } else { | ||
2679 | return i_blocks; | ||
2680 | } | ||
2681 | } else { | ||
2682 | return le32_to_cpu(raw_inode->i_blocks_lo); | ||
2683 | } | ||
2684 | } | ||
2663 | 2685 | ||
2664 | void ext4_read_inode(struct inode * inode) | 2686 | void ext4_read_inode(struct inode * inode) |
2665 | { | 2687 | { |
@@ -2687,7 +2709,6 @@ void ext4_read_inode(struct inode * inode) | |||
2687 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 2709 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
2688 | } | 2710 | } |
2689 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 2711 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
2690 | inode->i_size = le32_to_cpu(raw_inode->i_size); | ||
2691 | 2712 | ||
2692 | ei->i_state = 0; | 2713 | ei->i_state = 0; |
2693 | ei->i_dir_start_lookup = 0; | 2714 | ei->i_dir_start_lookup = 0; |
@@ -2709,19 +2730,15 @@ void ext4_read_inode(struct inode * inode) | |||
2709 | * recovery code: that's fine, we're about to complete | 2730 | * recovery code: that's fine, we're about to complete |
2710 | * the process of deleting those. */ | 2731 | * the process of deleting those. */ |
2711 | } | 2732 | } |
2712 | inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); | ||
2713 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | 2733 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); |
2714 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); | 2734 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); |
2735 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); | ||
2715 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 2736 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
2716 | cpu_to_le32(EXT4_OS_HURD)) | 2737 | cpu_to_le32(EXT4_OS_HURD)) { |
2717 | ei->i_file_acl |= | 2738 | ei->i_file_acl |= |
2718 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 2739 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
2719 | if (!S_ISREG(inode->i_mode)) { | ||
2720 | ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); | ||
2721 | } else { | ||
2722 | inode->i_size |= | ||
2723 | ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; | ||
2724 | } | 2740 | } |
2741 | inode->i_size = ext4_isize(raw_inode); | ||
2725 | ei->i_disksize = inode->i_size; | 2742 | ei->i_disksize = inode->i_size; |
2726 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 2743 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
2727 | ei->i_block_group = iloc.block_group; | 2744 | ei->i_block_group = iloc.block_group; |
@@ -2765,6 +2782,13 @@ void ext4_read_inode(struct inode * inode) | |||
2765 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); | 2782 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); |
2766 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); | 2783 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); |
2767 | 2784 | ||
2785 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); | ||
2786 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | ||
2787 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | ||
2788 | inode->i_version |= | ||
2789 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | ||
2790 | } | ||
2791 | |||
2768 | if (S_ISREG(inode->i_mode)) { | 2792 | if (S_ISREG(inode->i_mode)) { |
2769 | inode->i_op = &ext4_file_inode_operations; | 2793 | inode->i_op = &ext4_file_inode_operations; |
2770 | inode->i_fop = &ext4_file_operations; | 2794 | inode->i_fop = &ext4_file_operations; |
@@ -2797,6 +2821,55 @@ bad_inode: | |||
2797 | return; | 2821 | return; |
2798 | } | 2822 | } |
2799 | 2823 | ||
2824 | static int ext4_inode_blocks_set(handle_t *handle, | ||
2825 | struct ext4_inode *raw_inode, | ||
2826 | struct ext4_inode_info *ei) | ||
2827 | { | ||
2828 | struct inode *inode = &(ei->vfs_inode); | ||
2829 | u64 i_blocks = inode->i_blocks; | ||
2830 | struct super_block *sb = inode->i_sb; | ||
2831 | int err = 0; | ||
2832 | |||
2833 | if (i_blocks <= ~0U) { | ||
2834 | /* | ||
2835 | * i_blocks can be represented in a 32 bit variable | ||
2836 | * as multiple of 512 bytes | ||
2837 | */ | ||
2838 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | ||
2839 | raw_inode->i_blocks_high = 0; | ||
2840 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | ||
2841 | } else if (i_blocks <= 0xffffffffffffULL) { | ||
2842 | /* | ||
2843 | * i_blocks can be represented in a 48 bit variable | ||
2844 | * as multiple of 512 bytes | ||
2845 | */ | ||
2846 | err = ext4_update_rocompat_feature(handle, sb, | ||
2847 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2848 | if (err) | ||
2849 | goto err_out; | ||
2850 | /* i_blocks is stored in the split 48 bit fields */ | ||
2851 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | ||
2852 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | ||
2853 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | ||
2854 | } else { | ||
2855 | /* | ||
2856 | * i_blocks should be represented in a 48 bit variable | ||
2857 | * as multiple of file system block size | ||
2858 | */ | ||
2859 | err = ext4_update_rocompat_feature(handle, sb, | ||
2860 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2861 | if (err) | ||
2862 | goto err_out; | ||
2863 | ei->i_flags |= EXT4_HUGE_FILE_FL; | ||
2864 | /* i_blocks is stored in units of file system block size */ | ||
2865 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | ||
2866 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | ||
2867 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | ||
2868 | } | ||
2869 | err_out: | ||
2870 | return err; | ||
2871 | } | ||
2872 | |||
2800 | /* | 2873 | /* |
2801 | * Post the struct inode info into an on-disk inode location in the | 2874 | * Post the struct inode info into an on-disk inode location in the |
2802 | * buffer-cache. This gobbles the caller's reference to the | 2875 | * buffer-cache. This gobbles the caller's reference to the |
@@ -2845,47 +2918,42 @@ static int ext4_do_update_inode(handle_t *handle, | |||
2845 | raw_inode->i_gid_high = 0; | 2918 | raw_inode->i_gid_high = 0; |
2846 | } | 2919 | } |
2847 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 2920 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
2848 | raw_inode->i_size = cpu_to_le32(ei->i_disksize); | ||
2849 | 2921 | ||
2850 | EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); | 2922 | EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); |
2851 | EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); | 2923 | EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); |
2852 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); | 2924 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); |
2853 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); | 2925 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); |
2854 | 2926 | ||
2855 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); | 2927 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
2928 | goto out_brelse; | ||
2856 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 2929 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
2857 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 2930 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
2858 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 2931 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
2859 | cpu_to_le32(EXT4_OS_HURD)) | 2932 | cpu_to_le32(EXT4_OS_HURD)) |
2860 | raw_inode->i_file_acl_high = | 2933 | raw_inode->i_file_acl_high = |
2861 | cpu_to_le16(ei->i_file_acl >> 32); | 2934 | cpu_to_le16(ei->i_file_acl >> 32); |
2862 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); | 2935 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
2863 | if (!S_ISREG(inode->i_mode)) { | 2936 | ext4_isize_set(raw_inode, ei->i_disksize); |
2864 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); | 2937 | if (ei->i_disksize > 0x7fffffffULL) { |
2865 | } else { | 2938 | struct super_block *sb = inode->i_sb; |
2866 | raw_inode->i_size_high = | 2939 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2867 | cpu_to_le32(ei->i_disksize >> 32); | 2940 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || |
2868 | if (ei->i_disksize > 0x7fffffffULL) { | 2941 | EXT4_SB(sb)->s_es->s_rev_level == |
2869 | struct super_block *sb = inode->i_sb; | 2942 | cpu_to_le32(EXT4_GOOD_OLD_REV)) { |
2870 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2943 | /* If this is the first large file |
2871 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || | 2944 | * created, add a flag to the superblock. |
2872 | EXT4_SB(sb)->s_es->s_rev_level == | 2945 | */ |
2873 | cpu_to_le32(EXT4_GOOD_OLD_REV)) { | 2946 | err = ext4_journal_get_write_access(handle, |
2874 | /* If this is the first large file | 2947 | EXT4_SB(sb)->s_sbh); |
2875 | * created, add a flag to the superblock. | 2948 | if (err) |
2876 | */ | 2949 | goto out_brelse; |
2877 | err = ext4_journal_get_write_access(handle, | 2950 | ext4_update_dynamic_rev(sb); |
2878 | EXT4_SB(sb)->s_sbh); | 2951 | EXT4_SET_RO_COMPAT_FEATURE(sb, |
2879 | if (err) | ||
2880 | goto out_brelse; | ||
2881 | ext4_update_dynamic_rev(sb); | ||
2882 | EXT4_SET_RO_COMPAT_FEATURE(sb, | ||
2883 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 2952 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
2884 | sb->s_dirt = 1; | 2953 | sb->s_dirt = 1; |
2885 | handle->h_sync = 1; | 2954 | handle->h_sync = 1; |
2886 | err = ext4_journal_dirty_metadata(handle, | 2955 | err = ext4_journal_dirty_metadata(handle, |
2887 | EXT4_SB(sb)->s_sbh); | 2956 | EXT4_SB(sb)->s_sbh); |
2888 | } | ||
2889 | } | 2957 | } |
2890 | } | 2958 | } |
2891 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 2959 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
@@ -2903,8 +2971,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
2903 | } else for (block = 0; block < EXT4_N_BLOCKS; block++) | 2971 | } else for (block = 0; block < EXT4_N_BLOCKS; block++) |
2904 | raw_inode->i_block[block] = ei->i_data[block]; | 2972 | raw_inode->i_block[block] = ei->i_data[block]; |
2905 | 2973 | ||
2906 | if (ei->i_extra_isize) | 2974 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
2975 | if (ei->i_extra_isize) { | ||
2976 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | ||
2977 | raw_inode->i_version_hi = | ||
2978 | cpu_to_le32(inode->i_version >> 32); | ||
2907 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 2979 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
2980 | } | ||
2981 | |||
2908 | 2982 | ||
2909 | BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); | 2983 | BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); |
2910 | rc = ext4_journal_dirty_metadata(handle, bh); | 2984 | rc = ext4_journal_dirty_metadata(handle, bh); |
@@ -3024,6 +3098,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
3024 | ext4_journal_stop(handle); | 3098 | ext4_journal_stop(handle); |
3025 | } | 3099 | } |
3026 | 3100 | ||
3101 | if (attr->ia_valid & ATTR_SIZE) { | ||
3102 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | ||
3103 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
3104 | |||
3105 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | ||
3106 | error = -EFBIG; | ||
3107 | goto err_out; | ||
3108 | } | ||
3109 | } | ||
3110 | } | ||
3111 | |||
3027 | if (S_ISREG(inode->i_mode) && | 3112 | if (S_ISREG(inode->i_mode) && |
3028 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 3113 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { |
3029 | handle_t *handle; | 3114 | handle_t *handle; |
@@ -3120,6 +3205,9 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
3120 | { | 3205 | { |
3121 | int err = 0; | 3206 | int err = 0; |
3122 | 3207 | ||
3208 | if (test_opt(inode->i_sb, I_VERSION)) | ||
3209 | inode_inc_iversion(inode); | ||
3210 | |||
3123 | /* the do_update_inode consumes one bh->b_count */ | 3211 | /* the do_update_inode consumes one bh->b_count */ |
3124 | get_bh(iloc->bh); | 3212 | get_bh(iloc->bh); |
3125 | 3213 | ||
@@ -3158,8 +3246,10 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, | |||
3158 | * Expand an inode by new_extra_isize bytes. | 3246 | * Expand an inode by new_extra_isize bytes. |
3159 | * Returns 0 on success or negative error number on failure. | 3247 | * Returns 0 on success or negative error number on failure. |
3160 | */ | 3248 | */ |
3161 | int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, | 3249 | static int ext4_expand_extra_isize(struct inode *inode, |
3162 | struct ext4_iloc iloc, handle_t *handle) | 3250 | unsigned int new_extra_isize, |
3251 | struct ext4_iloc iloc, | ||
3252 | handle_t *handle) | ||
3163 | { | 3253 | { |
3164 | struct ext4_inode *raw_inode; | 3254 | struct ext4_inode *raw_inode; |
3165 | struct ext4_xattr_ibody_header *header; | 3255 | struct ext4_xattr_ibody_header *header; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e7f894bdb420..2ed7c37f897e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -199,7 +199,7 @@ flags_err: | |||
199 | * need to allocate reservation structure for this inode | 199 | * need to allocate reservation structure for this inode |
200 | * before set the window size | 200 | * before set the window size |
201 | */ | 201 | */ |
202 | mutex_lock(&ei->truncate_mutex); | 202 | down_write(&ei->i_data_sem); |
203 | if (!ei->i_block_alloc_info) | 203 | if (!ei->i_block_alloc_info) |
204 | ext4_init_block_alloc_info(inode); | 204 | ext4_init_block_alloc_info(inode); |
205 | 205 | ||
@@ -207,7 +207,7 @@ flags_err: | |||
207 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | 207 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; |
208 | rsv->rsv_goal_size = rsv_window_size; | 208 | rsv->rsv_goal_size = rsv_window_size; |
209 | } | 209 | } |
210 | mutex_unlock(&ei->truncate_mutex); | 210 | up_write(&ei->i_data_sem); |
211 | return 0; | 211 | return 0; |
212 | } | 212 | } |
213 | case EXT4_IOC_GROUP_EXTEND: { | 213 | case EXT4_IOC_GROUP_EXTEND: { |
@@ -254,6 +254,9 @@ flags_err: | |||
254 | return err; | 254 | return err; |
255 | } | 255 | } |
256 | 256 | ||
257 | case EXT4_IOC_MIGRATE: | ||
258 | return ext4_ext_migrate(inode, filp, cmd, arg); | ||
259 | |||
257 | default: | 260 | default: |
258 | return -ENOTTY; | 261 | return -ENOTTY; |
259 | } | 262 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c new file mode 100644 index 000000000000..76e5fedc0a0b --- /dev/null +++ b/fs/ext4/mballoc.c | |||
@@ -0,0 +1,4552 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com | ||
3 | * Written by Alex Tomas <alex@clusterfs.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | ||
18 | |||
19 | |||
20 | /* | ||
21 | * mballoc.c contains the multiblocks allocation routines | ||
22 | */ | ||
23 | |||
24 | #include <linux/time.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/namei.h> | ||
27 | #include <linux/ext4_jbd2.h> | ||
28 | #include <linux/ext4_fs.h> | ||
29 | #include <linux/quotaops.h> | ||
30 | #include <linux/buffer_head.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/swap.h> | ||
33 | #include <linux/proc_fs.h> | ||
34 | #include <linux/pagemap.h> | ||
35 | #include <linux/seq_file.h> | ||
36 | #include <linux/version.h> | ||
37 | #include "group.h" | ||
38 | |||
39 | /* | ||
40 | * MUSTDO: | ||
41 | * - test ext4_ext_search_left() and ext4_ext_search_right() | ||
42 | * - search for metadata in few groups | ||
43 | * | ||
44 | * TODO v4: | ||
45 | * - normalization should take into account whether file is still open | ||
46 | * - discard preallocations if no free space left (policy?) | ||
47 | * - don't normalize tails | ||
48 | * - quota | ||
49 | * - reservation for superuser | ||
50 | * | ||
51 | * TODO v3: | ||
52 | * - bitmap read-ahead (proposed by Oleg Drokin aka green) | ||
53 | * - track min/max extents in each group for better group selection | ||
54 | * - mb_mark_used() may allocate chunk right after splitting buddy | ||
55 | * - tree of groups sorted by number of free blocks | ||
56 | * - error handling | ||
57 | */ | ||
58 | |||
59 | /* | ||
60 | * An allocation request asks for multiple blocks | |
61 | * near the goal (block) value specified. | |
62 | * | ||
63 | * During the initialization phase of the allocator we decide to use group | |
64 | * preallocation or inode preallocation depending on the size of the file. The | |
65 | * size of the file could be the resulting file size we would have after | |
66 | * allocation or the current file size, whichever is larger. If the size is | |
67 | * less than sbi->s_mb_stream_request we select the group | |
68 | * preallocation. The default value of s_mb_stream_request is 16 | ||
69 | * blocks. This can also be tuned via | ||
70 | * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms | ||
71 | * of number of blocks. | ||
72 | * | ||
73 | * The main motivation for having small files use group preallocation is to | |
74 | * ensure that small files are kept close together on the disk. | |
75 | * | ||
76 | * First stage the allocator looks at the inode prealloc list | ||
77 | * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for | ||
78 | * this particular inode. The inode prealloc space is represented as: | ||
79 | * | ||
80 | * pa_lstart -> the logical start block for this prealloc space | ||
81 | * pa_pstart -> the physical start block for this prealloc space | ||
82 | * pa_len -> length for this prealloc space | |
83 | * pa_free -> free space available in this prealloc space | ||
84 | * | ||
85 | * The inode preallocation space is used looking at the _logical_ start | ||
86 | * block. If only the logical file block falls within the range of prealloc | ||
87 | * space we will consume the particular prealloc space. This makes sure that | |
88 | * we have contiguous physical blocks representing the file blocks | |
89 | * | ||
90 | * The important thing to be noted in case of inode prealloc space is that | ||
91 | * we don't modify the values associated to inode prealloc space except | ||
92 | * pa_free. | ||
93 | * | ||
94 | * If we are not able to find blocks in the inode prealloc space and if we | ||
95 | * have the group allocation flag set then we look at the locality group | ||
96 | * prealloc space. These are per CPU prealloc list represented as | |
97 | * | ||
98 | * ext4_sb_info.s_locality_groups[smp_processor_id()] | ||
99 | * | ||
100 | * The reason for having a per cpu locality group is to reduce the contention | ||
101 | * between CPUs. It is possible to get scheduled at this point. | ||
102 | * | ||
103 | * The locality group prealloc space is used looking at whether we have | ||
104 | * enough free space (pa_free) within the prealloc space. | |
105 | * | ||
106 | * If we can't allocate blocks via inode prealloc or/and locality group | ||
107 | * prealloc then we look at the buddy cache. The buddy cache is represented | ||
108 | * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets | ||
109 | * mapped to the buddy and bitmap information regarding different | ||
110 | * groups. The buddy information is attached to buddy cache inode so that | ||
111 | * we can access them through the page cache. The information regarding | ||
112 | * each group is loaded via ext4_mb_load_buddy. The information involve | ||
113 | * block bitmap and buddy information. The information are stored in the | ||
114 | * inode as: | ||
115 | * | ||
116 | * { page } | ||
117 | * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... | ||
118 | * | ||
119 | * | ||
120 | * one block each for bitmap and buddy information. So for each group we | ||
121 | * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE / | ||
122 | * blocksize) blocks. So it can have information regarding groups_per_page | ||
123 | * which is blocks_per_page/2 | ||
124 | * | ||
125 | * The buddy cache inode is not stored on disk. The inode is thrown | ||
126 | * away when the filesystem is unmounted. | ||
127 | * | ||
128 | * We look for count number of blocks in the buddy cache. If we were able | ||
129 | * to locate that many free blocks we return with additional information | ||
130 | * regarding rest of the contiguous physical block available | ||
131 | * | ||
132 | * Before allocating blocks via buddy cache we normalize the request | ||
133 | * blocks. This ensures we ask for more blocks than we needed. The extra | |
134 | * blocks that we get after allocation are added to the respective prealloc | |
135 | * list. In case of inode preallocation we follow a list of heuristics | ||
136 | * based on file size. This can be found in ext4_mb_normalize_request. If | ||
137 | * we are doing a group prealloc we try to normalize the request to | ||
138 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to | ||
139 | * 512 blocks. This can be tuned via | ||
140 | * /proc/fs/ext4/<partition>/group_prealloc. The value is represented in | |
141 | * terms of number of blocks. If we have mounted the file system with -o | |
142 | * stripe=<value> option the group prealloc request is normalized to the | |
143 | * stripe value (sbi->s_stripe) | ||
144 | * | ||
145 | * The regular allocator(using the buddy cache) support few tunables. | ||
146 | * | ||
147 | * /proc/fs/ext4/<partition>/min_to_scan | ||
148 | * /proc/fs/ext4/<partition>/max_to_scan | ||
149 | * /proc/fs/ext4/<partition>/order2_req | ||
150 | * | ||
151 | * The regular allocator use buddy scan only if the request len is power of | ||
152 | * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The | ||
153 | * value of s_mb_order2_reqs can be tuned via | ||
154 | * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to | ||
155 | * stripe size (sbi->s_stripe), we try to search for contiguous blocks in | |
156 | * stripe size. This should result in better allocation on RAID setup. If | |
157 | * not we search in the specific group using bitmap for best extents. The | |
158 | * tunables min_to_scan and max_to_scan control the behaviour here. | |
159 | * min_to_scan indicates how long the mballoc __must__ look for a best | |
160 | * extent and max_to_scan indicates how long the mballoc __can__ look for a | |
161 | * best extent in the found extents. Searching for the blocks starts with | ||
162 | * the group specified as the goal value in allocation context via | ||
163 | * ac_g_ex. Each group is first checked based on the criteria whether it | ||
164 | * can be used for allocation. ext4_mb_good_group explains how the groups are | |
165 | * checked. | ||
166 | * | ||
167 | * Both the prealloc space are getting populated as above. So for the first | ||
168 | * request we will hit the buddy cache which will result in this prealloc | ||
169 | * space getting filled. The prealloc space is then later used for the | ||
170 | * subsequent request. | ||
171 | */ | ||
172 | |||
173 | /* | ||
174 | * mballoc operates on the following data: | ||
175 | * - on-disk bitmap | ||
176 | * - in-core buddy (actually includes buddy and bitmap) | ||
177 | * - preallocation descriptors (PAs) | ||
178 | * | ||
179 | * there are two types of preallocations: | ||
180 | * - inode | ||
181 | * assigned to specific inode and can be used for this inode only. | |
182 | * it describes part of inode's space preallocated to specific | ||
183 | * physical blocks. any block from that preallocated can be used | ||
184 | * independent. the descriptor just tracks number of blocks left | ||
185 | * unused. so, before taking some block from descriptor, one must | ||
186 | * make sure corresponded logical block isn't allocated yet. this | ||
187 | * also means that freeing any block within descriptor's range | ||
188 | * must discard all preallocated blocks. | ||
189 | * - locality group | ||
190 | * assigned to specific locality group which does not translate to | ||
191 | * permanent set of inodes: inode can join and leave group. space | ||
192 | * from this type of preallocation can be used for any inode. thus | ||
193 | * it's consumed from the beginning to the end. | ||
194 | * | ||
195 | * relation between them can be expressed as: | ||
196 | * in-core buddy = on-disk bitmap + preallocation descriptors | ||
197 | * | ||
198 | * this means blocks mballoc considers used are: | |
199 | * - allocated blocks (persistent) | ||
200 | * - preallocated blocks (non-persistent) | ||
201 | * | ||
202 | * consistency in mballoc world means that at any time a block is either | ||
203 | * free or used in ALL structures. notice: "any time" should not be read | ||
204 | * literally -- time is discrete and delimited by locks. | ||
205 | * | ||
206 | * to keep it simple, we don't use block numbers, instead we count number of | ||
207 | * blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA. | ||
208 | * | ||
209 | * all operations can be expressed as: | ||
210 | * - init buddy: buddy = on-disk + PAs | ||
211 | * - new PA: buddy += N; PA = N | ||
212 | * - use inode PA: on-disk += N; PA -= N | ||
213 | * - discard inode PA buddy -= on-disk - PA; PA = 0 | ||
214 | * - use locality group PA on-disk += N; PA -= N | ||
215 | * - discard locality group PA buddy -= PA; PA = 0 | ||
216 | * note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap | ||
217 | * is used in real operation because we can't know actual used | ||
218 | * bits from PA, only from on-disk bitmap | ||
219 | * | ||
220 | * if we follow this strict logic, then all operations above should be atomic. | ||
221 | * given some of them can block, we'd have to use something like semaphores | ||
222 | * killing performance on high-end SMP hardware. let's try to relax it using | ||
223 | * the following knowledge: | ||
224 | * 1) if buddy is referenced, it's already initialized | ||
225 | * 2) while block is used in buddy and the buddy is referenced, | ||
226 | * nobody can re-allocate that block | ||
227 | * 3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has | ||
228 | * bit set and PA claims same block, it's OK. IOW, one can set bit in | ||
229 | * on-disk bitmap if buddy has same bit set or/and PA covers corresponded | ||
230 | * block | ||
231 | * | ||
232 | * so, now we're building a concurrency table: | ||
233 | * - init buddy vs. | ||
234 | * - new PA | ||
235 | * blocks for PA are allocated in the buddy, buddy must be referenced | ||
236 | * until PA is linked to allocation group to avoid concurrent buddy init | ||
237 | * - use inode PA | ||
238 | * we need to make sure that either on-disk bitmap or PA has uptodate data | ||
239 | * given (3) we care that PA-=N operation doesn't interfere with init | ||
240 | * - discard inode PA | ||
241 | * the simplest way would be to have buddy initialized by the discard | ||
242 | * - use locality group PA | ||
243 | * again PA-=N must be serialized with init | ||
244 | * - discard locality group PA | ||
245 | * the simplest way would be to have buddy initialized by the discard | ||
246 | * - new PA vs. | ||
247 | * - use inode PA | ||
248 | * i_data_sem serializes them | ||
249 | * - discard inode PA | ||
250 | * discard process must wait until PA isn't used by another process | ||
251 | * - use locality group PA | ||
252 | * some mutex should serialize them | ||
253 | * - discard locality group PA | ||
254 | * discard process must wait until PA isn't used by another process | ||
255 | * - use inode PA | ||
256 | * - use inode PA | ||
257 | * i_data_sem or another mutex should serialize them | |
258 | * - discard inode PA | ||
259 | * discard process must wait until PA isn't used by another process | ||
260 | * - use locality group PA | ||
261 | * nothing wrong here -- they're different PAs covering different blocks | ||
262 | * - discard locality group PA | ||
263 | * discard process must wait until PA isn't used by another process | ||
264 | * | ||
265 | * now we're ready to make few consequences: | ||
266 | * - PA is referenced and while it is no discard is possible | ||
267 | * - PA is referenced until block isn't marked in on-disk bitmap | ||
268 | * - PA changes only after on-disk bitmap | ||
269 | * - discard must not compete with init. either init is done before | ||
270 | * any discard or they're serialized somehow | ||
271 | * - buddy init as sum of on-disk bitmap and PAs is done atomically | ||
272 | * | ||
273 | * a special case when we've used PA to emptiness. no need to modify buddy | ||
274 | * in this case, but we should care about concurrent init | ||
275 | * | ||
276 | */ | ||
277 | |||
278 | /* | ||
279 | * Logic in few words: | ||
280 | * | ||
281 | * - allocation: | ||
282 | * load group | ||
283 | * find blocks | ||
284 | * mark bits in on-disk bitmap | ||
285 | * release group | ||
286 | * | ||
287 | * - use preallocation: | ||
288 | * find proper PA (per-inode or group) | ||
289 | * load group | ||
290 | * mark bits in on-disk bitmap | ||
291 | * release group | ||
292 | * release PA | ||
293 | * | ||
294 | * - free: | ||
295 | * load group | ||
296 | * mark bits in on-disk bitmap | ||
297 | * release group | ||
298 | * | ||
299 | * - discard preallocations in group: | ||
300 | * mark PAs deleted | ||
301 | * move them onto local list | ||
302 | * load on-disk bitmap | ||
303 | * load group | ||
304 | * remove PA from object (inode or locality group) | ||
305 | * mark free blocks in-core | ||
306 | * | ||
307 | * - discard inode's preallocations: | ||
308 | */ | ||
309 | |||
310 | /* | ||
311 | * Locking rules | ||
312 | * | ||
313 | * Locks: | ||
314 | * - bitlock on a group (group) | ||
315 | * - object (inode/locality) (object) | ||
316 | * - per-pa lock (pa) | ||
317 | * | ||
318 | * Paths: | ||
319 | * - new pa | ||
320 | * object | ||
321 | * group | ||
322 | * | ||
323 | * - find and use pa: | ||
324 | * pa | ||
325 | * | ||
326 | * - release consumed pa: | ||
327 | * pa | ||
328 | * group | ||
329 | * object | ||
330 | * | ||
331 | * - generate in-core bitmap: | ||
332 | * group | ||
333 | * pa | ||
334 | * | ||
335 | * - discard all for given object (inode, locality group): | ||
336 | * object | ||
337 | * pa | ||
338 | * group | ||
339 | * | ||
340 | * - discard all for given group: | ||
341 | * group | ||
342 | * pa | ||
343 | * group | ||
344 | * object | ||
345 | * | ||
346 | */ | ||
347 | |||
348 | /* | ||
349 | * with AGGRESSIVE_CHECK allocator runs consistency checks over | ||
350 | * structures. these checks slow things down a lot | ||
351 | */ | ||
352 | #define AGGRESSIVE_CHECK__ | ||
353 | |||
354 | /* | ||
355 | * with DOUBLE_CHECK defined mballoc creates persistent in-core | ||
356 | * bitmaps, maintains and uses them to check for double allocations | ||
357 | */ | ||
358 | #define DOUBLE_CHECK__ | ||
359 | |||
360 | /* with MB_DEBUG defined, mb_debug() messages are emitted via printk() | |
361 | */ | ||
362 | #define MB_DEBUG__ | ||
363 | #ifdef MB_DEBUG | ||
364 | #define mb_debug(fmt, a...) printk(fmt, ##a) | ||
365 | #else | ||
366 | #define mb_debug(fmt, a...) | ||
367 | #endif | ||
368 | |||
369 | /* | ||
370 | * with EXT4_MB_HISTORY mballoc stores last N allocations in memory | ||
371 | * and you can monitor it in /proc/fs/ext4/<dev>/mb_history | ||
372 | */ | ||
373 | #define EXT4_MB_HISTORY | ||
374 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ | ||
375 | #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ | ||
376 | #define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ | ||
377 | #define EXT4_MB_HISTORY_FREE 8 /* free */ | ||
378 | |||
379 | #define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ | ||
380 | EXT4_MB_HISTORY_PREALLOC) | ||
381 | |||
382 | /* | ||
383 | * How long mballoc can look for a best extent (in found extents) | ||
384 | */ | ||
385 | #define MB_DEFAULT_MAX_TO_SCAN 200 | ||
386 | |||
387 | /* | ||
388 | * How long mballoc must look for a best extent | ||
389 | */ | ||
390 | #define MB_DEFAULT_MIN_TO_SCAN 10 | ||
391 | |||
392 | /* | ||
393 | * How many groups mballoc will scan looking for the best chunk | ||
394 | */ | ||
395 | #define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 | ||
396 | |||
397 | /* | ||
398 | * with 'ext4_mb_stats' allocator will collect stats that will be | ||
399 | * shown at umount. The collecting costs though! | ||
400 | */ | ||
401 | #define MB_DEFAULT_STATS 1 | ||
402 | |||
403 | /* | ||
404 | * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served | ||
405 | * by the stream allocator, whose purpose is to pack requests | |
406 | * as close to each other as possible to produce smooth I/O traffic. | |
407 | * We use locality group prealloc space for stream requests. | |
408 | * We can tune the same via /proc/fs/ext4/<partition>/stream_req | |
409 | */ | ||
410 | #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ | ||
411 | |||
412 | /* | ||
413 | * for which requests use 2^N search using buddies | ||
414 | */ | ||
415 | #define MB_DEFAULT_ORDER2_REQS 2 | ||
416 | |||
417 | /* | ||
418 | * default group prealloc size 512 blocks | ||
419 | */ | ||
420 | #define MB_DEFAULT_GROUP_PREALLOC 512 | ||
421 | |||
422 | static struct kmem_cache *ext4_pspace_cachep; | ||
423 | |||
424 | #ifdef EXT4_BB_MAX_BLOCKS | ||
425 | #undef EXT4_BB_MAX_BLOCKS | ||
426 | #endif | ||
427 | #define EXT4_BB_MAX_BLOCKS 30 | ||
428 | |||
429 | struct ext4_free_metadata { /* fixed-size batch of group-relative block numbers; NOTE(review): presumably blocks waiting to be freed -- confirm against users */ | |
430 | ext4_group_t group; /* NOTE(review): presumably the group that blocks[] refers to -- confirm */ | |
431 | unsigned short num; /* NOTE(review): presumably the number of valid entries in blocks[] -- confirm */ | |
432 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; /* capacity is EXT4_BB_MAX_BLOCKS entries */ | |
433 | struct list_head list; /* linkage for chaining several batches */ | |
434 | }; | |
435 | |||
436 | struct ext4_group_info { /* in-core state kept per block group, obtained via ext4_get_group_info() */ | |
437 | unsigned long bb_state; /* bit flags: EXT4_GROUP_INFO_NEED_INIT_BIT and EXT4_GROUP_INFO_LOCKED_BIT */ | |
438 | unsigned long bb_tid; | |
439 | struct ext4_free_metadata *bb_md_cur; | |
440 | unsigned short bb_first_free; | |
441 | unsigned short bb_free; | |
442 | unsigned short bb_fragments; | |
443 | struct list_head bb_prealloc_list; /* NOTE(review): presumably the PAs linked through pa_group_list -- confirm */ | |
444 | #ifdef DOUBLE_CHECK | |
445 | void *bb_bitmap; /* shadow bitmap used by the mb_*_double() checks; those helpers tolerate NULL */ | |
446 | #endif | |
447 | unsigned short bb_counters[]; /* flexible array member; NOTE(review): presumably one counter per buddy order -- confirm */ | |
448 | }; | |
449 | |||
450 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | ||
451 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
452 | |||
453 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | ||
454 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | ||
455 | |||
456 | |||
457 | struct ext4_prealloc_space { /* preallocation descriptor (PA), see the design comment near the top of this file */ | |
458 | struct list_head pa_inode_list; | |
459 | struct list_head pa_group_list; | |
460 | union { /* the two members below share storage */ | |
461 | struct list_head pa_tmp_list; | |
462 | struct rcu_head pa_rcu; | |
463 | } u; | |
464 | spinlock_t pa_lock; /* NOTE(review): presumably the "per-pa lock" of the locking rules -- confirm */ | |
465 | atomic_t pa_count; | |
466 | unsigned pa_deleted; | |
467 | ext4_fsblk_t pa_pstart; /* phys. block */ | |
468 | ext4_lblk_t pa_lstart; /* log. block */ | |
469 | unsigned short pa_len; /* len of preallocated chunk */ | |
470 | unsigned short pa_free; /* how many blocks are free */ | |
471 | unsigned short pa_linear; /* consumed in one direction | |
472 | * strictly, for grp prealloc */ | |
473 | spinlock_t *pa_obj_lock; /* NOTE(review): presumably the lock of the owning object (inode or locality group) -- confirm */ | |
474 | struct inode *pa_inode; /* hack, for history only */ | |
475 | }; | |
476 | |||
477 | |||
478 | struct ext4_free_extent { /* an extent addressed as (group, offset in group) */ | |
479 | ext4_lblk_t fe_logical; /* NOTE(review): presumably the logical (file) block -- confirm */ | |
480 | ext4_grpblk_t fe_start; /* block offset inside fe_group, see ext4_grp_offs_to_block() */ | |
481 | ext4_group_t fe_group; /* group number, see ext4_grp_offs_to_block() */ | |
482 | int fe_len; /* NOTE(review): presumably the length in blocks -- confirm */ | |
483 | }; | |
484 | |||
485 | /* | ||
486 | * Locality group: | ||
487 | * we try to group all related changes together | ||
488 | * so that writeback can flush/allocate them together as well | ||
489 | */ | ||
490 | struct ext4_locality_group { | |
491 | /* for allocator */ | |
492 | struct mutex lg_mutex; /* to serialize allocations */ | |
493 | struct list_head lg_prealloc_list;/* list of preallocations */ | |
494 | spinlock_t lg_prealloc_lock; /* NOTE(review): presumably guards lg_prealloc_list -- confirm */ | |
495 | }; | |
496 | |||
497 | struct ext4_allocation_context { /* working state for one allocation request */ | |
498 | struct inode *ac_inode; | |
499 | struct super_block *ac_sb; | |
500 | |
501 | /* original request */ | |
502 | struct ext4_free_extent ac_o_ex; | |
503 | |
504 | /* goal request (after normalization) */ | |
505 | struct ext4_free_extent ac_g_ex; | |
506 | |
507 | /* the best found extent */ | |
508 | struct ext4_free_extent ac_b_ex; | |
509 | |
510 | /* copy of the best found extent taken before preallocation efforts */ | |
511 | struct ext4_free_extent ac_f_ex; | |
512 | |
513 | /* number of iterations done. we have to track to limit searching */ | |
514 | unsigned long ac_ex_scanned; | |
515 | __u16 ac_groups_scanned; | |
516 | __u16 ac_found; | |
517 | __u16 ac_tail; | |
518 | __u16 ac_buddy; | |
519 | __u16 ac_flags; /* allocation hints */ | |
520 | __u8 ac_status; /* NOTE(review): presumably one of the AC_STATUS_* values -- confirm */ | |
521 | __u8 ac_criteria; | |
522 | __u8 ac_repeats; | |
523 | __u8 ac_2order; /* if request is to allocate 2^N blocks and | |
524 | * N > 0, the field stores N, otherwise 0 */ | |
525 | __u8 ac_op; /* operation, for history only */ | |
526 | struct page *ac_bitmap_page; | |
527 | struct page *ac_buddy_page; | |
528 | struct ext4_prealloc_space *ac_pa; | |
529 | struct ext4_locality_group *ac_lg; | |
530 | }; | |
531 | |||
532 | #define AC_STATUS_CONTINUE 1 | ||
533 | #define AC_STATUS_FOUND 2 | ||
534 | #define AC_STATUS_BREAK 3 | ||
535 | |||
536 | struct ext4_mb_history { /* one recorded history entry (see EXT4_MB_HISTORY) */ | |
537 | struct ext4_free_extent orig; /* orig allocation */ | |
538 | struct ext4_free_extent goal; /* goal allocation */ | |
539 | struct ext4_free_extent result; /* result allocation */ | |
540 | unsigned pid; | |
541 | unsigned ino; | |
542 | __u16 found; /* how many extents have been found */ | |
543 | __u16 groups; /* how many groups have been scanned */ | |
544 | __u16 tail; /* what tail broke some buddy */ | |
545 | __u16 buddy; /* buddy the tail ^^^ broke */ | |
546 | __u16 flags; | |
547 | __u8 cr:3; /* which phase the result extent was found at */ | |
548 | __u8 op:4; /* 4 bits, wide enough for the EXT4_MB_HISTORY_* values 1, 2, 4 and 8; NOTE(review): presumably holds one of them -- confirm */ | |
549 | __u8 merged:1; | |
550 | }; | |
551 | |||
552 | struct ext4_buddy { /* handle bundling one group's bitmap and buddy data */ | |
553 | struct page *bd_buddy_page; | |
554 | void *bd_buddy; /* base of the buddy data; mb_find_buddy() adds s_mb_offsets[order] to it */ | |
555 | struct page *bd_bitmap_page; | |
556 | void *bd_bitmap; /* what mb_find_buddy() returns for order 0 */ | |
557 | struct ext4_group_info *bd_info; | |
558 | struct super_block *bd_sb; | |
559 | __u16 bd_blkbits; /* mb_find_buddy() uses 1 << (bd_blkbits + 3) as the order-0 bit count */ | |
560 | ext4_group_t bd_group; /* group number, used when taking the group lock */ | |
561 | }; | |
562 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) /* accessor for the bitmap pointer */ | |
563 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) /* accessor for the buddy pointer */ | |
564 | |||
565 | #ifndef EXT4_MB_HISTORY | |
566 | static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) /* history compiled out: storing an entry is a no-op */ | |
567 | { | |
568 | return; | |
569 | } | |
570 | #else | |
571 | static void ext4_mb_store_history(struct ext4_allocation_context *ac); /* forward declaration; being static, the definition lives elsewhere in this file */ | |
572 | #endif | |
573 | |||
574 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | ||
575 | |||
576 | static struct proc_dir_entry *proc_root_ext4; | ||
577 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | ||
578 | ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | ||
579 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
580 | |||
581 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | ||
582 | ext4_group_t group); | ||
583 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
584 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
585 | static void ext4_mb_return_to_preallocation(struct inode *inode, | ||
586 | struct ext4_buddy *e4b, sector_t block, | ||
587 | int count); | ||
588 | static void ext4_mb_put_pa(struct ext4_allocation_context *, | ||
589 | struct super_block *, struct ext4_prealloc_space *pa); | ||
590 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | ||
591 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | ||
592 | |||
593 | |||
594 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) /* take the bit spinlock of the given group */ | |
595 | { | |
596 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
597 | |
598 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); /* the lock is a single bit inside bb_state */ | |
599 | } | |
600 | |||
601 | static inline void ext4_unlock_group(struct super_block *sb, /* release the bit spinlock taken by ext4_lock_group() */ | |
602 | ext4_group_t group) | |
603 | { | |
604 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
605 | |
606 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); /* same bit of bb_state that ext4_lock_group() sets */ | |
607 | } | |
608 | |||
609 | static inline int ext4_is_group_locked(struct super_block *sb, /* returns the result of bit_spin_is_locked() for the group's lock bit */ | |
610 | ext4_group_t group) | |
611 | { | |
612 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
613 | |
614 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, /* used by the DOUBLE_CHECK helpers in BUG_ON() assertions */ | |
615 | &(grinfo->bb_state)); | |
616 | } | |
617 | |||
618 | static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, /* convert the (fe_group, fe_start) pair of fex into a filesystem block number */ | |
619 | struct ext4_free_extent *fex) | |
620 | { | |
621 | ext4_fsblk_t block; | |
622 | |
623 | block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) /* cast first so the product is computed in ext4_fsblk_t */ | |
624 | + fex->fe_start | |
625 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); /* superblock field is little-endian on disk */ | |
626 | return block; | |
627 | } | |
628 | |||
629 | #if BITS_PER_LONG == 64 | ||
630 | #define mb_correct_addr_and_bit(bit, addr) \ | ||
631 | { \ | ||
632 | bit += ((unsigned long) addr & 7UL) << 3; \ | ||
633 | addr = (void *) ((unsigned long) addr & ~7UL); \ | ||
634 | } | ||
635 | #elif BITS_PER_LONG == 32 | ||
636 | #define mb_correct_addr_and_bit(bit, addr) \ | ||
637 | { \ | ||
638 | bit += ((unsigned long) addr & 3UL) << 3; \ | ||
639 | addr = (void *) ((unsigned long) addr & ~3UL); \ | ||
640 | } | ||
641 | #else | ||
642 | #error "how many bits you are?!" | ||
643 | #endif | ||
644 | |||
645 | static inline int mb_test_bit(int bit, void *addr) /* ext4_test_bit() on a possibly unaligned bitmap address */ | |
646 | { | |
647 | /* | |
648 | * ext4_test_bit on architecture like powerpc | |
649 | * needs unsigned long aligned address | |
650 | */ | |
651 | mb_correct_addr_and_bit(bit, addr); /* rewrites the local copies of bit and addr */ | |
652 | return ext4_test_bit(bit, addr); | |
653 | } | |
654 | |||
655 | static inline void mb_set_bit(int bit, void *addr) /* ext4_set_bit() after aligning addr as in mb_test_bit() */ | |
656 | { | |
657 | mb_correct_addr_and_bit(bit, addr); | |
658 | ext4_set_bit(bit, addr); | |
659 | } | |
660 | |||
661 | static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) /* as mb_set_bit(), via ext4_set_bit_atomic() with the caller's lock */ | |
662 | { | |
663 | mb_correct_addr_and_bit(bit, addr); | |
664 | ext4_set_bit_atomic(lock, bit, addr); | |
665 | } | |
666 | |||
667 | static inline void mb_clear_bit(int bit, void *addr) /* ext4_clear_bit() after aligning addr as in mb_test_bit() */ | |
668 | { | |
669 | mb_correct_addr_and_bit(bit, addr); | |
670 | ext4_clear_bit(bit, addr); | |
671 | } | |
672 | |||
673 | static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) /* as mb_clear_bit(), via ext4_clear_bit_atomic() with the caller's lock */ | |
674 | { | |
675 | mb_correct_addr_and_bit(bit, addr); | |
676 | ext4_clear_bit_atomic(lock, bit, addr); | |
677 | } | |
678 | |||
679 | static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) /* return the bitmap for the given order and store its bit count in *max; NULL with *max = 0 if order is too large */ | |
680 | { | |
681 | char *bb; | |
682 | |
683 | /* FIXME!! is this needed */ | |
684 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | |
685 | BUG_ON(max == NULL); /* max is an out parameter and must be supplied */ | |
686 | |
687 | if (order > e4b->bd_blkbits + 1) { /* highest valid order is bd_blkbits + 1 */ | |
688 | *max = 0; | |
689 | return NULL; | |
690 | } | |
691 | |
692 | /* at order 0 we see each particular block */ | |
693 | *max = 1 << (e4b->bd_blkbits + 3); /* 8 bits for each of the 1 << bd_blkbits bytes */ | |
694 | if (order == 0) | |
695 | return EXT4_MB_BITMAP(e4b); | |
696 | |
697 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; /* per-order offset table kept in the superblock info */ | |
698 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; /* overrides the order-0 value set above */ | |
699 | |
700 | return bb; | |
701 | } | |
702 | |||
703 | #ifdef DOUBLE_CHECK | ||
704 | static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, /* DOUBLE_CHECK only: clear bits [first, first + count) in the shadow bitmap and report any bit that was already clear; inode may be NULL */ | |
705 | int first, int count) | |
706 | { | |
707 | int i; | |
708 | struct super_block *sb = e4b->bd_sb; | |
709 | |
710 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) /* no shadow bitmap for this group: nothing to check */ | |
711 | return; | |
712 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); /* caller must hold the group lock */ | |
713 | for (i = 0; i < count; i++) { | |
714 | if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { /* bit already clear: the block is being freed twice */ | |
715 | ext4_fsblk_t blocknr; | |
716 | blocknr = (ext4_fsblk_t) e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); /* widen before multiplying, as ext4_grp_offs_to_block() does, so the product cannot wrap */ | |
717 | blocknr += first + i; | |
718 | blocknr += | |
719 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | |
720 | |
721 | ext4_error(sb, __FUNCTION__, "double-free of inode" | |
722 | " %lu's block %llu(bit %u in group %lu)\n", | |
723 | inode ? inode->i_ino : 0, blocknr, | |
724 | first + i, e4b->bd_group); | |
725 | } | |
726 | mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); /* cleared whether or not an error was reported */ | |
727 | } | |
728 | } | |
729 | |||
730 | static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) /* DOUBLE_CHECK only: set bits [first, first + count) in the shadow bitmap, BUG()ing on any bit already set */ | |
731 | { | |
732 | int i; | |
733 | |
734 | if (unlikely(e4b->bd_info->bb_bitmap == NULL)) /* no shadow bitmap for this group: nothing to check */ | |
735 | return; | |
736 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); /* caller must hold the group lock */ | |
737 | for (i = 0; i < count; i++) { | |
738 | BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); /* bit already set means a double allocation */ | |
739 | mb_set_bit(first + i, e4b->bd_info->bb_bitmap); | |
740 | } | |
741 | } | |
742 | |||
743 | static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) /* DOUBLE_CHECK only: compare the shadow bitmap with bitmap over s_blocksize bytes and BUG() at the first differing byte */ | |
744 | { | |
745 | if (e4b->bd_info->bb_bitmap != NULL && memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) { /* skip when there is no shadow bitmap, matching the other *_double helpers */ | |
746 | unsigned char *b1, *b2; | |
747 | int i; | |
748 | b1 = (unsigned char *) e4b->bd_info->bb_bitmap; | |
749 | b2 = (unsigned char *) bitmap; | |
750 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { /* locate the first mismatching byte for the report */ | |
751 | if (b1[i] != b2[i]) { | |
752 | printk("corruption in group %lu at byte %u(%u):" | |
753 | " %x in copy != %x on disk/prealloc\n", | |
754 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | |
755 | BUG(); | |
756 | } | |
757 | } | |
758 | } | |
759 | } | |
760 | |||
761 | #else | ||
762 | static inline void mb_free_blocks_double(struct inode *inode, /* no-op stub used when DOUBLE_CHECK is not defined */ | |
763 | struct ext4_buddy *e4b, int first, int count) | |
764 | { | |
765 | return; | |
766 | } | |
767 | static inline void mb_mark_used_double(struct ext4_buddy *e4b, /* no-op stub used when DOUBLE_CHECK is not defined */ | |
768 | int first, int count) | |
769 | { | |
770 | return; | |
771 | } | |
/* Bitmap-copy tracking is compiled out: there is no copy to compare. */
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
}
776 | #endif | ||
777 | |||
#ifdef AGGRESSIVE_CHECK

/*
 * Assertion helper private to __mb_check_buddy().  It reports through that
 * function's 'function', 'file' and 'line' parameters, so the message
 * names the mb_check_buddy() call site, and then stops with BUG().
 * The asserted expression is stringified into the message.
 */
#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

/*
 * Consistency check of one group's buddy data against its bitmap, its
 * per-order counters, its fragment count and its preallocation list.
 *
 * Skipped entirely when the MBALLOC mount option is off, and otherwise
 * performed only on every 100th call.  @file, @function and @line
 * identify the caller for the assertion message.  Always returns 0; a
 * failed check ends in BUG().
 */
static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	static int mb_check_counter;
	struct super_block *sb = e4b->bd_sb;
	struct ext4_group_info *grp;
	struct list_head *cur;
	void *buddy;
	void *buddy2;
	int order;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	int fragments = 0;
	int fstart = -1;

	if (!test_opt(sb, MBALLOC))
		return 0;

	/* the full walk is expensive: do it once per 100 calls */
	if (mb_check_counter++ % 100 != 0)
		return 0;

	/* compare every order with the order right below it */
	for (order = e4b->bd_blkbits + 1; order > 1; order--) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 1 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/*
			 * bit i is clear at this order, so both of its
			 * halves must be set (1) in buddy2
			 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			/* ... and every covered bit is clear in the bitmap */
			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
	}

	/* walk order 0, counting runs of clear bits as fragments */
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	/* every preallocated bit must be set at order 0 */
	grp = ext4_get_group_info(sb, e4b->bd_group);
	buddy = mb_find_buddy(e4b, 0, &max);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;

		pa = list_entry(cur, struct ext4_prealloc_space, group_list);
		ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __FUNCTION__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
897 | |||
898 | /* FIXME!! need more doc */ | ||
899 | static void ext4_mb_mark_free_simple(struct super_block *sb, | ||
900 | void *buddy, unsigned first, int len, | ||
901 | struct ext4_group_info *grp) | ||
902 | { | ||
903 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
904 | unsigned short min; | ||
905 | unsigned short max; | ||
906 | unsigned short chunk; | ||
907 | unsigned short border; | ||
908 | |||
909 | BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb)); | ||
910 | |||
911 | border = 2 << sb->s_blocksize_bits; | ||
912 | |||
913 | while (len > 0) { | ||
914 | /* find how many blocks can be covered since this position */ | ||
915 | max = ffs(first | border) - 1; | ||
916 | |||
917 | /* find how many blocks of power 2 we need to mark */ | ||
918 | min = fls(len) - 1; | ||
919 | |||
920 | if (max < min) | ||
921 | min = max; | ||
922 | chunk = 1 << min; | ||
923 | |||
924 | /* mark multiblock chunks only */ | ||
925 | grp->bb_counters[min]++; | ||
926 | if (min > 0) | ||
927 | mb_clear_bit(first >> min, | ||
928 | buddy + sbi->s_mb_offsets[min]); | ||
929 | |||
930 | len -= chunk; | ||
931 | first += chunk; | ||
932 | } | ||
933 | } | ||
934 | |||
935 | static void ext4_mb_generate_buddy(struct super_block *sb, | ||
936 | void *buddy, void *bitmap, ext4_group_t group) | ||
937 | { | ||
938 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
939 | unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); | ||
940 | unsigned short i = 0; | ||
941 | unsigned short first; | ||
942 | unsigned short len; | ||
943 | unsigned free = 0; | ||
944 | unsigned fragments = 0; | ||
945 | unsigned long long period = get_cycles(); | ||
946 | |||
947 | /* initialize buddy from bitmap which is aggregation | ||
948 | * of on-disk bitmap and preallocations */ | ||
949 | i = ext4_find_next_zero_bit(bitmap, max, 0); | ||
950 | grp->bb_first_free = i; | ||
951 | while (i < max) { | ||
952 | fragments++; | ||
953 | first = i; | ||
954 | i = ext4_find_next_bit(bitmap, max, i); | ||
955 | len = i - first; | ||
956 | free += len; | ||
957 | if (len > 1) | ||
958 | ext4_mb_mark_free_simple(sb, buddy, first, len, grp); | ||
959 | else | ||
960 | grp->bb_counters[0]++; | ||
961 | if (i < max) | ||
962 | i = ext4_find_next_zero_bit(bitmap, max, i); | ||
963 | } | ||
964 | grp->bb_fragments = fragments; | ||
965 | |||
966 | if (free != grp->bb_free) { | ||
967 | printk(KERN_DEBUG | ||
968 | "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", | ||
969 | group, free, grp->bb_free); | ||
970 | grp->bb_free = free; | ||
971 | } | ||
972 | |||
973 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | ||
974 | |||
975 | period = get_cycles() - period; | ||
976 | spin_lock(&EXT4_SB(sb)->s_bal_lock); | ||
977 | EXT4_SB(sb)->s_mb_buddies_generated++; | ||
978 | EXT4_SB(sb)->s_mb_generation_time += period; | ||
979 | spin_unlock(&EXT4_SB(sb)->s_bal_lock); | ||
980 | } | ||
981 | |||
982 | /* The buddy information is attached the buddy cache inode | ||
983 | * for convenience. The information regarding each group | ||
984 | * is loaded via ext4_mb_load_buddy. The information involve | ||
985 | * block bitmap and buddy information. The information are | ||
986 | * stored in the inode as | ||
987 | * | ||
988 | * { page } | ||
989 | * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... | ||
990 | * | ||
991 | * | ||
992 | * one block each for bitmap and buddy information. | ||
993 | * So for each group we take up 2 blocks. A page can | ||
994 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. | ||
995 | * So it can have information regarding groups_per_page which | ||
996 | * is blocks_per_page/2 | ||
997 | */ | ||
998 | |||
999 | static int ext4_mb_init_cache(struct page *page, char *incore) | ||
1000 | { | ||
1001 | int blocksize; | ||
1002 | int blocks_per_page; | ||
1003 | int groups_per_page; | ||
1004 | int err = 0; | ||
1005 | int i; | ||
1006 | ext4_group_t first_group; | ||
1007 | int first_block; | ||
1008 | struct super_block *sb; | ||
1009 | struct buffer_head *bhs; | ||
1010 | struct buffer_head **bh; | ||
1011 | struct inode *inode; | ||
1012 | char *data; | ||
1013 | char *bitmap; | ||
1014 | |||
1015 | mb_debug("init page %lu\n", page->index); | ||
1016 | |||
1017 | inode = page->mapping->host; | ||
1018 | sb = inode->i_sb; | ||
1019 | blocksize = 1 << inode->i_blkbits; | ||
1020 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; | ||
1021 | |||
1022 | groups_per_page = blocks_per_page >> 1; | ||
1023 | if (groups_per_page == 0) | ||
1024 | groups_per_page = 1; | ||
1025 | |||
1026 | /* allocate buffer_heads to read bitmaps */ | ||
1027 | if (groups_per_page > 1) { | ||
1028 | err = -ENOMEM; | ||
1029 | i = sizeof(struct buffer_head *) * groups_per_page; | ||
1030 | bh = kzalloc(i, GFP_NOFS); | ||
1031 | if (bh == NULL) | ||
1032 | goto out; | ||
1033 | } else | ||
1034 | bh = &bhs; | ||
1035 | |||
1036 | first_group = page->index * blocks_per_page / 2; | ||
1037 | |||
1038 | /* read all groups the page covers into the cache */ | ||
1039 | for (i = 0; i < groups_per_page; i++) { | ||
1040 | struct ext4_group_desc *desc; | ||
1041 | |||
1042 | if (first_group + i >= EXT4_SB(sb)->s_groups_count) | ||
1043 | break; | ||
1044 | |||
1045 | err = -EIO; | ||
1046 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | ||
1047 | if (desc == NULL) | ||
1048 | goto out; | ||
1049 | |||
1050 | err = -ENOMEM; | ||
1051 | bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); | ||
1052 | if (bh[i] == NULL) | ||
1053 | goto out; | ||
1054 | |||
1055 | if (bh_uptodate_or_lock(bh[i])) | ||
1056 | continue; | ||
1057 | |||
1058 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
1059 | ext4_init_block_bitmap(sb, bh[i], | ||
1060 | first_group + i, desc); | ||
1061 | set_buffer_uptodate(bh[i]); | ||
1062 | unlock_buffer(bh[i]); | ||
1063 | continue; | ||
1064 | } | ||
1065 | get_bh(bh[i]); | ||
1066 | bh[i]->b_end_io = end_buffer_read_sync; | ||
1067 | submit_bh(READ, bh[i]); | ||
1068 | mb_debug("read bitmap for group %lu\n", first_group + i); | ||
1069 | } | ||
1070 | |||
1071 | /* wait for I/O completion */ | ||
1072 | for (i = 0; i < groups_per_page && bh[i]; i++) | ||
1073 | wait_on_buffer(bh[i]); | ||
1074 | |||
1075 | err = -EIO; | ||
1076 | for (i = 0; i < groups_per_page && bh[i]; i++) | ||
1077 | if (!buffer_uptodate(bh[i])) | ||
1078 | goto out; | ||
1079 | |||
1080 | first_block = page->index * blocks_per_page; | ||
1081 | for (i = 0; i < blocks_per_page; i++) { | ||
1082 | int group; | ||
1083 | struct ext4_group_info *grinfo; | ||
1084 | |||
1085 | group = (first_block + i) >> 1; | ||
1086 | if (group >= EXT4_SB(sb)->s_groups_count) | ||
1087 | break; | ||
1088 | |||
1089 | /* | ||
1090 | * data carry information regarding this | ||
1091 | * particular group in the format specified | ||
1092 | * above | ||
1093 | * | ||
1094 | */ | ||
1095 | data = page_address(page) + (i * blocksize); | ||
1096 | bitmap = bh[group - first_group]->b_data; | ||
1097 | |||
1098 | /* | ||
1099 | * We place the buddy block and bitmap block | ||
1100 | * close together | ||
1101 | */ | ||
1102 | if ((first_block + i) & 1) { | ||
1103 | /* this is block of buddy */ | ||
1104 | BUG_ON(incore == NULL); | ||
1105 | mb_debug("put buddy for group %u in page %lu/%x\n", | ||
1106 | group, page->index, i * blocksize); | ||
1107 | memset(data, 0xff, blocksize); | ||
1108 | grinfo = ext4_get_group_info(sb, group); | ||
1109 | grinfo->bb_fragments = 0; | ||
1110 | memset(grinfo->bb_counters, 0, | ||
1111 | sizeof(unsigned short)*(sb->s_blocksize_bits+2)); | ||
1112 | /* | ||
1113 | * incore got set to the group block bitmap below | ||
1114 | */ | ||
1115 | ext4_mb_generate_buddy(sb, data, incore, group); | ||
1116 | incore = NULL; | ||
1117 | } else { | ||
1118 | /* this is block of bitmap */ | ||
1119 | BUG_ON(incore != NULL); | ||
1120 | mb_debug("put bitmap for group %u in page %lu/%x\n", | ||
1121 | group, page->index, i * blocksize); | ||
1122 | |||
1123 | /* see comments in ext4_mb_put_pa() */ | ||
1124 | ext4_lock_group(sb, group); | ||
1125 | memcpy(data, bitmap, blocksize); | ||
1126 | |||
1127 | /* mark all preallocated blks used in in-core bitmap */ | ||
1128 | ext4_mb_generate_from_pa(sb, data, group); | ||
1129 | ext4_unlock_group(sb, group); | ||
1130 | |||
1131 | /* set incore so that the buddy information can be | ||
1132 | * generated using this | ||
1133 | */ | ||
1134 | incore = data; | ||
1135 | } | ||
1136 | } | ||
1137 | SetPageUptodate(page); | ||
1138 | |||
1139 | out: | ||
1140 | if (bh) { | ||
1141 | for (i = 0; i < groups_per_page && bh[i]; i++) | ||
1142 | brelse(bh[i]); | ||
1143 | if (bh != &bhs) | ||
1144 | kfree(bh); | ||
1145 | } | ||
1146 | return err; | ||
1147 | } | ||
1148 | |||
1149 | static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | ||
1150 | struct ext4_buddy *e4b) | ||
1151 | { | ||
1152 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1153 | struct inode *inode = sbi->s_buddy_cache; | ||
1154 | int blocks_per_page; | ||
1155 | int block; | ||
1156 | int pnum; | ||
1157 | int poff; | ||
1158 | struct page *page; | ||
1159 | |||
1160 | mb_debug("load group %lu\n", group); | ||
1161 | |||
1162 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1163 | |||
1164 | e4b->bd_blkbits = sb->s_blocksize_bits; | ||
1165 | e4b->bd_info = ext4_get_group_info(sb, group); | ||
1166 | e4b->bd_sb = sb; | ||
1167 | e4b->bd_group = group; | ||
1168 | e4b->bd_buddy_page = NULL; | ||
1169 | e4b->bd_bitmap_page = NULL; | ||
1170 | |||
1171 | /* | ||
1172 | * the buddy cache inode stores the block bitmap | ||
1173 | * and buddy information in consecutive blocks. | ||
1174 | * So for each group we need two blocks. | ||
1175 | */ | ||
1176 | block = group * 2; | ||
1177 | pnum = block / blocks_per_page; | ||
1178 | poff = block % blocks_per_page; | ||
1179 | |||
1180 | /* we could use find_or_create_page(), but it locks page | ||
1181 | * what we'd like to avoid in fast path ... */ | ||
1182 | page = find_get_page(inode->i_mapping, pnum); | ||
1183 | if (page == NULL || !PageUptodate(page)) { | ||
1184 | if (page) | ||
1185 | page_cache_release(page); | ||
1186 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1187 | if (page) { | ||
1188 | BUG_ON(page->mapping != inode->i_mapping); | ||
1189 | if (!PageUptodate(page)) { | ||
1190 | ext4_mb_init_cache(page, NULL); | ||
1191 | mb_cmp_bitmaps(e4b, page_address(page) + | ||
1192 | (poff * sb->s_blocksize)); | ||
1193 | } | ||
1194 | unlock_page(page); | ||
1195 | } | ||
1196 | } | ||
1197 | if (page == NULL || !PageUptodate(page)) | ||
1198 | goto err; | ||
1199 | e4b->bd_bitmap_page = page; | ||
1200 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1201 | mark_page_accessed(page); | ||
1202 | |||
1203 | block++; | ||
1204 | pnum = block / blocks_per_page; | ||
1205 | poff = block % blocks_per_page; | ||
1206 | |||
1207 | page = find_get_page(inode->i_mapping, pnum); | ||
1208 | if (page == NULL || !PageUptodate(page)) { | ||
1209 | if (page) | ||
1210 | page_cache_release(page); | ||
1211 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1212 | if (page) { | ||
1213 | BUG_ON(page->mapping != inode->i_mapping); | ||
1214 | if (!PageUptodate(page)) | ||
1215 | ext4_mb_init_cache(page, e4b->bd_bitmap); | ||
1216 | |||
1217 | unlock_page(page); | ||
1218 | } | ||
1219 | } | ||
1220 | if (page == NULL || !PageUptodate(page)) | ||
1221 | goto err; | ||
1222 | e4b->bd_buddy_page = page; | ||
1223 | e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); | ||
1224 | mark_page_accessed(page); | ||
1225 | |||
1226 | BUG_ON(e4b->bd_bitmap_page == NULL); | ||
1227 | BUG_ON(e4b->bd_buddy_page == NULL); | ||
1228 | |||
1229 | return 0; | ||
1230 | |||
1231 | err: | ||
1232 | if (e4b->bd_bitmap_page) | ||
1233 | page_cache_release(e4b->bd_bitmap_page); | ||
1234 | if (e4b->bd_buddy_page) | ||
1235 | page_cache_release(e4b->bd_buddy_page); | ||
1236 | e4b->bd_buddy = NULL; | ||
1237 | e4b->bd_bitmap = NULL; | ||
1238 | return -EIO; | ||
1239 | } | ||
1240 | |||
1241 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) | ||
1242 | { | ||
1243 | if (e4b->bd_bitmap_page) | ||
1244 | page_cache_release(e4b->bd_bitmap_page); | ||
1245 | if (e4b->bd_buddy_page) | ||
1246 | page_cache_release(e4b->bd_buddy_page); | ||
1247 | } | ||
1248 | |||
1249 | |||
1250 | static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) | ||
1251 | { | ||
1252 | int order = 1; | ||
1253 | void *bb; | ||
1254 | |||
1255 | BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); | ||
1256 | BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); | ||
1257 | |||
1258 | bb = EXT4_MB_BUDDY(e4b); | ||
1259 | while (order <= e4b->bd_blkbits + 1) { | ||
1260 | block = block >> 1; | ||
1261 | if (!mb_test_bit(block, bb)) { | ||
1262 | /* this block is part of buddy of order 'order' */ | ||
1263 | return order; | ||
1264 | } | ||
1265 | bb += 1 << (e4b->bd_blkbits - order); | ||
1266 | order++; | ||
1267 | } | ||
1268 | return 0; | ||
1269 | } | ||
1270 | |||
1271 | static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | ||
1272 | { | ||
1273 | __u32 *addr; | ||
1274 | |||
1275 | len = cur + len; | ||
1276 | while (cur < len) { | ||
1277 | if ((cur & 31) == 0 && (len - cur) >= 32) { | ||
1278 | /* fast path: clear whole word at once */ | ||
1279 | addr = bm + (cur >> 3); | ||
1280 | *addr = 0; | ||
1281 | cur += 32; | ||
1282 | continue; | ||
1283 | } | ||
1284 | mb_clear_bit_atomic(lock, cur, bm); | ||
1285 | cur++; | ||
1286 | } | ||
1287 | } | ||
1288 | |||
1289 | static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | ||
1290 | { | ||
1291 | __u32 *addr; | ||
1292 | |||
1293 | len = cur + len; | ||
1294 | while (cur < len) { | ||
1295 | if ((cur & 31) == 0 && (len - cur) >= 32) { | ||
1296 | /* fast path: set whole word at once */ | ||
1297 | addr = bm + (cur >> 3); | ||
1298 | *addr = 0xffffffff; | ||
1299 | cur += 32; | ||
1300 | continue; | ||
1301 | } | ||
1302 | mb_set_bit_atomic(lock, cur, bm); | ||
1303 | cur++; | ||
1304 | } | ||
1305 | } | ||
1306 | |||
1307 | static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | ||
1308 | int first, int count) | ||
1309 | { | ||
1310 | int block = 0; | ||
1311 | int max = 0; | ||
1312 | int order; | ||
1313 | void *buddy; | ||
1314 | void *buddy2; | ||
1315 | struct super_block *sb = e4b->bd_sb; | ||
1316 | |||
1317 | BUG_ON(first + count > (sb->s_blocksize << 3)); | ||
1318 | BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); | ||
1319 | mb_check_buddy(e4b); | ||
1320 | mb_free_blocks_double(inode, e4b, first, count); | ||
1321 | |||
1322 | e4b->bd_info->bb_free += count; | ||
1323 | if (first < e4b->bd_info->bb_first_free) | ||
1324 | e4b->bd_info->bb_first_free = first; | ||
1325 | |||
1326 | /* let's maintain fragments counter */ | ||
1327 | if (first != 0) | ||
1328 | block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); | ||
1329 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) | ||
1330 | max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); | ||
1331 | if (block && max) | ||
1332 | e4b->bd_info->bb_fragments--; | ||
1333 | else if (!block && !max) | ||
1334 | e4b->bd_info->bb_fragments++; | ||
1335 | |||
1336 | /* let's maintain buddy itself */ | ||
1337 | while (count-- > 0) { | ||
1338 | block = first++; | ||
1339 | order = 0; | ||
1340 | |||
1341 | if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { | ||
1342 | ext4_fsblk_t blocknr; | ||
1343 | blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); | ||
1344 | blocknr += block; | ||
1345 | blocknr += | ||
1346 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
1347 | |||
1348 | ext4_error(sb, __FUNCTION__, "double-free of inode" | ||
1349 | " %lu's block %llu(bit %u in group %lu)\n", | ||
1350 | inode ? inode->i_ino : 0, blocknr, block, | ||
1351 | e4b->bd_group); | ||
1352 | } | ||
1353 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | ||
1354 | e4b->bd_info->bb_counters[order]++; | ||
1355 | |||
1356 | /* start of the buddy */ | ||
1357 | buddy = mb_find_buddy(e4b, order, &max); | ||
1358 | |||
1359 | do { | ||
1360 | block &= ~1UL; | ||
1361 | if (mb_test_bit(block, buddy) || | ||
1362 | mb_test_bit(block + 1, buddy)) | ||
1363 | break; | ||
1364 | |||
1365 | /* both the buddies are free, try to coalesce them */ | ||
1366 | buddy2 = mb_find_buddy(e4b, order + 1, &max); | ||
1367 | |||
1368 | if (!buddy2) | ||
1369 | break; | ||
1370 | |||
1371 | if (order > 0) { | ||
1372 | /* for special purposes, we don't set | ||
1373 | * free bits in bitmap */ | ||
1374 | mb_set_bit(block, buddy); | ||
1375 | mb_set_bit(block + 1, buddy); | ||
1376 | } | ||
1377 | e4b->bd_info->bb_counters[order]--; | ||
1378 | e4b->bd_info->bb_counters[order]--; | ||
1379 | |||
1380 | block = block >> 1; | ||
1381 | order++; | ||
1382 | e4b->bd_info->bb_counters[order]++; | ||
1383 | |||
1384 | mb_clear_bit(block, buddy2); | ||
1385 | buddy = buddy2; | ||
1386 | } while (1); | ||
1387 | } | ||
1388 | mb_check_buddy(e4b); | ||
1389 | |||
1390 | return 0; | ||
1391 | } | ||
1392 | |||
1393 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | ||
1394 | int needed, struct ext4_free_extent *ex) | ||
1395 | { | ||
1396 | int next = block; | ||
1397 | int max; | ||
1398 | int ord; | ||
1399 | void *buddy; | ||
1400 | |||
1401 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | ||
1402 | BUG_ON(ex == NULL); | ||
1403 | |||
1404 | buddy = mb_find_buddy(e4b, order, &max); | ||
1405 | BUG_ON(buddy == NULL); | ||
1406 | BUG_ON(block >= max); | ||
1407 | if (mb_test_bit(block, buddy)) { | ||
1408 | ex->fe_len = 0; | ||
1409 | ex->fe_start = 0; | ||
1410 | ex->fe_group = 0; | ||
1411 | return 0; | ||
1412 | } | ||
1413 | |||
1414 | /* FIXME dorp order completely ? */ | ||
1415 | if (likely(order == 0)) { | ||
1416 | /* find actual order */ | ||
1417 | order = mb_find_order_for_block(e4b, block); | ||
1418 | block = block >> order; | ||
1419 | } | ||
1420 | |||
1421 | ex->fe_len = 1 << order; | ||
1422 | ex->fe_start = block << order; | ||
1423 | ex->fe_group = e4b->bd_group; | ||
1424 | |||
1425 | /* calc difference from given start */ | ||
1426 | next = next - ex->fe_start; | ||
1427 | ex->fe_len -= next; | ||
1428 | ex->fe_start += next; | ||
1429 | |||
1430 | while (needed > ex->fe_len && | ||
1431 | (buddy = mb_find_buddy(e4b, order, &max))) { | ||
1432 | |||
1433 | if (block + 1 >= max) | ||
1434 | break; | ||
1435 | |||
1436 | next = (block + 1) * (1 << order); | ||
1437 | if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) | ||
1438 | break; | ||
1439 | |||
1440 | ord = mb_find_order_for_block(e4b, next); | ||
1441 | |||
1442 | order = ord; | ||
1443 | block = next >> order; | ||
1444 | ex->fe_len += 1 << order; | ||
1445 | } | ||
1446 | |||
1447 | BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))); | ||
1448 | return ex->fe_len; | ||
1449 | } | ||
1450 | |||
1451 | static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | ||
1452 | { | ||
1453 | int ord; | ||
1454 | int mlen = 0; | ||
1455 | int max = 0; | ||
1456 | int cur; | ||
1457 | int start = ex->fe_start; | ||
1458 | int len = ex->fe_len; | ||
1459 | unsigned ret = 0; | ||
1460 | int len0 = len; | ||
1461 | void *buddy; | ||
1462 | |||
1463 | BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); | ||
1464 | BUG_ON(e4b->bd_group != ex->fe_group); | ||
1465 | BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); | ||
1466 | mb_check_buddy(e4b); | ||
1467 | mb_mark_used_double(e4b, start, len); | ||
1468 | |||
1469 | e4b->bd_info->bb_free -= len; | ||
1470 | if (e4b->bd_info->bb_first_free == start) | ||
1471 | e4b->bd_info->bb_first_free += len; | ||
1472 | |||
1473 | /* let's maintain fragments counter */ | ||
1474 | if (start != 0) | ||
1475 | mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); | ||
1476 | if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) | ||
1477 | max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); | ||
1478 | if (mlen && max) | ||
1479 | e4b->bd_info->bb_fragments++; | ||
1480 | else if (!mlen && !max) | ||
1481 | e4b->bd_info->bb_fragments--; | ||
1482 | |||
1483 | /* let's maintain buddy itself */ | ||
1484 | while (len) { | ||
1485 | ord = mb_find_order_for_block(e4b, start); | ||
1486 | |||
1487 | if (((start >> ord) << ord) == start && len >= (1 << ord)) { | ||
1488 | /* the whole chunk may be allocated at once! */ | ||
1489 | mlen = 1 << ord; | ||
1490 | buddy = mb_find_buddy(e4b, ord, &max); | ||
1491 | BUG_ON((start >> ord) >= max); | ||
1492 | mb_set_bit(start >> ord, buddy); | ||
1493 | e4b->bd_info->bb_counters[ord]--; | ||
1494 | start += mlen; | ||
1495 | len -= mlen; | ||
1496 | BUG_ON(len < 0); | ||
1497 | continue; | ||
1498 | } | ||
1499 | |||
1500 | /* store for history */ | ||
1501 | if (ret == 0) | ||
1502 | ret = len | (ord << 16); | ||
1503 | |||
1504 | /* we have to split large buddy */ | ||
1505 | BUG_ON(ord <= 0); | ||
1506 | buddy = mb_find_buddy(e4b, ord, &max); | ||
1507 | mb_set_bit(start >> ord, buddy); | ||
1508 | e4b->bd_info->bb_counters[ord]--; | ||
1509 | |||
1510 | ord--; | ||
1511 | cur = (start >> ord) & ~1U; | ||
1512 | buddy = mb_find_buddy(e4b, ord, &max); | ||
1513 | mb_clear_bit(cur, buddy); | ||
1514 | mb_clear_bit(cur + 1, buddy); | ||
1515 | e4b->bd_info->bb_counters[ord]++; | ||
1516 | e4b->bd_info->bb_counters[ord]++; | ||
1517 | } | ||
1518 | |||
1519 | mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), | ||
1520 | EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | ||
1521 | mb_check_buddy(e4b); | ||
1522 | |||
1523 | return ret; | ||
1524 | } | ||
1525 | |||
1526 | /* | ||
1527 | * Must be called under group lock! | ||
1528 | */ | ||
1529 | static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | ||
1530 | struct ext4_buddy *e4b) | ||
1531 | { | ||
1532 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
1533 | int ret; | ||
1534 | |||
1535 | BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group); | ||
1536 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | ||
1537 | |||
1538 | ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); | ||
1539 | ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical; | ||
1540 | ret = mb_mark_used(e4b, &ac->ac_b_ex); | ||
1541 | |||
1542 | /* preallocation can change ac_b_ex, thus we store actually | ||
1543 | * allocated blocks for history */ | ||
1544 | ac->ac_f_ex = ac->ac_b_ex; | ||
1545 | |||
1546 | ac->ac_status = AC_STATUS_FOUND; | ||
1547 | ac->ac_tail = ret & 0xffff; | ||
1548 | ac->ac_buddy = ret >> 16; | ||
1549 | |||
1550 | /* XXXXXXX: SUCH A HORRIBLE **CK */ | ||
1551 | /*FIXME!! Why ? */ | ||
1552 | ac->ac_bitmap_page = e4b->bd_bitmap_page; | ||
1553 | get_page(ac->ac_bitmap_page); | ||
1554 | ac->ac_buddy_page = e4b->bd_buddy_page; | ||
1555 | get_page(ac->ac_buddy_page); | ||
1556 | |||
1557 | /* store last allocated for subsequent stream allocation */ | ||
1558 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | ||
1559 | spin_lock(&sbi->s_md_lock); | ||
1560 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; | ||
1561 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; | ||
1562 | spin_unlock(&sbi->s_md_lock); | ||
1563 | } | ||
1564 | } | ||
1565 | |||
1566 | /* | ||
1567 | * regular allocator, for general purposes allocation | ||
1568 | */ | ||
1569 | |||
1570 | static void ext4_mb_check_limits(struct ext4_allocation_context *ac, | ||
1571 | struct ext4_buddy *e4b, | ||
1572 | int finish_group) | ||
1573 | { | ||
1574 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
1575 | struct ext4_free_extent *bex = &ac->ac_b_ex; | ||
1576 | struct ext4_free_extent *gex = &ac->ac_g_ex; | ||
1577 | struct ext4_free_extent ex; | ||
1578 | int max; | ||
1579 | |||
1580 | /* | ||
1581 | * We don't want to scan for a whole year | ||
1582 | */ | ||
1583 | if (ac->ac_found > sbi->s_mb_max_to_scan && | ||
1584 | !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { | ||
1585 | ac->ac_status = AC_STATUS_BREAK; | ||
1586 | return; | ||
1587 | } | ||
1588 | |||
1589 | /* | ||
1590 | * Haven't found good chunk so far, let's continue | ||
1591 | */ | ||
1592 | if (bex->fe_len < gex->fe_len) | ||
1593 | return; | ||
1594 | |||
1595 | if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan) | ||
1596 | && bex->fe_group == e4b->bd_group) { | ||
1597 | /* recheck chunk's availability - we don't know | ||
1598 | * when it was found (within this lock-unlock | ||
1599 | * period or not) */ | ||
1600 | max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); | ||
1601 | if (max >= gex->fe_len) { | ||
1602 | ext4_mb_use_best_found(ac, e4b); | ||
1603 | return; | ||
1604 | } | ||
1605 | } | ||
1606 | } | ||
1607 | |||
1608 | /* | ||
1609 | * The routine checks whether found extent is good enough. If it is, | ||
1610 | * then the extent gets marked used and flag is set to the context | ||
1611 | * to stop scanning. Otherwise, the extent is compared with the | ||
1612 | * previous found extent and if new one is better, then it's stored | ||
1613 | * in the context. Later, the best found extent will be used, if | ||
1614 | * mballoc can't find good enough extent. | ||
1615 | * | ||
1616 | * FIXME: real allocation policy is to be designed yet! | ||
1617 | */ | ||
1618 | static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, | ||
1619 | struct ext4_free_extent *ex, | ||
1620 | struct ext4_buddy *e4b) | ||
1621 | { | ||
1622 | struct ext4_free_extent *bex = &ac->ac_b_ex; | ||
1623 | struct ext4_free_extent *gex = &ac->ac_g_ex; | ||
1624 | |||
1625 | BUG_ON(ex->fe_len <= 0); | ||
1626 | BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | ||
1627 | BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | ||
1628 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); | ||
1629 | |||
1630 | ac->ac_found++; | ||
1631 | |||
1632 | /* | ||
1633 | * The special case - take what you catch first | ||
1634 | */ | ||
1635 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) { | ||
1636 | *bex = *ex; | ||
1637 | ext4_mb_use_best_found(ac, e4b); | ||
1638 | return; | ||
1639 | } | ||
1640 | |||
1641 | /* | ||
1642 | * Let's check whether the chuck is good enough | ||
1643 | */ | ||
1644 | if (ex->fe_len == gex->fe_len) { | ||
1645 | *bex = *ex; | ||
1646 | ext4_mb_use_best_found(ac, e4b); | ||
1647 | return; | ||
1648 | } | ||
1649 | |||
1650 | /* | ||
1651 | * If this is first found extent, just store it in the context | ||
1652 | */ | ||
1653 | if (bex->fe_len == 0) { | ||
1654 | *bex = *ex; | ||
1655 | return; | ||
1656 | } | ||
1657 | |||
1658 | /* | ||
1659 | * If new found extent is better, store it in the context | ||
1660 | */ | ||
1661 | if (bex->fe_len < gex->fe_len) { | ||
1662 | /* if the request isn't satisfied, any found extent | ||
1663 | * larger than previous best one is better */ | ||
1664 | if (ex->fe_len > bex->fe_len) | ||
1665 | *bex = *ex; | ||
1666 | } else if (ex->fe_len > gex->fe_len) { | ||
1667 | /* if the request is satisfied, then we try to find | ||
1668 | * an extent that still satisfy the request, but is | ||
1669 | * smaller than previous one */ | ||
1670 | if (ex->fe_len < bex->fe_len) | ||
1671 | *bex = *ex; | ||
1672 | } | ||
1673 | |||
1674 | ext4_mb_check_limits(ac, e4b, 0); | ||
1675 | } | ||
1676 | |||
1677 | static int ext4_mb_try_best_found(struct ext4_allocation_context *ac, | ||
1678 | struct ext4_buddy *e4b) | ||
1679 | { | ||
1680 | struct ext4_free_extent ex = ac->ac_b_ex; | ||
1681 | ext4_group_t group = ex.fe_group; | ||
1682 | int max; | ||
1683 | int err; | ||
1684 | |||
1685 | BUG_ON(ex.fe_len <= 0); | ||
1686 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); | ||
1687 | if (err) | ||
1688 | return err; | ||
1689 | |||
1690 | ext4_lock_group(ac->ac_sb, group); | ||
1691 | max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); | ||
1692 | |||
1693 | if (max > 0) { | ||
1694 | ac->ac_b_ex = ex; | ||
1695 | ext4_mb_use_best_found(ac, e4b); | ||
1696 | } | ||
1697 | |||
1698 | ext4_unlock_group(ac->ac_sb, group); | ||
1699 | ext4_mb_release_desc(e4b); | ||
1700 | |||
1701 | return 0; | ||
1702 | } | ||
1703 | |||
1704 | static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | ||
1705 | struct ext4_buddy *e4b) | ||
1706 | { | ||
1707 | ext4_group_t group = ac->ac_g_ex.fe_group; | ||
1708 | int max; | ||
1709 | int err; | ||
1710 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
1711 | struct ext4_super_block *es = sbi->s_es; | ||
1712 | struct ext4_free_extent ex; | ||
1713 | |||
1714 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) | ||
1715 | return 0; | ||
1716 | |||
1717 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); | ||
1718 | if (err) | ||
1719 | return err; | ||
1720 | |||
1721 | ext4_lock_group(ac->ac_sb, group); | ||
1722 | max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, | ||
1723 | ac->ac_g_ex.fe_len, &ex); | ||
1724 | |||
1725 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | ||
1726 | ext4_fsblk_t start; | ||
1727 | |||
1728 | start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + | ||
1729 | ex.fe_start + le32_to_cpu(es->s_first_data_block); | ||
1730 | /* use do_div to get remainder (would be 64-bit modulo) */ | ||
1731 | if (do_div(start, sbi->s_stripe) == 0) { | ||
1732 | ac->ac_found++; | ||
1733 | ac->ac_b_ex = ex; | ||
1734 | ext4_mb_use_best_found(ac, e4b); | ||
1735 | } | ||
1736 | } else if (max >= ac->ac_g_ex.fe_len) { | ||
1737 | BUG_ON(ex.fe_len <= 0); | ||
1738 | BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); | ||
1739 | BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); | ||
1740 | ac->ac_found++; | ||
1741 | ac->ac_b_ex = ex; | ||
1742 | ext4_mb_use_best_found(ac, e4b); | ||
1743 | } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) { | ||
1744 | /* Sometimes, caller may want to merge even small | ||
1745 | * number of blocks to an existing extent */ | ||
1746 | BUG_ON(ex.fe_len <= 0); | ||
1747 | BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); | ||
1748 | BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); | ||
1749 | ac->ac_found++; | ||
1750 | ac->ac_b_ex = ex; | ||
1751 | ext4_mb_use_best_found(ac, e4b); | ||
1752 | } | ||
1753 | ext4_unlock_group(ac->ac_sb, group); | ||
1754 | ext4_mb_release_desc(e4b); | ||
1755 | |||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | /* | ||
1760 | * The routine scans buddy structures (not bitmap!) from given order | ||
1761 | * to max order and tries to find big enough chunk to satisfy the req | ||
1762 | */ | ||
1763 | static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, | ||
1764 | struct ext4_buddy *e4b) | ||
1765 | { | ||
1766 | struct super_block *sb = ac->ac_sb; | ||
1767 | struct ext4_group_info *grp = e4b->bd_info; | ||
1768 | void *buddy; | ||
1769 | int i; | ||
1770 | int k; | ||
1771 | int max; | ||
1772 | |||
1773 | BUG_ON(ac->ac_2order <= 0); | ||
1774 | for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { | ||
1775 | if (grp->bb_counters[i] == 0) | ||
1776 | continue; | ||
1777 | |||
1778 | buddy = mb_find_buddy(e4b, i, &max); | ||
1779 | BUG_ON(buddy == NULL); | ||
1780 | |||
1781 | k = ext4_find_next_zero_bit(buddy, max, 0); | ||
1782 | BUG_ON(k >= max); | ||
1783 | |||
1784 | ac->ac_found++; | ||
1785 | |||
1786 | ac->ac_b_ex.fe_len = 1 << i; | ||
1787 | ac->ac_b_ex.fe_start = k << i; | ||
1788 | ac->ac_b_ex.fe_group = e4b->bd_group; | ||
1789 | |||
1790 | ext4_mb_use_best_found(ac, e4b); | ||
1791 | |||
1792 | BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len); | ||
1793 | |||
1794 | if (EXT4_SB(sb)->s_mb_stats) | ||
1795 | atomic_inc(&EXT4_SB(sb)->s_bal_2orders); | ||
1796 | |||
1797 | break; | ||
1798 | } | ||
1799 | } | ||
1800 | |||
1801 | /* | ||
1802 | * The routine scans the group and measures all found extents. | ||
1803 | * In order to optimize scanning, caller must pass number of | ||
1804 | * free blocks in the group, so the routine can know upper limit. | ||
1805 | */ | ||
1806 | static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | ||
1807 | struct ext4_buddy *e4b) | ||
1808 | { | ||
1809 | struct super_block *sb = ac->ac_sb; | ||
1810 | void *bitmap = EXT4_MB_BITMAP(e4b); | ||
1811 | struct ext4_free_extent ex; | ||
1812 | int i; | ||
1813 | int free; | ||
1814 | |||
1815 | free = e4b->bd_info->bb_free; | ||
1816 | BUG_ON(free <= 0); | ||
1817 | |||
1818 | i = e4b->bd_info->bb_first_free; | ||
1819 | |||
1820 | while (free && ac->ac_status == AC_STATUS_CONTINUE) { | ||
1821 | i = ext4_find_next_zero_bit(bitmap, | ||
1822 | EXT4_BLOCKS_PER_GROUP(sb), i); | ||
1823 | if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { | ||
1824 | BUG_ON(free != 0); | ||
1825 | break; | ||
1826 | } | ||
1827 | |||
1828 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | ||
1829 | BUG_ON(ex.fe_len <= 0); | ||
1830 | BUG_ON(free < ex.fe_len); | ||
1831 | |||
1832 | ext4_mb_measure_extent(ac, &ex, e4b); | ||
1833 | |||
1834 | i += ex.fe_len; | ||
1835 | free -= ex.fe_len; | ||
1836 | } | ||
1837 | |||
1838 | ext4_mb_check_limits(ac, e4b, 1); | ||
1839 | } | ||
1840 | |||
1841 | /* | ||
1842 | * This is a special case for storages like raid5 | ||
1843 | * we try to find stripe-aligned chunks for stripe-size requests | ||
1844 | * XXX should do so at least for multiples of stripe size as well | ||
1845 | */ | ||
1846 | static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | ||
1847 | struct ext4_buddy *e4b) | ||
1848 | { | ||
1849 | struct super_block *sb = ac->ac_sb; | ||
1850 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1851 | void *bitmap = EXT4_MB_BITMAP(e4b); | ||
1852 | struct ext4_free_extent ex; | ||
1853 | ext4_fsblk_t first_group_block; | ||
1854 | ext4_fsblk_t a; | ||
1855 | ext4_grpblk_t i; | ||
1856 | int max; | ||
1857 | |||
1858 | BUG_ON(sbi->s_stripe == 0); | ||
1859 | |||
1860 | /* find first stripe-aligned block in group */ | ||
1861 | first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) | ||
1862 | + le32_to_cpu(sbi->s_es->s_first_data_block); | ||
1863 | a = first_group_block + sbi->s_stripe - 1; | ||
1864 | do_div(a, sbi->s_stripe); | ||
1865 | i = (a * sbi->s_stripe) - first_group_block; | ||
1866 | |||
1867 | while (i < EXT4_BLOCKS_PER_GROUP(sb)) { | ||
1868 | if (!mb_test_bit(i, bitmap)) { | ||
1869 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); | ||
1870 | if (max >= sbi->s_stripe) { | ||
1871 | ac->ac_found++; | ||
1872 | ac->ac_b_ex = ex; | ||
1873 | ext4_mb_use_best_found(ac, e4b); | ||
1874 | break; | ||
1875 | } | ||
1876 | } | ||
1877 | i += sbi->s_stripe; | ||
1878 | } | ||
1879 | } | ||
1880 | |||
1881 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, | ||
1882 | ext4_group_t group, int cr) | ||
1883 | { | ||
1884 | unsigned free, fragments; | ||
1885 | unsigned i, bits; | ||
1886 | struct ext4_group_desc *desc; | ||
1887 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | ||
1888 | |||
1889 | BUG_ON(cr < 0 || cr >= 4); | ||
1890 | BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); | ||
1891 | |||
1892 | free = grp->bb_free; | ||
1893 | fragments = grp->bb_fragments; | ||
1894 | if (free == 0) | ||
1895 | return 0; | ||
1896 | if (fragments == 0) | ||
1897 | return 0; | ||
1898 | |||
1899 | switch (cr) { | ||
1900 | case 0: | ||
1901 | BUG_ON(ac->ac_2order == 0); | ||
1902 | /* If this group is uninitialized, skip it initially */ | ||
1903 | desc = ext4_get_group_desc(ac->ac_sb, group, NULL); | ||
1904 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
1905 | return 0; | ||
1906 | |||
1907 | bits = ac->ac_sb->s_blocksize_bits + 1; | ||
1908 | for (i = ac->ac_2order; i <= bits; i++) | ||
1909 | if (grp->bb_counters[i] > 0) | ||
1910 | return 1; | ||
1911 | break; | ||
1912 | case 1: | ||
1913 | if ((free / fragments) >= ac->ac_g_ex.fe_len) | ||
1914 | return 1; | ||
1915 | break; | ||
1916 | case 2: | ||
1917 | if (free >= ac->ac_g_ex.fe_len) | ||
1918 | return 1; | ||
1919 | break; | ||
1920 | case 3: | ||
1921 | return 1; | ||
1922 | default: | ||
1923 | BUG(); | ||
1924 | } | ||
1925 | |||
1926 | return 0; | ||
1927 | } | ||
1928 | |||
1929 | static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | ||
1930 | { | ||
1931 | ext4_group_t group; | ||
1932 | ext4_group_t i; | ||
1933 | int cr; | ||
1934 | int err = 0; | ||
1935 | int bsbits; | ||
1936 | struct ext4_sb_info *sbi; | ||
1937 | struct super_block *sb; | ||
1938 | struct ext4_buddy e4b; | ||
1939 | loff_t size, isize; | ||
1940 | |||
1941 | sb = ac->ac_sb; | ||
1942 | sbi = EXT4_SB(sb); | ||
1943 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | ||
1944 | |||
1945 | /* first, try the goal */ | ||
1946 | err = ext4_mb_find_by_goal(ac, &e4b); | ||
1947 | if (err || ac->ac_status == AC_STATUS_FOUND) | ||
1948 | goto out; | ||
1949 | |||
1950 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
1951 | goto out; | ||
1952 | |||
1953 | /* | ||
1954 | * ac->ac2_order is set only if the fe_len is a power of 2 | ||
1955 | * if ac2_order is set we also set criteria to 0 so that we | ||
1956 | * try exact allocation using buddy. | ||
1957 | */ | ||
1958 | i = fls(ac->ac_g_ex.fe_len); | ||
1959 | ac->ac_2order = 0; | ||
1960 | /* | ||
1961 | * We search using buddy data only if the order of the request | ||
1962 | * is greater than equal to the sbi_s_mb_order2_reqs | ||
1963 | * You can tune it via /proc/fs/ext4/<partition>/order2_req | ||
1964 | */ | ||
1965 | if (i >= sbi->s_mb_order2_reqs) { | ||
1966 | /* | ||
1967 | * This should tell if fe_len is exactly power of 2 | ||
1968 | */ | ||
1969 | if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) | ||
1970 | ac->ac_2order = i - 1; | ||
1971 | } | ||
1972 | |||
1973 | bsbits = ac->ac_sb->s_blocksize_bits; | ||
1974 | /* if stream allocation is enabled, use global goal */ | ||
1975 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
1976 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
1977 | if (size < isize) | ||
1978 | size = isize; | ||
1979 | |||
1980 | if (size < sbi->s_mb_stream_request && | ||
1981 | (ac->ac_flags & EXT4_MB_HINT_DATA)) { | ||
1982 | /* TBD: may be hot point */ | ||
1983 | spin_lock(&sbi->s_md_lock); | ||
1984 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; | ||
1985 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | ||
1986 | spin_unlock(&sbi->s_md_lock); | ||
1987 | } | ||
1988 | |||
1989 | /* searching for the right group start from the goal value specified */ | ||
1990 | group = ac->ac_g_ex.fe_group; | ||
1991 | |||
1992 | /* Let's just scan groups to find more-less suitable blocks */ | ||
1993 | cr = ac->ac_2order ? 0 : 1; | ||
1994 | /* | ||
1995 | * cr == 0 try to get exact allocation, | ||
1996 | * cr == 3 try to get anything | ||
1997 | */ | ||
1998 | repeat: | ||
1999 | for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { | ||
2000 | ac->ac_criteria = cr; | ||
2001 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | ||
2002 | struct ext4_group_info *grp; | ||
2003 | struct ext4_group_desc *desc; | ||
2004 | |||
2005 | if (group == EXT4_SB(sb)->s_groups_count) | ||
2006 | group = 0; | ||
2007 | |||
2008 | /* quick check to skip empty groups */ | ||
2009 | grp = ext4_get_group_info(ac->ac_sb, group); | ||
2010 | if (grp->bb_free == 0) | ||
2011 | continue; | ||
2012 | |||
2013 | /* | ||
2014 | * if the group is already init we check whether it is | ||
2015 | * a good group and if not we don't load the buddy | ||
2016 | */ | ||
2017 | if (EXT4_MB_GRP_NEED_INIT(grp)) { | ||
2018 | /* | ||
2019 | * we need full data about the group | ||
2020 | * to make a good selection | ||
2021 | */ | ||
2022 | err = ext4_mb_load_buddy(sb, group, &e4b); | ||
2023 | if (err) | ||
2024 | goto out; | ||
2025 | ext4_mb_release_desc(&e4b); | ||
2026 | } | ||
2027 | |||
2028 | /* | ||
2029 | * If the particular group doesn't satisfy our | ||
2030 | * criteria we continue with the next group | ||
2031 | */ | ||
2032 | if (!ext4_mb_good_group(ac, group, cr)) | ||
2033 | continue; | ||
2034 | |||
2035 | err = ext4_mb_load_buddy(sb, group, &e4b); | ||
2036 | if (err) | ||
2037 | goto out; | ||
2038 | |||
2039 | ext4_lock_group(sb, group); | ||
2040 | if (!ext4_mb_good_group(ac, group, cr)) { | ||
2041 | /* someone did allocation from this group */ | ||
2042 | ext4_unlock_group(sb, group); | ||
2043 | ext4_mb_release_desc(&e4b); | ||
2044 | continue; | ||
2045 | } | ||
2046 | |||
2047 | ac->ac_groups_scanned++; | ||
2048 | desc = ext4_get_group_desc(sb, group, NULL); | ||
2049 | if (cr == 0 || (desc->bg_flags & | ||
2050 | cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && | ||
2051 | ac->ac_2order != 0)) | ||
2052 | ext4_mb_simple_scan_group(ac, &e4b); | ||
2053 | else if (cr == 1 && | ||
2054 | ac->ac_g_ex.fe_len == sbi->s_stripe) | ||
2055 | ext4_mb_scan_aligned(ac, &e4b); | ||
2056 | else | ||
2057 | ext4_mb_complex_scan_group(ac, &e4b); | ||
2058 | |||
2059 | ext4_unlock_group(sb, group); | ||
2060 | ext4_mb_release_desc(&e4b); | ||
2061 | |||
2062 | if (ac->ac_status != AC_STATUS_CONTINUE) | ||
2063 | break; | ||
2064 | } | ||
2065 | } | ||
2066 | |||
2067 | if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && | ||
2068 | !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { | ||
2069 | /* | ||
2070 | * We've been searching too long. Let's try to allocate | ||
2071 | * the best chunk we've found so far | ||
2072 | */ | ||
2073 | |||
2074 | ext4_mb_try_best_found(ac, &e4b); | ||
2075 | if (ac->ac_status != AC_STATUS_FOUND) { | ||
2076 | /* | ||
2077 | * Someone more lucky has already allocated it. | ||
2078 | * The only thing we can do is just take first | ||
2079 | * found block(s) | ||
2080 | printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n"); | ||
2081 | */ | ||
2082 | ac->ac_b_ex.fe_group = 0; | ||
2083 | ac->ac_b_ex.fe_start = 0; | ||
2084 | ac->ac_b_ex.fe_len = 0; | ||
2085 | ac->ac_status = AC_STATUS_CONTINUE; | ||
2086 | ac->ac_flags |= EXT4_MB_HINT_FIRST; | ||
2087 | cr = 3; | ||
2088 | atomic_inc(&sbi->s_mb_lost_chunks); | ||
2089 | goto repeat; | ||
2090 | } | ||
2091 | } | ||
2092 | out: | ||
2093 | return err; | ||
2094 | } | ||
2095 | |||
2096 | #ifdef EXT4_MB_HISTORY | ||
/*
 * Per-open state of the "mb_history" proc file.  It is filled in by
 * ext4_mb_seq_history_open() and freed by ext4_mb_seq_history_release().
 */
struct ext4_mb_proc_session {
	struct ext4_mb_history *history;	/* private copy of the history ring */
	struct super_block *sb;			/* filesystem the file belongs to */
	int start;				/* index in @history where iteration starts */
	int max;				/* number of entries in @history */
};
2103 | |||
2104 | static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s, | ||
2105 | struct ext4_mb_history *hs, | ||
2106 | int first) | ||
2107 | { | ||
2108 | if (hs == s->history + s->max) | ||
2109 | hs = s->history; | ||
2110 | if (!first && hs == s->history + s->start) | ||
2111 | return NULL; | ||
2112 | while (hs->orig.fe_len == 0) { | ||
2113 | hs++; | ||
2114 | if (hs == s->history + s->max) | ||
2115 | hs = s->history; | ||
2116 | if (hs == s->history + s->start) | ||
2117 | return NULL; | ||
2118 | } | ||
2119 | return hs; | ||
2120 | } | ||
2121 | |||
2122 | static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos) | ||
2123 | { | ||
2124 | struct ext4_mb_proc_session *s = seq->private; | ||
2125 | struct ext4_mb_history *hs; | ||
2126 | int l = *pos; | ||
2127 | |||
2128 | if (l == 0) | ||
2129 | return SEQ_START_TOKEN; | ||
2130 | hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1); | ||
2131 | if (!hs) | ||
2132 | return NULL; | ||
2133 | while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL); | ||
2134 | return hs; | ||
2135 | } | ||
2136 | |||
2137 | static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v, | ||
2138 | loff_t *pos) | ||
2139 | { | ||
2140 | struct ext4_mb_proc_session *s = seq->private; | ||
2141 | struct ext4_mb_history *hs = v; | ||
2142 | |||
2143 | ++*pos; | ||
2144 | if (v == SEQ_START_TOKEN) | ||
2145 | return ext4_mb_history_skip_empty(s, s->history + s->start, 1); | ||
2146 | else | ||
2147 | return ext4_mb_history_skip_empty(s, ++hs, 0); | ||
2148 | } | ||
2149 | |||
2150 | static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | ||
2151 | { | ||
2152 | char buf[25], buf2[25], buf3[25], *fmt; | ||
2153 | struct ext4_mb_history *hs = v; | ||
2154 | |||
2155 | if (v == SEQ_START_TOKEN) { | ||
2156 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | ||
2157 | "%-5s %-2s %-5s %-5s %-5s %-6s\n", | ||
2158 | "pid", "inode", "original", "goal", "result", "found", | ||
2159 | "grps", "cr", "flags", "merge", "tail", "broken"); | ||
2160 | return 0; | ||
2161 | } | ||
2162 | |||
2163 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | ||
2164 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | ||
2165 | "%-5u %-5s %-5u %-6u\n"; | ||
2166 | sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, | ||
2167 | hs->result.fe_start, hs->result.fe_len, | ||
2168 | hs->result.fe_logical); | ||
2169 | sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, | ||
2170 | hs->orig.fe_start, hs->orig.fe_len, | ||
2171 | hs->orig.fe_logical); | ||
2172 | sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group, | ||
2173 | hs->goal.fe_start, hs->goal.fe_len, | ||
2174 | hs->goal.fe_logical); | ||
2175 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, | ||
2176 | hs->found, hs->groups, hs->cr, hs->flags, | ||
2177 | hs->merged ? "M" : "", hs->tail, | ||
2178 | hs->buddy ? 1 << hs->buddy : 0); | ||
2179 | } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { | ||
2180 | fmt = "%-5u %-8u %-23s %-23s %-23s\n"; | ||
2181 | sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, | ||
2182 | hs->result.fe_start, hs->result.fe_len, | ||
2183 | hs->result.fe_logical); | ||
2184 | sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, | ||
2185 | hs->orig.fe_start, hs->orig.fe_len, | ||
2186 | hs->orig.fe_logical); | ||
2187 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); | ||
2188 | } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { | ||
2189 | sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, | ||
2190 | hs->result.fe_start, hs->result.fe_len); | ||
2191 | seq_printf(seq, "%-5u %-8u %-23s discard\n", | ||
2192 | hs->pid, hs->ino, buf2); | ||
2193 | } else if (hs->op == EXT4_MB_HISTORY_FREE) { | ||
2194 | sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, | ||
2195 | hs->result.fe_start, hs->result.fe_len); | ||
2196 | seq_printf(seq, "%-5u %-8u %-23s free\n", | ||
2197 | hs->pid, hs->ino, buf2); | ||
2198 | } | ||
2199 | return 0; | ||
2200 | } | ||
2201 | |||
/*
 * seq_file ->stop callback for mb_history.  Nothing to do:
 * ext4_mb_seq_history_start() acquires nothing that needs releasing.
 */
static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v)
{
}
2205 | |||
/* seq_file iterator callbacks behind the "mb_history" proc file */
static struct seq_operations ext4_mb_seq_history_ops = {
	.start  = ext4_mb_seq_history_start,
	.next   = ext4_mb_seq_history_next,
	.stop   = ext4_mb_seq_history_stop,
	.show   = ext4_mb_seq_history_show,
};
2212 | |||
2213 | static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) | ||
2214 | { | ||
2215 | struct super_block *sb = PDE(inode)->data; | ||
2216 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2217 | struct ext4_mb_proc_session *s; | ||
2218 | int rc; | ||
2219 | int size; | ||
2220 | |||
2221 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
2222 | if (s == NULL) | ||
2223 | return -ENOMEM; | ||
2224 | s->sb = sb; | ||
2225 | size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max; | ||
2226 | s->history = kmalloc(size, GFP_KERNEL); | ||
2227 | if (s->history == NULL) { | ||
2228 | kfree(s); | ||
2229 | return -ENOMEM; | ||
2230 | } | ||
2231 | |||
2232 | spin_lock(&sbi->s_mb_history_lock); | ||
2233 | memcpy(s->history, sbi->s_mb_history, size); | ||
2234 | s->max = sbi->s_mb_history_max; | ||
2235 | s->start = sbi->s_mb_history_cur % s->max; | ||
2236 | spin_unlock(&sbi->s_mb_history_lock); | ||
2237 | |||
2238 | rc = seq_open(file, &ext4_mb_seq_history_ops); | ||
2239 | if (rc == 0) { | ||
2240 | struct seq_file *m = (struct seq_file *)file->private_data; | ||
2241 | m->private = s; | ||
2242 | } else { | ||
2243 | kfree(s->history); | ||
2244 | kfree(s); | ||
2245 | } | ||
2246 | return rc; | ||
2247 | |||
2248 | } | ||
2249 | |||
2250 | static int ext4_mb_seq_history_release(struct inode *inode, struct file *file) | ||
2251 | { | ||
2252 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
2253 | struct ext4_mb_proc_session *s = seq->private; | ||
2254 | kfree(s->history); | ||
2255 | kfree(s); | ||
2256 | return seq_release(inode, file); | ||
2257 | } | ||
2258 | |||
2259 | static ssize_t ext4_mb_seq_history_write(struct file *file, | ||
2260 | const char __user *buffer, | ||
2261 | size_t count, loff_t *ppos) | ||
2262 | { | ||
2263 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
2264 | struct ext4_mb_proc_session *s = seq->private; | ||
2265 | struct super_block *sb = s->sb; | ||
2266 | char str[32]; | ||
2267 | int value; | ||
2268 | |||
2269 | if (count >= sizeof(str)) { | ||
2270 | printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n", | ||
2271 | "mb_history", (int)sizeof(str)); | ||
2272 | return -EOVERFLOW; | ||
2273 | } | ||
2274 | |||
2275 | if (copy_from_user(str, buffer, count)) | ||
2276 | return -EFAULT; | ||
2277 | |||
2278 | value = simple_strtol(str, NULL, 0); | ||
2279 | if (value < 0) | ||
2280 | return -ERANGE; | ||
2281 | EXT4_SB(sb)->s_mb_history_filter = value; | ||
2282 | |||
2283 | return count; | ||
2284 | } | ||
2285 | |||
/*
 * File operations of the "mb_history" proc file: reading goes through
 * seq_file, writing sets the history filter.
 */
static struct file_operations ext4_mb_seq_history_fops = {
	.owner		= THIS_MODULE,
	.open		= ext4_mb_seq_history_open,
	.read		= seq_read,
	.write		= ext4_mb_seq_history_write,
	.llseek		= seq_lseek,
	.release	= ext4_mb_seq_history_release,
};
2294 | |||
2295 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | ||
2296 | { | ||
2297 | struct super_block *sb = seq->private; | ||
2298 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2299 | ext4_group_t group; | ||
2300 | |||
2301 | if (*pos < 0 || *pos >= sbi->s_groups_count) | ||
2302 | return NULL; | ||
2303 | |||
2304 | group = *pos + 1; | ||
2305 | return (void *) group; | ||
2306 | } | ||
2307 | |||
2308 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | ||
2309 | { | ||
2310 | struct super_block *sb = seq->private; | ||
2311 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2312 | ext4_group_t group; | ||
2313 | |||
2314 | ++*pos; | ||
2315 | if (*pos < 0 || *pos >= sbi->s_groups_count) | ||
2316 | return NULL; | ||
2317 | group = *pos + 1; | ||
2318 | return (void *) group;; | ||
2319 | } | ||
2320 | |||
2321 | static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | ||
2322 | { | ||
2323 | struct super_block *sb = seq->private; | ||
2324 | long group = (long) v; | ||
2325 | int i; | ||
2326 | int err; | ||
2327 | struct ext4_buddy e4b; | ||
2328 | struct sg { | ||
2329 | struct ext4_group_info info; | ||
2330 | unsigned short counters[16]; | ||
2331 | } sg; | ||
2332 | |||
2333 | group--; | ||
2334 | if (group == 0) | ||
2335 | seq_printf(seq, "#%-5s: %-5s %-5s %-5s " | ||
2336 | "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " | ||
2337 | "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", | ||
2338 | "group", "free", "frags", "first", | ||
2339 | "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6", | ||
2340 | "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); | ||
2341 | |||
2342 | i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + | ||
2343 | sizeof(struct ext4_group_info); | ||
2344 | err = ext4_mb_load_buddy(sb, group, &e4b); | ||
2345 | if (err) { | ||
2346 | seq_printf(seq, "#%-5lu: I/O error\n", group); | ||
2347 | return 0; | ||
2348 | } | ||
2349 | ext4_lock_group(sb, group); | ||
2350 | memcpy(&sg, ext4_get_group_info(sb, group), i); | ||
2351 | ext4_unlock_group(sb, group); | ||
2352 | ext4_mb_release_desc(&e4b); | ||
2353 | |||
2354 | seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, | ||
2355 | sg.info.bb_fragments, sg.info.bb_first_free); | ||
2356 | for (i = 0; i <= 13; i++) | ||
2357 | seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? | ||
2358 | sg.info.bb_counters[i] : 0); | ||
2359 | seq_printf(seq, " ]\n"); | ||
2360 | |||
2361 | return 0; | ||
2362 | } | ||
2363 | |||
/*
 * seq_file ->stop callback for mb_groups.  Nothing to do:
 * ext4_mb_seq_groups_start() acquires nothing that needs releasing.
 */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
2367 | |||
/* seq_file iterator callbacks behind the "mb_groups" proc file */
static struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};
2374 | |||
2375 | static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | ||
2376 | { | ||
2377 | struct super_block *sb = PDE(inode)->data; | ||
2378 | int rc; | ||
2379 | |||
2380 | rc = seq_open(file, &ext4_mb_seq_groups_ops); | ||
2381 | if (rc == 0) { | ||
2382 | struct seq_file *m = (struct seq_file *)file->private_data; | ||
2383 | m->private = sb; | ||
2384 | } | ||
2385 | return rc; | ||
2386 | |||
2387 | } | ||
2388 | |||
/* File operations of the read-only, seq_file based "mb_groups" proc file */
static struct file_operations ext4_mb_seq_groups_fops = {
	.owner		= THIS_MODULE,
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
2396 | |||
2397 | static void ext4_mb_history_release(struct super_block *sb) | ||
2398 | { | ||
2399 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2400 | |||
2401 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | ||
2402 | remove_proc_entry("mb_history", sbi->s_mb_proc); | ||
2403 | |||
2404 | kfree(sbi->s_mb_history); | ||
2405 | } | ||
2406 | |||
2407 | static void ext4_mb_history_init(struct super_block *sb) | ||
2408 | { | ||
2409 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2410 | int i; | ||
2411 | |||
2412 | if (sbi->s_mb_proc != NULL) { | ||
2413 | struct proc_dir_entry *p; | ||
2414 | p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); | ||
2415 | if (p) { | ||
2416 | p->proc_fops = &ext4_mb_seq_history_fops; | ||
2417 | p->data = sb; | ||
2418 | } | ||
2419 | p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); | ||
2420 | if (p) { | ||
2421 | p->proc_fops = &ext4_mb_seq_groups_fops; | ||
2422 | p->data = sb; | ||
2423 | } | ||
2424 | } | ||
2425 | |||
2426 | sbi->s_mb_history_max = 1000; | ||
2427 | sbi->s_mb_history_cur = 0; | ||
2428 | spin_lock_init(&sbi->s_mb_history_lock); | ||
2429 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | ||
2430 | sbi->s_mb_history = kmalloc(i, GFP_KERNEL); | ||
2431 | if (likely(sbi->s_mb_history != NULL)) | ||
2432 | memset(sbi->s_mb_history, 0, i); | ||
2433 | /* if we can't allocate history, then we simple won't use it */ | ||
2434 | } | ||
2435 | |||
2436 | static void ext4_mb_store_history(struct ext4_allocation_context *ac) | ||
2437 | { | ||
2438 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
2439 | struct ext4_mb_history h; | ||
2440 | |||
2441 | if (unlikely(sbi->s_mb_history == NULL)) | ||
2442 | return; | ||
2443 | |||
2444 | if (!(ac->ac_op & sbi->s_mb_history_filter)) | ||
2445 | return; | ||
2446 | |||
2447 | h.op = ac->ac_op; | ||
2448 | h.pid = current->pid; | ||
2449 | h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; | ||
2450 | h.orig = ac->ac_o_ex; | ||
2451 | h.result = ac->ac_b_ex; | ||
2452 | h.flags = ac->ac_flags; | ||
2453 | h.found = ac->ac_found; | ||
2454 | h.groups = ac->ac_groups_scanned; | ||
2455 | h.cr = ac->ac_criteria; | ||
2456 | h.tail = ac->ac_tail; | ||
2457 | h.buddy = ac->ac_buddy; | ||
2458 | h.merged = 0; | ||
2459 | if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) { | ||
2460 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && | ||
2461 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) | ||
2462 | h.merged = 1; | ||
2463 | h.goal = ac->ac_g_ex; | ||
2464 | h.result = ac->ac_f_ex; | ||
2465 | } | ||
2466 | |||
2467 | spin_lock(&sbi->s_mb_history_lock); | ||
2468 | memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); | ||
2469 | if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) | ||
2470 | sbi->s_mb_history_cur = 0; | ||
2471 | spin_unlock(&sbi->s_mb_history_lock); | ||
2472 | } | ||
2473 | |||
2474 | #else | ||
/* Without EXT4_MB_HISTORY the history hooks expand to nothing. */
#define ext4_mb_history_release(sb)
#define ext4_mb_history_init(sb)
2477 | #endif | ||
2478 | |||
2479 | static int ext4_mb_init_backend(struct super_block *sb) | ||
2480 | { | ||
2481 | ext4_group_t i; | ||
2482 | int j, len, metalen; | ||
2483 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2484 | int num_meta_group_infos = | ||
2485 | (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> | ||
2486 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2487 | struct ext4_group_info **meta_group_info; | ||
2488 | |||
2489 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | ||
2490 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | ||
2491 | * So a two level scheme suffices for now. */ | ||
2492 | sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * | ||
2493 | num_meta_group_infos, GFP_KERNEL); | ||
2494 | if (sbi->s_group_info == NULL) { | ||
2495 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | ||
2496 | return -ENOMEM; | ||
2497 | } | ||
2498 | sbi->s_buddy_cache = new_inode(sb); | ||
2499 | if (sbi->s_buddy_cache == NULL) { | ||
2500 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | ||
2501 | goto err_freesgi; | ||
2502 | } | ||
2503 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | ||
2504 | |||
2505 | metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2506 | for (i = 0; i < num_meta_group_infos; i++) { | ||
2507 | if ((i + 1) == num_meta_group_infos) | ||
2508 | metalen = sizeof(*meta_group_info) * | ||
2509 | (sbi->s_groups_count - | ||
2510 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | ||
2511 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
2512 | if (meta_group_info == NULL) { | ||
2513 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
2514 | "buddy group\n"); | ||
2515 | goto err_freemeta; | ||
2516 | } | ||
2517 | sbi->s_group_info[i] = meta_group_info; | ||
2518 | } | ||
2519 | |||
2520 | /* | ||
2521 | * calculate needed size. if change bb_counters size, | ||
2522 | * don't forget about ext4_mb_generate_buddy() | ||
2523 | */ | ||
2524 | len = sizeof(struct ext4_group_info); | ||
2525 | len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); | ||
2526 | for (i = 0; i < sbi->s_groups_count; i++) { | ||
2527 | struct ext4_group_desc *desc; | ||
2528 | |||
2529 | meta_group_info = | ||
2530 | sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
2531 | j = i & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
2532 | |||
2533 | meta_group_info[j] = kzalloc(len, GFP_KERNEL); | ||
2534 | if (meta_group_info[j] == NULL) { | ||
2535 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
2536 | i--; | ||
2537 | goto err_freebuddy; | ||
2538 | } | ||
2539 | desc = ext4_get_group_desc(sb, i, NULL); | ||
2540 | if (desc == NULL) { | ||
2541 | printk(KERN_ERR | ||
2542 | "EXT4-fs: can't read descriptor %lu\n", i); | ||
2543 | goto err_freebuddy; | ||
2544 | } | ||
2545 | memset(meta_group_info[j], 0, len); | ||
2546 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | ||
2547 | &(meta_group_info[j]->bb_state)); | ||
2548 | |||
2549 | /* | ||
2550 | * initialize bb_free to be able to skip | ||
2551 | * empty groups without initialization | ||
2552 | */ | ||
2553 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
2554 | meta_group_info[j]->bb_free = | ||
2555 | ext4_free_blocks_after_init(sb, i, desc); | ||
2556 | } else { | ||
2557 | meta_group_info[j]->bb_free = | ||
2558 | le16_to_cpu(desc->bg_free_blocks_count); | ||
2559 | } | ||
2560 | |||
2561 | INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); | ||
2562 | |||
2563 | #ifdef DOUBLE_CHECK | ||
2564 | { | ||
2565 | struct buffer_head *bh; | ||
2566 | meta_group_info[j]->bb_bitmap = | ||
2567 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
2568 | BUG_ON(meta_group_info[j]->bb_bitmap == NULL); | ||
2569 | bh = read_block_bitmap(sb, i); | ||
2570 | BUG_ON(bh == NULL); | ||
2571 | memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, | ||
2572 | sb->s_blocksize); | ||
2573 | put_bh(bh); | ||
2574 | } | ||
2575 | #endif | ||
2576 | |||
2577 | } | ||
2578 | |||
2579 | return 0; | ||
2580 | |||
2581 | err_freebuddy: | ||
2582 | while (i >= 0) { | ||
2583 | kfree(ext4_get_group_info(sb, i)); | ||
2584 | i--; | ||
2585 | } | ||
2586 | i = num_meta_group_infos; | ||
2587 | err_freemeta: | ||
2588 | while (--i >= 0) | ||
2589 | kfree(sbi->s_group_info[i]); | ||
2590 | iput(sbi->s_buddy_cache); | ||
2591 | err_freesgi: | ||
2592 | kfree(sbi->s_group_info); | ||
2593 | return -ENOMEM; | ||
2594 | } | ||
2595 | |||
2596 | int ext4_mb_init(struct super_block *sb, int needs_recovery) | ||
2597 | { | ||
2598 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2599 | unsigned i; | ||
2600 | unsigned offset; | ||
2601 | unsigned max; | ||
2602 | |||
2603 | if (!test_opt(sb, MBALLOC)) | ||
2604 | return 0; | ||
2605 | |||
2606 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | ||
2607 | |||
2608 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | ||
2609 | if (sbi->s_mb_offsets == NULL) { | ||
2610 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2611 | return -ENOMEM; | ||
2612 | } | ||
2613 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | ||
2614 | if (sbi->s_mb_maxs == NULL) { | ||
2615 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2616 | kfree(sbi->s_mb_maxs); | ||
2617 | return -ENOMEM; | ||
2618 | } | ||
2619 | |||
2620 | /* order 0 is regular bitmap */ | ||
2621 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; | ||
2622 | sbi->s_mb_offsets[0] = 0; | ||
2623 | |||
2624 | i = 1; | ||
2625 | offset = 0; | ||
2626 | max = sb->s_blocksize << 2; | ||
2627 | do { | ||
2628 | sbi->s_mb_offsets[i] = offset; | ||
2629 | sbi->s_mb_maxs[i] = max; | ||
2630 | offset += 1 << (sb->s_blocksize_bits - i); | ||
2631 | max = max >> 1; | ||
2632 | i++; | ||
2633 | } while (i <= sb->s_blocksize_bits + 1); | ||
2634 | |||
2635 | /* init file for buddy data */ | ||
2636 | i = ext4_mb_init_backend(sb); | ||
2637 | if (i) { | ||
2638 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2639 | kfree(sbi->s_mb_offsets); | ||
2640 | kfree(sbi->s_mb_maxs); | ||
2641 | return i; | ||
2642 | } | ||
2643 | |||
2644 | spin_lock_init(&sbi->s_md_lock); | ||
2645 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2646 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2647 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2648 | spin_lock_init(&sbi->s_bal_lock); | ||
2649 | |||
2650 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | ||
2651 | sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; | ||
2652 | sbi->s_mb_stats = MB_DEFAULT_STATS; | ||
2653 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | ||
2654 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | ||
2655 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | ||
2656 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | ||
2657 | |||
2658 | i = sizeof(struct ext4_locality_group) * NR_CPUS; | ||
2659 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2660 | if (sbi->s_locality_groups == NULL) { | ||
2661 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2662 | kfree(sbi->s_mb_offsets); | ||
2663 | kfree(sbi->s_mb_maxs); | ||
2664 | return -ENOMEM; | ||
2665 | } | ||
2666 | for (i = 0; i < NR_CPUS; i++) { | ||
2667 | struct ext4_locality_group *lg; | ||
2668 | lg = &sbi->s_locality_groups[i]; | ||
2669 | mutex_init(&lg->lg_mutex); | ||
2670 | INIT_LIST_HEAD(&lg->lg_prealloc_list); | ||
2671 | spin_lock_init(&lg->lg_prealloc_lock); | ||
2672 | } | ||
2673 | |||
2674 | ext4_mb_init_per_dev_proc(sb); | ||
2675 | ext4_mb_history_init(sb); | ||
2676 | |||
2677 | printk("EXT4-fs: mballoc enabled\n"); | ||
2678 | return 0; | ||
2679 | } | ||
2680 | |||
2681 | /* need to called with ext4 group lock (ext4_lock_group) */ | ||
2682 | static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | ||
2683 | { | ||
2684 | struct ext4_prealloc_space *pa; | ||
2685 | struct list_head *cur, *tmp; | ||
2686 | int count = 0; | ||
2687 | |||
2688 | list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) { | ||
2689 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | ||
2690 | list_del(&pa->pa_group_list); | ||
2691 | count++; | ||
2692 | kfree(pa); | ||
2693 | } | ||
2694 | if (count) | ||
2695 | mb_debug("mballoc: %u PAs left\n", count); | ||
2696 | |||
2697 | } | ||
2698 | |||
2699 | int ext4_mb_release(struct super_block *sb) | ||
2700 | { | ||
2701 | ext4_group_t i; | ||
2702 | int num_meta_group_infos; | ||
2703 | struct ext4_group_info *grinfo; | ||
2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2705 | |||
2706 | if (!test_opt(sb, MBALLOC)) | ||
2707 | return 0; | ||
2708 | |||
2709 | /* release freed, non-committed blocks */ | ||
2710 | spin_lock(&sbi->s_md_lock); | ||
2711 | list_splice_init(&sbi->s_closed_transaction, | ||
2712 | &sbi->s_committed_transaction); | ||
2713 | list_splice_init(&sbi->s_active_transaction, | ||
2714 | &sbi->s_committed_transaction); | ||
2715 | spin_unlock(&sbi->s_md_lock); | ||
2716 | ext4_mb_free_committed_blocks(sb); | ||
2717 | |||
2718 | if (sbi->s_group_info) { | ||
2719 | for (i = 0; i < sbi->s_groups_count; i++) { | ||
2720 | grinfo = ext4_get_group_info(sb, i); | ||
2721 | #ifdef DOUBLE_CHECK | ||
2722 | kfree(grinfo->bb_bitmap); | ||
2723 | #endif | ||
2724 | ext4_lock_group(sb, i); | ||
2725 | ext4_mb_cleanup_pa(grinfo); | ||
2726 | ext4_unlock_group(sb, i); | ||
2727 | kfree(grinfo); | ||
2728 | } | ||
2729 | num_meta_group_infos = (sbi->s_groups_count + | ||
2730 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | ||
2731 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2732 | for (i = 0; i < num_meta_group_infos; i++) | ||
2733 | kfree(sbi->s_group_info[i]); | ||
2734 | kfree(sbi->s_group_info); | ||
2735 | } | ||
2736 | kfree(sbi->s_mb_offsets); | ||
2737 | kfree(sbi->s_mb_maxs); | ||
2738 | if (sbi->s_buddy_cache) | ||
2739 | iput(sbi->s_buddy_cache); | ||
2740 | if (sbi->s_mb_stats) { | ||
2741 | printk(KERN_INFO | ||
2742 | "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n", | ||
2743 | atomic_read(&sbi->s_bal_allocated), | ||
2744 | atomic_read(&sbi->s_bal_reqs), | ||
2745 | atomic_read(&sbi->s_bal_success)); | ||
2746 | printk(KERN_INFO | ||
2747 | "EXT4-fs: mballoc: %u extents scanned, %u goal hits, " | ||
2748 | "%u 2^N hits, %u breaks, %u lost\n", | ||
2749 | atomic_read(&sbi->s_bal_ex_scanned), | ||
2750 | atomic_read(&sbi->s_bal_goals), | ||
2751 | atomic_read(&sbi->s_bal_2orders), | ||
2752 | atomic_read(&sbi->s_bal_breaks), | ||
2753 | atomic_read(&sbi->s_mb_lost_chunks)); | ||
2754 | printk(KERN_INFO | ||
2755 | "EXT4-fs: mballoc: %lu generated and it took %Lu\n", | ||
2756 | sbi->s_mb_buddies_generated++, | ||
2757 | sbi->s_mb_generation_time); | ||
2758 | printk(KERN_INFO | ||
2759 | "EXT4-fs: mballoc: %u preallocated, %u discarded\n", | ||
2760 | atomic_read(&sbi->s_mb_preallocated), | ||
2761 | atomic_read(&sbi->s_mb_discarded)); | ||
2762 | } | ||
2763 | |||
2764 | kfree(sbi->s_locality_groups); | ||
2765 | |||
2766 | ext4_mb_history_release(sb); | ||
2767 | ext4_mb_destroy_per_dev_proc(sb); | ||
2768 | |||
2769 | return 0; | ||
2770 | } | ||
2771 | |||
2772 | static void ext4_mb_free_committed_blocks(struct super_block *sb) | ||
2773 | { | ||
2774 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2775 | int err; | ||
2776 | int i; | ||
2777 | int count = 0; | ||
2778 | int count2 = 0; | ||
2779 | struct ext4_free_metadata *md; | ||
2780 | struct ext4_buddy e4b; | ||
2781 | |||
2782 | if (list_empty(&sbi->s_committed_transaction)) | ||
2783 | return; | ||
2784 | |||
2785 | /* there is committed blocks to be freed yet */ | ||
2786 | do { | ||
2787 | /* get next array of blocks */ | ||
2788 | md = NULL; | ||
2789 | spin_lock(&sbi->s_md_lock); | ||
2790 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2791 | md = list_entry(sbi->s_committed_transaction.next, | ||
2792 | struct ext4_free_metadata, list); | ||
2793 | list_del(&md->list); | ||
2794 | } | ||
2795 | spin_unlock(&sbi->s_md_lock); | ||
2796 | |||
2797 | if (md == NULL) | ||
2798 | break; | ||
2799 | |||
2800 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | ||
2801 | md->num, md->group, md); | ||
2802 | |||
2803 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | ||
2804 | /* we expect to find existing buddy because it's pinned */ | ||
2805 | BUG_ON(err != 0); | ||
2806 | |||
2807 | /* there are blocks to put in buddy to make them really free */ | ||
2808 | count += md->num; | ||
2809 | count2++; | ||
2810 | ext4_lock_group(sb, md->group); | ||
2811 | for (i = 0; i < md->num; i++) { | ||
2812 | mb_debug(" %u", md->blocks[i]); | ||
2813 | err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | ||
2814 | BUG_ON(err != 0); | ||
2815 | } | ||
2816 | mb_debug("\n"); | ||
2817 | ext4_unlock_group(sb, md->group); | ||
2818 | |||
2819 | /* balance refcounts from ext4_mb_free_metadata() */ | ||
2820 | page_cache_release(e4b.bd_buddy_page); | ||
2821 | page_cache_release(e4b.bd_bitmap_page); | ||
2822 | |||
2823 | kfree(md); | ||
2824 | ext4_mb_release_desc(&e4b); | ||
2825 | |||
2826 | } while (md); | ||
2827 | |||
2828 | mb_debug("freed %u blocks in %u structures\n", count, count2); | ||
2829 | } | ||
2830 | |||
/* directory and entry names used for the mballoc proc files */
#define EXT4_ROOT			"ext4"
#define EXT4_MB_STATS_NAME		"stats"
#define EXT4_MB_MAX_TO_SCAN_NAME	"max_to_scan"
#define EXT4_MB_MIN_TO_SCAN_NAME	"min_to_scan"
#define EXT4_MB_ORDER2_REQ		"order2_req"
#define EXT4_MB_STREAM_REQ		"stream_req"
#define EXT4_MB_GROUP_PREALLOC		"group_prealloc"
2838 | |||
2839 | |||
2840 | |||
/*
 * Generate ext4_mb_read_<name>(): a proc read handler that prints
 * sbi->s_mb_<name> followed by a newline.  Reads at a non-zero offset
 * return 0; *eof is always set.
 */
#define MB_PROC_VALUE_READ(name)				\
static int ext4_mb_read_##name(char *page, char **start,	\
		off_t off, int count, int *eof, void *data)	\
{								\
	struct ext4_sb_info *sbi = data;			\
								\
	*eof = 1;						\
	if (off != 0)						\
		return 0;					\
	*start = page;						\
	return sprintf(page, "%ld\n", sbi->s_mb_##name);	\
}
2854 | |||
/*
 * Generate ext4_mb_write_<name>(): a proc write handler that parses a
 * number from user space and stores it in sbi->s_mb_<name>.
 *
 * Returns cnt on success, -EINVAL when the input does not fit the local
 * buffer, -EFAULT when the user copy fails and -ERANGE when the parsed
 * value is not strictly positive.
 */
#define MB_PROC_VALUE_WRITE(name)				\
static int ext4_mb_write_##name(struct file *file,		\
		const char __user *buf, unsigned long cnt, void *data)	\
{								\
	struct ext4_sb_info *sbi = data;			\
	char str[32];						\
	long value;						\
	if (cnt >= sizeof(str))					\
		return -EINVAL;					\
	if (copy_from_user(str, buf, cnt))			\
		return -EFAULT;					\
	/* cnt < sizeof(str): terminate before parsing */	\
	str[cnt] = '\0';					\
	value = simple_strtol(str, NULL, 0);			\
	if (value <= 0)						\
		return -ERANGE;					\
	sbi->s_mb_##name = value;				\
	return cnt;						\
}
2872 | |||
/* read handlers, one per tunable */
MB_PROC_VALUE_READ(stats);
MB_PROC_VALUE_READ(max_to_scan);
MB_PROC_VALUE_READ(min_to_scan);
MB_PROC_VALUE_READ(order2_reqs);
MB_PROC_VALUE_READ(stream_request);
MB_PROC_VALUE_READ(group_prealloc);

/* write handlers, one per tunable */
MB_PROC_VALUE_WRITE(stats);
MB_PROC_VALUE_WRITE(max_to_scan);
MB_PROC_VALUE_WRITE(min_to_scan);
MB_PROC_VALUE_WRITE(order2_reqs);
MB_PROC_VALUE_WRITE(stream_request);
MB_PROC_VALUE_WRITE(group_prealloc);
2885 | |||
/*
 * Create proc entry @name under sbi->s_mb_proc and wire it to the
 * ext4_mb_read_<var>/ext4_mb_write_<var> handlers.
 *
 * Relies on the expanding function providing the locals 'proc', 'mode'
 * and 'sbi' and an 'err_out' label, which is jumped to on failure.
 */
#define MB_PROC_HANDLER(name, var)					\
do {									\
	proc = create_proc_entry(name, mode, sbi->s_mb_proc);		\
	if (!proc) {							\
		printk(KERN_ERR "EXT4-fs: can't to create %s\n", name);	\
		goto err_out;						\
	}								\
	proc->read_proc  = ext4_mb_read_##var ;				\
	proc->write_proc = ext4_mb_write_##var;				\
	proc->data = sbi;						\
} while (0)
2897 | |||
2898 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | ||
2899 | { | ||
2900 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | ||
2901 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2902 | struct proc_dir_entry *proc; | ||
2903 | char devname[64]; | ||
2904 | |||
2905 | snprintf(devname, sizeof(devname) - 1, "%s", | ||
2906 | bdevname(sb->s_bdev, devname)); | ||
2907 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2908 | |||
2909 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2910 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2911 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2912 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2913 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2914 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2915 | |||
2916 | return 0; | ||
2917 | |||
2918 | err_out: | ||
2919 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | ||
2920 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | ||
2921 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | ||
2922 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | ||
2923 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | ||
2924 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | ||
2925 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2926 | remove_proc_entry(devname, proc_root_ext4); | ||
2927 | sbi->s_mb_proc = NULL; | ||
2928 | |||
2929 | return -ENOMEM; | ||
2930 | } | ||
2931 | |||
2932 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | ||
2933 | { | ||
2934 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2935 | char devname[64]; | ||
2936 | |||
2937 | if (sbi->s_mb_proc == NULL) | ||
2938 | return -EINVAL; | ||
2939 | |||
2940 | snprintf(devname, sizeof(devname) - 1, "%s", | ||
2941 | bdevname(sb->s_bdev, devname)); | ||
2942 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | ||
2943 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | ||
2944 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | ||
2945 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | ||
2946 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | ||
2947 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2948 | remove_proc_entry(devname, proc_root_ext4); | ||
2949 | |||
2950 | return 0; | ||
2951 | } | ||
2952 | |||
2953 | int __init init_ext4_mballoc(void) | ||
2954 | { | ||
2955 | ext4_pspace_cachep = | ||
2956 | kmem_cache_create("ext4_prealloc_space", | ||
2957 | sizeof(struct ext4_prealloc_space), | ||
2958 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2959 | if (ext4_pspace_cachep == NULL) | ||
2960 | return -ENOMEM; | ||
2961 | |||
2962 | #ifdef CONFIG_PROC_FS | ||
2963 | proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); | ||
2964 | if (proc_root_ext4 == NULL) | ||
2965 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); | ||
2966 | #endif | ||
2967 | |||
2968 | return 0; | ||
2969 | } | ||
2970 | |||
2971 | void exit_ext4_mballoc(void) | ||
2972 | { | ||
2973 | /* XXX: synchronize_rcu(); */ | ||
2974 | kmem_cache_destroy(ext4_pspace_cachep); | ||
2975 | #ifdef CONFIG_PROC_FS | ||
2976 | remove_proc_entry(EXT4_ROOT, proc_root_fs); | ||
2977 | #endif | ||
2978 | } | ||
2979 | |||
2980 | |||
2981 | /* | ||
2982 | * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps | ||
2983 | * Returns 0 if success or error code | ||
2984 | */ | ||
2985 | static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | ||
2986 | handle_t *handle) | ||
2987 | { | ||
2988 | struct buffer_head *bitmap_bh = NULL; | ||
2989 | struct ext4_super_block *es; | ||
2990 | struct ext4_group_desc *gdp; | ||
2991 | struct buffer_head *gdp_bh; | ||
2992 | struct ext4_sb_info *sbi; | ||
2993 | struct super_block *sb; | ||
2994 | ext4_fsblk_t block; | ||
2995 | int err; | ||
2996 | |||
2997 | BUG_ON(ac->ac_status != AC_STATUS_FOUND); | ||
2998 | BUG_ON(ac->ac_b_ex.fe_len <= 0); | ||
2999 | |||
3000 | sb = ac->ac_sb; | ||
3001 | sbi = EXT4_SB(sb); | ||
3002 | es = sbi->s_es; | ||
3003 | |||
3004 | ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, | ||
3005 | gdp->bg_free_blocks_count); | ||
3006 | |||
3007 | err = -EIO; | ||
3008 | bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); | ||
3009 | if (!bitmap_bh) | ||
3010 | goto out_err; | ||
3011 | |||
3012 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
3013 | if (err) | ||
3014 | goto out_err; | ||
3015 | |||
3016 | err = -EIO; | ||
3017 | gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); | ||
3018 | if (!gdp) | ||
3019 | goto out_err; | ||
3020 | |||
3021 | err = ext4_journal_get_write_access(handle, gdp_bh); | ||
3022 | if (err) | ||
3023 | goto out_err; | ||
3024 | |||
3025 | block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) | ||
3026 | + ac->ac_b_ex.fe_start | ||
3027 | + le32_to_cpu(es->s_first_data_block); | ||
3028 | |||
3029 | if (block == ext4_block_bitmap(sb, gdp) || | ||
3030 | block == ext4_inode_bitmap(sb, gdp) || | ||
3031 | in_range(block, ext4_inode_table(sb, gdp), | ||
3032 | EXT4_SB(sb)->s_itb_per_group)) { | ||
3033 | |||
3034 | ext4_error(sb, __FUNCTION__, | ||
3035 | "Allocating block in system zone - block = %llu", | ||
3036 | block); | ||
3037 | } | ||
3038 | #ifdef AGGRESSIVE_CHECK | ||
3039 | { | ||
3040 | int i; | ||
3041 | for (i = 0; i < ac->ac_b_ex.fe_len; i++) { | ||
3042 | BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, | ||
3043 | bitmap_bh->b_data)); | ||
3044 | } | ||
3045 | } | ||
3046 | #endif | ||
3047 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, | ||
3048 | ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | ||
3049 | |||
3050 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | ||
3051 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
3052 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
3053 | gdp->bg_free_blocks_count = | ||
3054 | cpu_to_le16(ext4_free_blocks_after_init(sb, | ||
3055 | ac->ac_b_ex.fe_group, | ||
3056 | gdp)); | ||
3057 | } | ||
3058 | gdp->bg_free_blocks_count = | ||
3059 | cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) | ||
3060 | - ac->ac_b_ex.fe_len); | ||
3061 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | ||
3062 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | ||
3063 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | ||
3064 | |||
3065 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
3066 | if (err) | ||
3067 | goto out_err; | ||
3068 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
3069 | |||
3070 | out_err: | ||
3071 | sb->s_dirt = 1; | ||
3072 | put_bh(bitmap_bh); | ||
3073 | return err; | ||
3074 | } | ||
3075 | |||
3076 | /* | ||
3077 | * here we normalize request for locality group | ||
3078 | * Group request are normalized to s_strip size if we set the same via mount | ||
3079 | * option. If not we set it to s_mb_group_prealloc which can be configured via | ||
3080 | * /proc/fs/ext4/<partition>/group_prealloc | ||
3081 | * | ||
3082 | * XXX: should we try to preallocate more than the group has now? | ||
3083 | */ | ||
3084 | static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | ||
3085 | { | ||
3086 | struct super_block *sb = ac->ac_sb; | ||
3087 | struct ext4_locality_group *lg = ac->ac_lg; | ||
3088 | |||
3089 | BUG_ON(lg == NULL); | ||
3090 | if (EXT4_SB(sb)->s_stripe) | ||
3091 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | ||
3092 | else | ||
3093 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | ||
3094 | mb_debug("#%u: goal %lu blocks for locality group\n", | ||
3095 | current->pid, ac->ac_g_ex.fe_len); | ||
3096 | } | ||
3097 | |||
3098 | /* | ||
3099 | * Normalization means making request better in terms of | ||
3100 | * size and alignment | ||
3101 | */ | ||
3102 | static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, | ||
3103 | struct ext4_allocation_request *ar) | ||
3104 | { | ||
3105 | int bsbits, max; | ||
3106 | ext4_lblk_t end; | ||
3107 | struct list_head *cur; | ||
3108 | loff_t size, orig_size, start_off; | ||
3109 | ext4_lblk_t start, orig_start; | ||
3110 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | ||
3111 | |||
3112 | /* do normalize only data requests, metadata requests | ||
3113 | do not need preallocation */ | ||
3114 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | ||
3115 | return; | ||
3116 | |||
3117 | /* sometime caller may want exact blocks */ | ||
3118 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
3119 | return; | ||
3120 | |||
3121 | /* caller may indicate that preallocation isn't | ||
3122 | * required (it's a tail, for example) */ | ||
3123 | if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC) | ||
3124 | return; | ||
3125 | |||
3126 | if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) { | ||
3127 | ext4_mb_normalize_group_request(ac); | ||
3128 | return ; | ||
3129 | } | ||
3130 | |||
3131 | bsbits = ac->ac_sb->s_blocksize_bits; | ||
3132 | |||
3133 | /* first, let's learn actual file size | ||
3134 | * given current request is allocated */ | ||
3135 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
3136 | size = size << bsbits; | ||
3137 | if (size < i_size_read(ac->ac_inode)) | ||
3138 | size = i_size_read(ac->ac_inode); | ||
3139 | |||
3140 | /* max available blocks in a free group */ | ||
3141 | max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - | ||
3142 | EXT4_SB(ac->ac_sb)->s_itb_per_group; | ||
3143 | |||
3144 | #define NRL_CHECK_SIZE(req, size, max,bits) \ | ||
3145 | (req <= (size) || max <= ((size) >> bits)) | ||
3146 | |||
3147 | /* first, try to predict filesize */ | ||
3148 | /* XXX: should this table be tunable? */ | ||
3149 | start_off = 0; | ||
3150 | if (size <= 16 * 1024) { | ||
3151 | size = 16 * 1024; | ||
3152 | } else if (size <= 32 * 1024) { | ||
3153 | size = 32 * 1024; | ||
3154 | } else if (size <= 64 * 1024) { | ||
3155 | size = 64 * 1024; | ||
3156 | } else if (size <= 128 * 1024) { | ||
3157 | size = 128 * 1024; | ||
3158 | } else if (size <= 256 * 1024) { | ||
3159 | size = 256 * 1024; | ||
3160 | } else if (size <= 512 * 1024) { | ||
3161 | size = 512 * 1024; | ||
3162 | } else if (size <= 1024 * 1024) { | ||
3163 | size = 1024 * 1024; | ||
3164 | } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { | ||
3165 | start_off = ((loff_t)ac->ac_o_ex.fe_logical >> | ||
3166 | (20 - bsbits)) << 20; | ||
3167 | size = 1024 * 1024; | ||
3168 | } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { | ||
3169 | start_off = ((loff_t)ac->ac_o_ex.fe_logical >> | ||
3170 | (22 - bsbits)) << 22; | ||
3171 | size = 4 * 1024 * 1024; | ||
3172 | } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, | ||
3173 | (8<<20)>>bsbits, max, bsbits)) { | ||
3174 | start_off = ((loff_t)ac->ac_o_ex.fe_logical >> | ||
3175 | (23 - bsbits)) << 23; | ||
3176 | size = 8 * 1024 * 1024; | ||
3177 | } else { | ||
3178 | start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; | ||
3179 | size = ac->ac_o_ex.fe_len << bsbits; | ||
3180 | } | ||
3181 | orig_size = size = size >> bsbits; | ||
3182 | orig_start = start = start_off >> bsbits; | ||
3183 | |||
3184 | /* don't cover already allocated blocks in selected range */ | ||
3185 | if (ar->pleft && start <= ar->lleft) { | ||
3186 | size -= ar->lleft + 1 - start; | ||
3187 | start = ar->lleft + 1; | ||
3188 | } | ||
3189 | if (ar->pright && start + size - 1 >= ar->lright) | ||
3190 | size -= start + size - ar->lright; | ||
3191 | |||
3192 | end = start + size; | ||
3193 | |||
3194 | /* check we don't cross already preallocated blocks */ | ||
3195 | rcu_read_lock(); | ||
3196 | list_for_each_rcu(cur, &ei->i_prealloc_list) { | ||
3197 | struct ext4_prealloc_space *pa; | ||
3198 | unsigned long pa_end; | ||
3199 | |||
3200 | pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); | ||
3201 | |||
3202 | if (pa->pa_deleted) | ||
3203 | continue; | ||
3204 | spin_lock(&pa->pa_lock); | ||
3205 | if (pa->pa_deleted) { | ||
3206 | spin_unlock(&pa->pa_lock); | ||
3207 | continue; | ||
3208 | } | ||
3209 | |||
3210 | pa_end = pa->pa_lstart + pa->pa_len; | ||
3211 | |||
3212 | /* PA must not overlap original request */ | ||
3213 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | ||
3214 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); | ||
3215 | |||
3216 | /* skip PA normalized request doesn't overlap with */ | ||
3217 | if (pa->pa_lstart >= end) { | ||
3218 | spin_unlock(&pa->pa_lock); | ||
3219 | continue; | ||
3220 | } | ||
3221 | if (pa_end <= start) { | ||
3222 | spin_unlock(&pa->pa_lock); | ||
3223 | continue; | ||
3224 | } | ||
3225 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); | ||
3226 | |||
3227 | if (pa_end <= ac->ac_o_ex.fe_logical) { | ||
3228 | BUG_ON(pa_end < start); | ||
3229 | start = pa_end; | ||
3230 | } | ||
3231 | |||
3232 | if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { | ||
3233 | BUG_ON(pa->pa_lstart > end); | ||
3234 | end = pa->pa_lstart; | ||
3235 | } | ||
3236 | spin_unlock(&pa->pa_lock); | ||
3237 | } | ||
3238 | rcu_read_unlock(); | ||
3239 | size = end - start; | ||
3240 | |||
3241 | /* XXX: extra loop to check we really don't overlap preallocations */ | ||
3242 | rcu_read_lock(); | ||
3243 | list_for_each_rcu(cur, &ei->i_prealloc_list) { | ||
3244 | struct ext4_prealloc_space *pa; | ||
3245 | unsigned long pa_end; | ||
3246 | pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); | ||
3247 | spin_lock(&pa->pa_lock); | ||
3248 | if (pa->pa_deleted == 0) { | ||
3249 | pa_end = pa->pa_lstart + pa->pa_len; | ||
3250 | BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); | ||
3251 | } | ||
3252 | spin_unlock(&pa->pa_lock); | ||
3253 | } | ||
3254 | rcu_read_unlock(); | ||
3255 | |||
3256 | if (start + size <= ac->ac_o_ex.fe_logical && | ||
3257 | start > ac->ac_o_ex.fe_logical) { | ||
3258 | printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", | ||
3259 | (unsigned long) start, (unsigned long) size, | ||
3260 | (unsigned long) ac->ac_o_ex.fe_logical); | ||
3261 | } | ||
3262 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | ||
3263 | start > ac->ac_o_ex.fe_logical); | ||
3264 | BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | ||
3265 | |||
3266 | /* now prepare goal request */ | ||
3267 | |||
3268 | /* XXX: is it better to align blocks WRT to logical | ||
3269 | * placement or satisfy big request as is */ | ||
3270 | ac->ac_g_ex.fe_logical = start; | ||
3271 | ac->ac_g_ex.fe_len = size; | ||
3272 | |||
3273 | /* define goal start in order to merge */ | ||
3274 | if (ar->pright && (ar->lright == (start + size))) { | ||
3275 | /* merge to the right */ | ||
3276 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size, | ||
3277 | &ac->ac_f_ex.fe_group, | ||
3278 | &ac->ac_f_ex.fe_start); | ||
3279 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | ||
3280 | } | ||
3281 | if (ar->pleft && (ar->lleft + 1 == start)) { | ||
3282 | /* merge to the left */ | ||
3283 | ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1, | ||
3284 | &ac->ac_f_ex.fe_group, | ||
3285 | &ac->ac_f_ex.fe_start); | ||
3286 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | ||
3287 | } | ||
3288 | |||
3289 | mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, | ||
3290 | (unsigned) orig_size, (unsigned) start); | ||
3291 | } | ||
3292 | |||
3293 | static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | ||
3294 | { | ||
3295 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
3296 | |||
3297 | if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { | ||
3298 | atomic_inc(&sbi->s_bal_reqs); | ||
3299 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); | ||
3300 | if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) | ||
3301 | atomic_inc(&sbi->s_bal_success); | ||
3302 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); | ||
3303 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && | ||
3304 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) | ||
3305 | atomic_inc(&sbi->s_bal_goals); | ||
3306 | if (ac->ac_found > sbi->s_mb_max_to_scan) | ||
3307 | atomic_inc(&sbi->s_bal_breaks); | ||
3308 | } | ||
3309 | |||
3310 | ext4_mb_store_history(ac); | ||
3311 | } | ||
3312 | |||
3313 | /* | ||
3314 | * use blocks preallocated to inode | ||
3315 | */ | ||
3316 | static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | ||
3317 | struct ext4_prealloc_space *pa) | ||
3318 | { | ||
3319 | ext4_fsblk_t start; | ||
3320 | ext4_fsblk_t end; | ||
3321 | int len; | ||
3322 | |||
3323 | /* found preallocated blocks, use them */ | ||
3324 | start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); | ||
3325 | end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); | ||
3326 | len = end - start; | ||
3327 | ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, | ||
3328 | &ac->ac_b_ex.fe_start); | ||
3329 | ac->ac_b_ex.fe_len = len; | ||
3330 | ac->ac_status = AC_STATUS_FOUND; | ||
3331 | ac->ac_pa = pa; | ||
3332 | |||
3333 | BUG_ON(start < pa->pa_pstart); | ||
3334 | BUG_ON(start + len > pa->pa_pstart + pa->pa_len); | ||
3335 | BUG_ON(pa->pa_free < len); | ||
3336 | pa->pa_free -= len; | ||
3337 | |||
3338 | mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); | ||
3339 | } | ||
3340 | |||
3341 | /* | ||
3342 | * use blocks preallocated to locality group | ||
3343 | */ | ||
3344 | static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | ||
3345 | struct ext4_prealloc_space *pa) | ||
3346 | { | ||
3347 | unsigned len = ac->ac_o_ex.fe_len; | ||
3348 | |||
3349 | ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, | ||
3350 | &ac->ac_b_ex.fe_group, | ||
3351 | &ac->ac_b_ex.fe_start); | ||
3352 | ac->ac_b_ex.fe_len = len; | ||
3353 | ac->ac_status = AC_STATUS_FOUND; | ||
3354 | ac->ac_pa = pa; | ||
3355 | |||
3356 | /* we don't correct pa_pstart or pa_plen here to avoid | ||
3357 | * possible race when tte group is being loaded concurrently | ||
3358 | * instead we correct pa later, after blocks are marked | ||
3359 | * in on-disk bitmap -- see ext4_mb_release_context() */ | ||
3360 | /* | ||
3361 | * FIXME!! but the other CPUs can look at this particular | ||
3362 | * pa and think that it have enought free blocks if we | ||
3363 | * don't update pa_free here right ? | ||
3364 | */ | ||
3365 | mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); | ||
3366 | } | ||
3367 | |||
3368 | /* | ||
3369 | * search goal blocks in preallocated space | ||
3370 | */ | ||
3371 | static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | ||
3372 | { | ||
3373 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | ||
3374 | struct ext4_locality_group *lg; | ||
3375 | struct ext4_prealloc_space *pa; | ||
3376 | struct list_head *cur; | ||
3377 | |||
3378 | /* only data can be preallocated */ | ||
3379 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | ||
3380 | return 0; | ||
3381 | |||
3382 | /* first, try per-file preallocation */ | ||
3383 | rcu_read_lock(); | ||
3384 | list_for_each_rcu(cur, &ei->i_prealloc_list) { | ||
3385 | pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); | ||
3386 | |||
3387 | /* all fields in this condition don't change, | ||
3388 | * so we can skip locking for them */ | ||
3389 | if (ac->ac_o_ex.fe_logical < pa->pa_lstart || | ||
3390 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | ||
3391 | continue; | ||
3392 | |||
3393 | /* found preallocated blocks, use them */ | ||
3394 | spin_lock(&pa->pa_lock); | ||
3395 | if (pa->pa_deleted == 0 && pa->pa_free) { | ||
3396 | atomic_inc(&pa->pa_count); | ||
3397 | ext4_mb_use_inode_pa(ac, pa); | ||
3398 | spin_unlock(&pa->pa_lock); | ||
3399 | ac->ac_criteria = 10; | ||
3400 | rcu_read_unlock(); | ||
3401 | return 1; | ||
3402 | } | ||
3403 | spin_unlock(&pa->pa_lock); | ||
3404 | } | ||
3405 | rcu_read_unlock(); | ||
3406 | |||
3407 | /* can we use group allocation? */ | ||
3408 | if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)) | ||
3409 | return 0; | ||
3410 | |||
3411 | /* inode may have no locality group for some reason */ | ||
3412 | lg = ac->ac_lg; | ||
3413 | if (lg == NULL) | ||
3414 | return 0; | ||
3415 | |||
3416 | rcu_read_lock(); | ||
3417 | list_for_each_rcu(cur, &lg->lg_prealloc_list) { | ||
3418 | pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list); | ||
3419 | spin_lock(&pa->pa_lock); | ||
3420 | if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { | ||
3421 | atomic_inc(&pa->pa_count); | ||
3422 | ext4_mb_use_group_pa(ac, pa); | ||
3423 | spin_unlock(&pa->pa_lock); | ||
3424 | ac->ac_criteria = 20; | ||
3425 | rcu_read_unlock(); | ||
3426 | return 1; | ||
3427 | } | ||
3428 | spin_unlock(&pa->pa_lock); | ||
3429 | } | ||
3430 | rcu_read_unlock(); | ||
3431 | |||
3432 | return 0; | ||
3433 | } | ||
3434 | |||
3435 | /* | ||
3436 | * the function goes through all preallocation in this group and marks them | ||
3437 | * used in in-core bitmap. buddy must be generated from this bitmap | ||
3438 | * Need to be called with ext4 group lock (ext4_lock_group) | ||
3439 | */ | ||
3440 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | ||
3441 | ext4_group_t group) | ||
3442 | { | ||
3443 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
3444 | struct ext4_prealloc_space *pa; | ||
3445 | struct list_head *cur; | ||
3446 | ext4_group_t groupnr; | ||
3447 | ext4_grpblk_t start; | ||
3448 | int preallocated = 0; | ||
3449 | int count = 0; | ||
3450 | int len; | ||
3451 | |||
3452 | /* all form of preallocation discards first load group, | ||
3453 | * so the only competing code is preallocation use. | ||
3454 | * we don't need any locking here | ||
3455 | * notice we do NOT ignore preallocations with pa_deleted | ||
3456 | * otherwise we could leave used blocks available for | ||
3457 | * allocation in buddy when concurrent ext4_mb_put_pa() | ||
3458 | * is dropping preallocation | ||
3459 | */ | ||
3460 | list_for_each(cur, &grp->bb_prealloc_list) { | ||
3461 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | ||
3462 | spin_lock(&pa->pa_lock); | ||
3463 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, | ||
3464 | &groupnr, &start); | ||
3465 | len = pa->pa_len; | ||
3466 | spin_unlock(&pa->pa_lock); | ||
3467 | if (unlikely(len == 0)) | ||
3468 | continue; | ||
3469 | BUG_ON(groupnr != group); | ||
3470 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | ||
3471 | bitmap, start, len); | ||
3472 | preallocated += len; | ||
3473 | count++; | ||
3474 | } | ||
3475 | mb_debug("prellocated %u for group %lu\n", preallocated, group); | ||
3476 | } | ||
3477 | |||
3478 | static void ext4_mb_pa_callback(struct rcu_head *head) | ||
3479 | { | ||
3480 | struct ext4_prealloc_space *pa; | ||
3481 | pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); | ||
3482 | kmem_cache_free(ext4_pspace_cachep, pa); | ||
3483 | } | ||
3484 | |||
3485 | /* | ||
3486 | * drops a reference to preallocated space descriptor | ||
3487 | * if this was the last reference and the space is consumed | ||
3488 | */ | ||
3489 | static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | ||
3490 | struct super_block *sb, struct ext4_prealloc_space *pa) | ||
3491 | { | ||
3492 | unsigned long grp; | ||
3493 | |||
3494 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) | ||
3495 | return; | ||
3496 | |||
3497 | /* in this short window concurrent discard can set pa_deleted */ | ||
3498 | spin_lock(&pa->pa_lock); | ||
3499 | if (pa->pa_deleted == 1) { | ||
3500 | spin_unlock(&pa->pa_lock); | ||
3501 | return; | ||
3502 | } | ||
3503 | |||
3504 | pa->pa_deleted = 1; | ||
3505 | spin_unlock(&pa->pa_lock); | ||
3506 | |||
3507 | /* -1 is to protect from crossing allocation group */ | ||
3508 | ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); | ||
3509 | |||
3510 | /* | ||
3511 | * possible race: | ||
3512 | * | ||
3513 | * P1 (buddy init) P2 (regular allocation) | ||
3514 | * find block B in PA | ||
3515 | * copy on-disk bitmap to buddy | ||
3516 | * mark B in on-disk bitmap | ||
3517 | * drop PA from group | ||
3518 | * mark all PAs in buddy | ||
3519 | * | ||
3520 | * thus, P1 initializes buddy with B available. to prevent this | ||
3521 | * we make "copy" and "mark all PAs" atomic and serialize "drop PA" | ||
3522 | * against that pair | ||
3523 | */ | ||
3524 | ext4_lock_group(sb, grp); | ||
3525 | list_del(&pa->pa_group_list); | ||
3526 | ext4_unlock_group(sb, grp); | ||
3527 | |||
3528 | spin_lock(pa->pa_obj_lock); | ||
3529 | list_del_rcu(&pa->pa_inode_list); | ||
3530 | spin_unlock(pa->pa_obj_lock); | ||
3531 | |||
3532 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | ||
3533 | } | ||
3534 | |||
3535 | /* | ||
3536 | * creates new preallocated space for given inode | ||
3537 | */ | ||
3538 | static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | ||
3539 | { | ||
3540 | struct super_block *sb = ac->ac_sb; | ||
3541 | struct ext4_prealloc_space *pa; | ||
3542 | struct ext4_group_info *grp; | ||
3543 | struct ext4_inode_info *ei; | ||
3544 | |||
3545 | /* preallocate only when found space is larger then requested */ | ||
3546 | BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); | ||
3547 | BUG_ON(ac->ac_status != AC_STATUS_FOUND); | ||
3548 | BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); | ||
3549 | |||
3550 | pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS); | ||
3551 | if (pa == NULL) | ||
3552 | return -ENOMEM; | ||
3553 | |||
3554 | if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { | ||
3555 | int winl; | ||
3556 | int wins; | ||
3557 | int win; | ||
3558 | int offs; | ||
3559 | |||
3560 | /* we can't allocate as much as normalizer wants. | ||
3561 | * so, found space must get proper lstart | ||
3562 | * to cover original request */ | ||
3563 | BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); | ||
3564 | BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); | ||
3565 | |||
3566 | /* we're limited by original request in that | ||
3567 | * logical block must be covered any way | ||
3568 | * winl is window we can move our chunk within */ | ||
3569 | winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; | ||
3570 | |||
3571 | /* also, we should cover whole original request */ | ||
3572 | wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; | ||
3573 | |||
3574 | /* the smallest one defines real window */ | ||
3575 | win = min(winl, wins); | ||
3576 | |||
3577 | offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; | ||
3578 | if (offs && offs < win) | ||
3579 | win = offs; | ||
3580 | |||
3581 | ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; | ||
3582 | BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); | ||
3583 | BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); | ||
3584 | } | ||
3585 | |||
3586 | /* preallocation can change ac_b_ex, thus we store actually | ||
3587 | * allocated blocks for history */ | ||
3588 | ac->ac_f_ex = ac->ac_b_ex; | ||
3589 | |||
3590 | pa->pa_lstart = ac->ac_b_ex.fe_logical; | ||
3591 | pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | ||
3592 | pa->pa_len = ac->ac_b_ex.fe_len; | ||
3593 | pa->pa_free = pa->pa_len; | ||
3594 | atomic_set(&pa->pa_count, 1); | ||
3595 | spin_lock_init(&pa->pa_lock); | ||
3596 | pa->pa_deleted = 0; | ||
3597 | pa->pa_linear = 0; | ||
3598 | |||
3599 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | ||
3600 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3601 | |||
3602 | ext4_mb_use_inode_pa(ac, pa); | ||
3603 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | ||
3604 | |||
3605 | ei = EXT4_I(ac->ac_inode); | ||
3606 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); | ||
3607 | |||
3608 | pa->pa_obj_lock = &ei->i_prealloc_lock; | ||
3609 | pa->pa_inode = ac->ac_inode; | ||
3610 | |||
3611 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); | ||
3612 | list_add(&pa->pa_group_list, &grp->bb_prealloc_list); | ||
3613 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3614 | |||
3615 | spin_lock(pa->pa_obj_lock); | ||
3616 | list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list); | ||
3617 | spin_unlock(pa->pa_obj_lock); | ||
3618 | |||
3619 | return 0; | ||
3620 | } | ||
3621 | |||
3622 | /* | ||
3623 | * creates new preallocated space for locality group inodes belongs to | ||
3624 | */ | ||
3625 | static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | ||
3626 | { | ||
3627 | struct super_block *sb = ac->ac_sb; | ||
3628 | struct ext4_locality_group *lg; | ||
3629 | struct ext4_prealloc_space *pa; | ||
3630 | struct ext4_group_info *grp; | ||
3631 | |||
3632 | /* preallocate only when found space is larger then requested */ | ||
3633 | BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); | ||
3634 | BUG_ON(ac->ac_status != AC_STATUS_FOUND); | ||
3635 | BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); | ||
3636 | |||
3637 | BUG_ON(ext4_pspace_cachep == NULL); | ||
3638 | pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS); | ||
3639 | if (pa == NULL) | ||
3640 | return -ENOMEM; | ||
3641 | |||
3642 | /* preallocation can change ac_b_ex, thus we store actually | ||
3643 | * allocated blocks for history */ | ||
3644 | ac->ac_f_ex = ac->ac_b_ex; | ||
3645 | |||
3646 | pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | ||
3647 | pa->pa_lstart = pa->pa_pstart; | ||
3648 | pa->pa_len = ac->ac_b_ex.fe_len; | ||
3649 | pa->pa_free = pa->pa_len; | ||
3650 | atomic_set(&pa->pa_count, 1); | ||
3651 | spin_lock_init(&pa->pa_lock); | ||
3652 | pa->pa_deleted = 0; | ||
3653 | pa->pa_linear = 1; | ||
3654 | |||
3655 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | ||
3656 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3657 | |||
3658 | ext4_mb_use_group_pa(ac, pa); | ||
3659 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | ||
3660 | |||
3661 | grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); | ||
3662 | lg = ac->ac_lg; | ||
3663 | BUG_ON(lg == NULL); | ||
3664 | |||
3665 | pa->pa_obj_lock = &lg->lg_prealloc_lock; | ||
3666 | pa->pa_inode = NULL; | ||
3667 | |||
3668 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); | ||
3669 | list_add(&pa->pa_group_list, &grp->bb_prealloc_list); | ||
3670 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | ||
3671 | |||
3672 | spin_lock(pa->pa_obj_lock); | ||
3673 | list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); | ||
3674 | spin_unlock(pa->pa_obj_lock); | ||
3675 | |||
3676 | return 0; | ||
3677 | } | ||
3678 | |||
3679 | static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | ||
3680 | { | ||
3681 | int err; | ||
3682 | |||
3683 | if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) | ||
3684 | err = ext4_mb_new_group_pa(ac); | ||
3685 | else | ||
3686 | err = ext4_mb_new_inode_pa(ac); | ||
3687 | return err; | ||
3688 | } | ||
3689 | |||
3690 | /* | ||
3691 | * finds all unused blocks in on-disk bitmap, frees them in | ||
3692 | * in-core bitmap and buddy. | ||
3693 | * @pa must be unlinked from inode and group lists, so that | ||
3694 | * nobody else can find/use it. | ||
3695 | * the caller MUST hold group/inode locks. | ||
3696 | * TODO: optimize the case when there are no in-core structures yet | ||
3697 | */ | ||
3698 | static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, | ||
3699 | struct buffer_head *bitmap_bh, | ||
3700 | struct ext4_prealloc_space *pa) | ||
3701 | { | ||
3702 | struct ext4_allocation_context ac; | ||
3703 | struct super_block *sb = e4b->bd_sb; | ||
3704 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
3705 | unsigned long end; | ||
3706 | unsigned long next; | ||
3707 | ext4_group_t group; | ||
3708 | ext4_grpblk_t bit; | ||
3709 | sector_t start; | ||
3710 | int err = 0; | ||
3711 | int free = 0; | ||
3712 | |||
3713 | BUG_ON(pa->pa_deleted == 0); | ||
3714 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | ||
3715 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | ||
3716 | end = bit + pa->pa_len; | ||
3717 | |||
3718 | ac.ac_sb = sb; | ||
3719 | ac.ac_inode = pa->pa_inode; | ||
3720 | ac.ac_op = EXT4_MB_HISTORY_DISCARD; | ||
3721 | |||
3722 | while (bit < end) { | ||
3723 | bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); | ||
3724 | if (bit >= end) | ||
3725 | break; | ||
3726 | next = ext4_find_next_bit(bitmap_bh->b_data, end, bit); | ||
3727 | if (next > end) | ||
3728 | next = end; | ||
3729 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | ||
3730 | le32_to_cpu(sbi->s_es->s_first_data_block); | ||
3731 | mb_debug(" free preallocated %u/%u in group %u\n", | ||
3732 | (unsigned) start, (unsigned) next - bit, | ||
3733 | (unsigned) group); | ||
3734 | free += next - bit; | ||
3735 | |||
3736 | ac.ac_b_ex.fe_group = group; | ||
3737 | ac.ac_b_ex.fe_start = bit; | ||
3738 | ac.ac_b_ex.fe_len = next - bit; | ||
3739 | ac.ac_b_ex.fe_logical = 0; | ||
3740 | ext4_mb_store_history(&ac); | ||
3741 | |||
3742 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | ||
3743 | bit = next + 1; | ||
3744 | } | ||
3745 | if (free != pa->pa_free) { | ||
3746 | printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n", | ||
3747 | pa, (unsigned long) pa->pa_lstart, | ||
3748 | (unsigned long) pa->pa_pstart, | ||
3749 | (unsigned long) pa->pa_len); | ||
3750 | printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); | ||
3751 | } | ||
3752 | BUG_ON(free != pa->pa_free); | ||
3753 | atomic_add(free, &sbi->s_mb_discarded); | ||
3754 | |||
3755 | return err; | ||
3756 | } | ||
3757 | |||
3758 | static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, | ||
3759 | struct ext4_prealloc_space *pa) | ||
3760 | { | ||
3761 | struct ext4_allocation_context ac; | ||
3762 | struct super_block *sb = e4b->bd_sb; | ||
3763 | ext4_group_t group; | ||
3764 | ext4_grpblk_t bit; | ||
3765 | |||
3766 | ac.ac_op = EXT4_MB_HISTORY_DISCARD; | ||
3767 | |||
3768 | BUG_ON(pa->pa_deleted == 0); | ||
3769 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | ||
3770 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | ||
3771 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | ||
3772 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | ||
3773 | |||
3774 | ac.ac_sb = sb; | ||
3775 | ac.ac_inode = NULL; | ||
3776 | ac.ac_b_ex.fe_group = group; | ||
3777 | ac.ac_b_ex.fe_start = bit; | ||
3778 | ac.ac_b_ex.fe_len = pa->pa_len; | ||
3779 | ac.ac_b_ex.fe_logical = 0; | ||
3780 | ext4_mb_store_history(&ac); | ||
3781 | |||
3782 | return 0; | ||
3783 | } | ||
3784 | |||
3785 | /* | ||
3786 | * releases all preallocations in given group | ||
3787 | * | ||
3788 | * first, we need to decide discard policy: | ||
3789 | * - when do we discard | ||
3790 | * 1) ENOSPC | ||
3791 | * - how many do we discard | ||
3792 | * 1) how many requested | ||
3793 | */ | ||
3794 | static int ext4_mb_discard_group_preallocations(struct super_block *sb, | ||
3795 | ext4_group_t group, int needed) | ||
3796 | { | ||
3797 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
3798 | struct buffer_head *bitmap_bh = NULL; | ||
3799 | struct ext4_prealloc_space *pa, *tmp; | ||
3800 | struct list_head list; | ||
3801 | struct ext4_buddy e4b; | ||
3802 | int err; | ||
3803 | int busy = 0; | ||
3804 | int free = 0; | ||
3805 | |||
3806 | mb_debug("discard preallocation for group %lu\n", group); | ||
3807 | |||
3808 | if (list_empty(&grp->bb_prealloc_list)) | ||
3809 | return 0; | ||
3810 | |||
3811 | bitmap_bh = read_block_bitmap(sb, group); | ||
3812 | if (bitmap_bh == NULL) { | ||
3813 | /* error handling here */ | ||
3814 | ext4_mb_release_desc(&e4b); | ||
3815 | BUG_ON(bitmap_bh == NULL); | ||
3816 | } | ||
3817 | |||
3818 | err = ext4_mb_load_buddy(sb, group, &e4b); | ||
3819 | BUG_ON(err != 0); /* error handling here */ | ||
3820 | |||
3821 | if (needed == 0) | ||
3822 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | ||
3823 | |||
3824 | grp = ext4_get_group_info(sb, group); | ||
3825 | INIT_LIST_HEAD(&list); | ||
3826 | |||
3827 | repeat: | ||
3828 | ext4_lock_group(sb, group); | ||
3829 | list_for_each_entry_safe(pa, tmp, | ||
3830 | &grp->bb_prealloc_list, pa_group_list) { | ||
3831 | spin_lock(&pa->pa_lock); | ||
3832 | if (atomic_read(&pa->pa_count)) { | ||
3833 | spin_unlock(&pa->pa_lock); | ||
3834 | busy = 1; | ||
3835 | continue; | ||
3836 | } | ||
3837 | if (pa->pa_deleted) { | ||
3838 | spin_unlock(&pa->pa_lock); | ||
3839 | continue; | ||
3840 | } | ||
3841 | |||
3842 | /* seems this one can be freed ... */ | ||
3843 | pa->pa_deleted = 1; | ||
3844 | |||
3845 | /* we can trust pa_free ... */ | ||
3846 | free += pa->pa_free; | ||
3847 | |||
3848 | spin_unlock(&pa->pa_lock); | ||
3849 | |||
3850 | list_del(&pa->pa_group_list); | ||
3851 | list_add(&pa->u.pa_tmp_list, &list); | ||
3852 | } | ||
3853 | |||
3854 | /* if we still need more blocks and some PAs were used, try again */ | ||
3855 | if (free < needed && busy) { | ||
3856 | busy = 0; | ||
3857 | ext4_unlock_group(sb, group); | ||
3858 | /* | ||
3859 | * Yield the CPU here so that we don't get soft lockup | ||
3860 | * in non preempt case. | ||
3861 | */ | ||
3862 | yield(); | ||
3863 | goto repeat; | ||
3864 | } | ||
3865 | |||
3866 | /* found anything to free? */ | ||
3867 | if (list_empty(&list)) { | ||
3868 | BUG_ON(free != 0); | ||
3869 | goto out; | ||
3870 | } | ||
3871 | |||
3872 | /* now free all selected PAs */ | ||
3873 | list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { | ||
3874 | |||
3875 | /* remove from object (inode or locality group) */ | ||
3876 | spin_lock(pa->pa_obj_lock); | ||
3877 | list_del_rcu(&pa->pa_inode_list); | ||
3878 | spin_unlock(pa->pa_obj_lock); | ||
3879 | |||
3880 | if (pa->pa_linear) | ||
3881 | ext4_mb_release_group_pa(&e4b, pa); | ||
3882 | else | ||
3883 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); | ||
3884 | |||
3885 | list_del(&pa->u.pa_tmp_list); | ||
3886 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | ||
3887 | } | ||
3888 | |||
3889 | out: | ||
3890 | ext4_unlock_group(sb, group); | ||
3891 | ext4_mb_release_desc(&e4b); | ||
3892 | put_bh(bitmap_bh); | ||
3893 | return free; | ||
3894 | } | ||
3895 | |||
3896 | /* | ||
3897 | * releases all non-used preallocated blocks for given inode | ||
3898 | * | ||
3899 | * It's important to discard preallocations under i_data_sem | ||
3900 | * We don't want another block to be served from the prealloc | ||
3901 | * space when we are discarding the inode prealloc space. | ||
3902 | * | ||
3903 | * FIXME!! Make sure it is valid at all the call sites | ||
3904 | */ | ||
3905 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | ||
3906 | { | ||
3907 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3908 | struct super_block *sb = inode->i_sb; | ||
3909 | struct buffer_head *bitmap_bh = NULL; | ||
3910 | struct ext4_prealloc_space *pa, *tmp; | ||
3911 | ext4_group_t group = 0; | ||
3912 | struct list_head list; | ||
3913 | struct ext4_buddy e4b; | ||
3914 | int err; | ||
3915 | |||
3916 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | ||
3917 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | ||
3918 | return; | ||
3919 | } | ||
3920 | |||
3921 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | ||
3922 | |||
3923 | INIT_LIST_HEAD(&list); | ||
3924 | |||
3925 | repeat: | ||
3926 | /* first, collect all pa's in the inode */ | ||
3927 | spin_lock(&ei->i_prealloc_lock); | ||
3928 | while (!list_empty(&ei->i_prealloc_list)) { | ||
3929 | pa = list_entry(ei->i_prealloc_list.next, | ||
3930 | struct ext4_prealloc_space, pa_inode_list); | ||
3931 | BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); | ||
3932 | spin_lock(&pa->pa_lock); | ||
3933 | if (atomic_read(&pa->pa_count)) { | ||
3934 | /* this shouldn't happen often - nobody should | ||
3935 | * use preallocation while we're discarding it */ | ||
3936 | spin_unlock(&pa->pa_lock); | ||
3937 | spin_unlock(&ei->i_prealloc_lock); | ||
3938 | printk(KERN_ERR "uh-oh! used pa while discarding\n"); | ||
3939 | WARN_ON(1); | ||
3940 | schedule_timeout_uninterruptible(HZ); | ||
3941 | goto repeat; | ||
3942 | |||
3943 | } | ||
3944 | if (pa->pa_deleted == 0) { | ||
3945 | pa->pa_deleted = 1; | ||
3946 | spin_unlock(&pa->pa_lock); | ||
3947 | list_del_rcu(&pa->pa_inode_list); | ||
3948 | list_add(&pa->u.pa_tmp_list, &list); | ||
3949 | continue; | ||
3950 | } | ||
3951 | |||
3952 | /* someone is deleting pa right now */ | ||
3953 | spin_unlock(&pa->pa_lock); | ||
3954 | spin_unlock(&ei->i_prealloc_lock); | ||
3955 | |||
3956 | /* we have to wait here because pa_deleted | ||
3957 | * doesn't mean pa is already unlinked from | ||
3958 | * the list. as we might be called from | ||
3959 | * ->clear_inode() the inode will get freed | ||
3960 | * and concurrent thread which is unlinking | ||
3961 | * pa from inode's list may access already | ||
3962 | * freed memory, bad-bad-bad */ | ||
3963 | |||
3964 | /* XXX: if this happens too often, we can | ||
3965 | * add a flag to force wait only in case | ||
3966 | * of ->clear_inode(), but not in case of | ||
3967 | * regular truncate */ | ||
3968 | schedule_timeout_uninterruptible(HZ); | ||
3969 | goto repeat; | ||
3970 | } | ||
3971 | spin_unlock(&ei->i_prealloc_lock); | ||
3972 | |||
3973 | list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { | ||
3974 | BUG_ON(pa->pa_linear != 0); | ||
3975 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); | ||
3976 | |||
3977 | err = ext4_mb_load_buddy(sb, group, &e4b); | ||
3978 | BUG_ON(err != 0); /* error handling here */ | ||
3979 | |||
3980 | bitmap_bh = read_block_bitmap(sb, group); | ||
3981 | if (bitmap_bh == NULL) { | ||
3982 | /* error handling here */ | ||
3983 | ext4_mb_release_desc(&e4b); | ||
3984 | BUG_ON(bitmap_bh == NULL); | ||
3985 | } | ||
3986 | |||
3987 | ext4_lock_group(sb, group); | ||
3988 | list_del(&pa->pa_group_list); | ||
3989 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); | ||
3990 | ext4_unlock_group(sb, group); | ||
3991 | |||
3992 | ext4_mb_release_desc(&e4b); | ||
3993 | put_bh(bitmap_bh); | ||
3994 | |||
3995 | list_del(&pa->u.pa_tmp_list); | ||
3996 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | ||
3997 | } | ||
3998 | } | ||
3999 | |||
4000 | /* | ||
4001 | * finds all preallocated spaces and return blocks being freed to them | ||
4002 | * if preallocated space becomes full (no block is used from the space) | ||
4003 | * then the function frees space in buddy | ||
4004 | * XXX: at the moment, truncate (which is the only way to free blocks) | ||
4005 | * discards all preallocations | ||
4006 | */ | ||
4007 | static void ext4_mb_return_to_preallocation(struct inode *inode, | ||
4008 | struct ext4_buddy *e4b, | ||
4009 | sector_t block, int count) | ||
4010 | { | ||
4011 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | ||
4012 | } | ||
#ifdef MB_DEBUG
/*
 * Debug helper: dump the allocation context @ac (status, flags, the
 * original/goal/best extents and scan counters) followed by, for every
 * block group, its preallocations and its free block / fragment
 * counts.  Compiled to an empty stub unless MB_DEBUG is defined.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t i;

	printk(KERN_ERR "EXT4-fs: Can't allocate:"
			" Allocation context details:\n");
	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
			ac->ac_status, ac->ac_flags);
	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d\n",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
		ac->ac_found);
	printk(KERN_ERR "EXT4-fs: groups: \n");
	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;
		struct list_head *cur;

		/* the group lock keeps bb_prealloc_list stable for the walk */
		ext4_lock_group(sb, i);
		list_for_each(cur, &grp->bb_prealloc_list) {
			pa = list_entry(cur, struct ext4_prealloc_space,
					pa_group_list);
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%lu:%d:%u \n", i,
							start, pa->pa_len);
		}
		/* release the lock taken above (was a second lock call) */
		ext4_unlock_group(sb, i);

		if (grp->bb_free == 0)
			continue;
		printk(KERN_ERR "%lu: %d/%d \n",
		       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	return;
}
#endif
4072 | |||
4073 | /* | ||
4074 | * We use locality group preallocation for small size file. The size of the | ||
4075 | * file is determined by the current size or the resulting size after | ||
4076 | * allocation which ever is larger | ||
4077 | * | ||
4078 | * One can tune this size via /proc/fs/ext4/<partition>/stream_req | ||
4079 | */ | ||
4080 | static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | ||
4081 | { | ||
4082 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
4083 | int bsbits = ac->ac_sb->s_blocksize_bits; | ||
4084 | loff_t size, isize; | ||
4085 | |||
4086 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | ||
4087 | return; | ||
4088 | |||
4089 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
4090 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
4091 | size = max(size, isize); | ||
4092 | |||
4093 | /* don't use group allocation for large files */ | ||
4094 | if (size >= sbi->s_mb_stream_request) | ||
4095 | return; | ||
4096 | |||
4097 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
4098 | return; | ||
4099 | |||
4100 | BUG_ON(ac->ac_lg != NULL); | ||
4101 | /* | ||
4102 | * locality group prealloc space are per cpu. The reason for having | ||
4103 | * per cpu locality group is to reduce the contention between block | ||
4104 | * request from multiple CPUs. | ||
4105 | */ | ||
4106 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | ||
4107 | put_cpu(); | ||
4108 | |||
4109 | /* we're going to use group allocation */ | ||
4110 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | ||
4111 | |||
4112 | /* serialize all allocations in the group */ | ||
4113 | mutex_lock(&ac->ac_lg->lg_mutex); | ||
4114 | } | ||
4115 | |||
4116 | static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, | ||
4117 | struct ext4_allocation_request *ar) | ||
4118 | { | ||
4119 | struct super_block *sb = ar->inode->i_sb; | ||
4120 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4121 | struct ext4_super_block *es = sbi->s_es; | ||
4122 | ext4_group_t group; | ||
4123 | unsigned long len; | ||
4124 | unsigned long goal; | ||
4125 | ext4_grpblk_t block; | ||
4126 | |||
4127 | /* we can't allocate > group size */ | ||
4128 | len = ar->len; | ||
4129 | |||
4130 | /* just a dirty hack to filter too big requests */ | ||
4131 | if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) | ||
4132 | len = EXT4_BLOCKS_PER_GROUP(sb) - 10; | ||
4133 | |||
4134 | /* start searching from the goal */ | ||
4135 | goal = ar->goal; | ||
4136 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
4137 | goal >= ext4_blocks_count(es)) | ||
4138 | goal = le32_to_cpu(es->s_first_data_block); | ||
4139 | ext4_get_group_no_and_offset(sb, goal, &group, &block); | ||
4140 | |||
4141 | /* set up allocation goals */ | ||
4142 | ac->ac_b_ex.fe_logical = ar->logical; | ||
4143 | ac->ac_b_ex.fe_group = 0; | ||
4144 | ac->ac_b_ex.fe_start = 0; | ||
4145 | ac->ac_b_ex.fe_len = 0; | ||
4146 | ac->ac_status = AC_STATUS_CONTINUE; | ||
4147 | ac->ac_groups_scanned = 0; | ||
4148 | ac->ac_ex_scanned = 0; | ||
4149 | ac->ac_found = 0; | ||
4150 | ac->ac_sb = sb; | ||
4151 | ac->ac_inode = ar->inode; | ||
4152 | ac->ac_o_ex.fe_logical = ar->logical; | ||
4153 | ac->ac_o_ex.fe_group = group; | ||
4154 | ac->ac_o_ex.fe_start = block; | ||
4155 | ac->ac_o_ex.fe_len = len; | ||
4156 | ac->ac_g_ex.fe_logical = ar->logical; | ||
4157 | ac->ac_g_ex.fe_group = group; | ||
4158 | ac->ac_g_ex.fe_start = block; | ||
4159 | ac->ac_g_ex.fe_len = len; | ||
4160 | ac->ac_f_ex.fe_len = 0; | ||
4161 | ac->ac_flags = ar->flags; | ||
4162 | ac->ac_2order = 0; | ||
4163 | ac->ac_criteria = 0; | ||
4164 | ac->ac_pa = NULL; | ||
4165 | ac->ac_bitmap_page = NULL; | ||
4166 | ac->ac_buddy_page = NULL; | ||
4167 | ac->ac_lg = NULL; | ||
4168 | |||
4169 | /* we have to define context: we'll we work with a file or | ||
4170 | * locality group. this is a policy, actually */ | ||
4171 | ext4_mb_group_or_file(ac); | ||
4172 | |||
4173 | mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " | ||
4174 | "left: %u/%u, right %u/%u to %swritable\n", | ||
4175 | (unsigned) ar->len, (unsigned) ar->logical, | ||
4176 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, | ||
4177 | (unsigned) ar->lleft, (unsigned) ar->pleft, | ||
4178 | (unsigned) ar->lright, (unsigned) ar->pright, | ||
4179 | atomic_read(&ar->inode->i_writecount) ? "" : "non-"); | ||
4180 | return 0; | ||
4181 | |||
4182 | } | ||
4183 | |||
4184 | /* | ||
4185 | * release all resource we used in allocation | ||
4186 | */ | ||
4187 | static int ext4_mb_release_context(struct ext4_allocation_context *ac) | ||
4188 | { | ||
4189 | if (ac->ac_pa) { | ||
4190 | if (ac->ac_pa->pa_linear) { | ||
4191 | /* see comment in ext4_mb_use_group_pa() */ | ||
4192 | spin_lock(&ac->ac_pa->pa_lock); | ||
4193 | ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; | ||
4194 | ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; | ||
4195 | ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; | ||
4196 | ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; | ||
4197 | spin_unlock(&ac->ac_pa->pa_lock); | ||
4198 | } | ||
4199 | ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); | ||
4200 | } | ||
4201 | if (ac->ac_bitmap_page) | ||
4202 | page_cache_release(ac->ac_bitmap_page); | ||
4203 | if (ac->ac_buddy_page) | ||
4204 | page_cache_release(ac->ac_buddy_page); | ||
4205 | if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) | ||
4206 | mutex_unlock(&ac->ac_lg->lg_mutex); | ||
4207 | ext4_mb_collect_stats(ac); | ||
4208 | return 0; | ||
4209 | } | ||
4210 | |||
4211 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | ||
4212 | { | ||
4213 | ext4_group_t i; | ||
4214 | int ret; | ||
4215 | int freed = 0; | ||
4216 | |||
4217 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { | ||
4218 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | ||
4219 | freed += ret; | ||
4220 | needed -= ret; | ||
4221 | } | ||
4222 | |||
4223 | return freed; | ||
4224 | } | ||
4225 | |||
4226 | /* | ||
4227 | * Main entry point into mballoc to allocate blocks | ||
4228 | * it tries to use preallocation first, then falls back | ||
4229 | * to usual allocation | ||
4230 | */ | ||
4231 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | ||
4232 | struct ext4_allocation_request *ar, int *errp) | ||
4233 | { | ||
4234 | struct ext4_allocation_context ac; | ||
4235 | struct ext4_sb_info *sbi; | ||
4236 | struct super_block *sb; | ||
4237 | ext4_fsblk_t block = 0; | ||
4238 | int freed; | ||
4239 | int inquota; | ||
4240 | |||
4241 | sb = ar->inode->i_sb; | ||
4242 | sbi = EXT4_SB(sb); | ||
4243 | |||
4244 | if (!test_opt(sb, MBALLOC)) { | ||
4245 | block = ext4_new_blocks_old(handle, ar->inode, ar->goal, | ||
4246 | &(ar->len), errp); | ||
4247 | return block; | ||
4248 | } | ||
4249 | |||
4250 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | ||
4251 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4252 | ar->len--; | ||
4253 | } | ||
4254 | if (ar->len == 0) { | ||
4255 | *errp = -EDQUOT; | ||
4256 | return 0; | ||
4257 | } | ||
4258 | inquota = ar->len; | ||
4259 | |||
4260 | ext4_mb_poll_new_transaction(sb, handle); | ||
4261 | |||
4262 | *errp = ext4_mb_initialize_context(&ac, ar); | ||
4263 | if (*errp) { | ||
4264 | ar->len = 0; | ||
4265 | goto out; | ||
4266 | } | ||
4267 | |||
4268 | ac.ac_op = EXT4_MB_HISTORY_PREALLOC; | ||
4269 | if (!ext4_mb_use_preallocated(&ac)) { | ||
4270 | |||
4271 | ac.ac_op = EXT4_MB_HISTORY_ALLOC; | ||
4272 | ext4_mb_normalize_request(&ac, ar); | ||
4273 | |||
4274 | repeat: | ||
4275 | /* allocate space in core */ | ||
4276 | ext4_mb_regular_allocator(&ac); | ||
4277 | |||
4278 | /* as we've just preallocated more space than | ||
4279 | * user requested orinally, we store allocated | ||
4280 | * space in a special descriptor */ | ||
4281 | if (ac.ac_status == AC_STATUS_FOUND && | ||
4282 | ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) | ||
4283 | ext4_mb_new_preallocation(&ac); | ||
4284 | } | ||
4285 | |||
4286 | if (likely(ac.ac_status == AC_STATUS_FOUND)) { | ||
4287 | ext4_mb_mark_diskspace_used(&ac, handle); | ||
4288 | *errp = 0; | ||
4289 | block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); | ||
4290 | ar->len = ac.ac_b_ex.fe_len; | ||
4291 | } else { | ||
4292 | freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); | ||
4293 | if (freed) | ||
4294 | goto repeat; | ||
4295 | *errp = -ENOSPC; | ||
4296 | ac.ac_b_ex.fe_len = 0; | ||
4297 | ar->len = 0; | ||
4298 | ext4_mb_show_ac(&ac); | ||
4299 | } | ||
4300 | |||
4301 | ext4_mb_release_context(&ac); | ||
4302 | |||
4303 | out: | ||
4304 | if (ar->len < inquota) | ||
4305 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); | ||
4306 | |||
4307 | return block; | ||
4308 | } | ||
4309 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4310 | handle_t *handle) | ||
4311 | { | ||
4312 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4313 | |||
4314 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | ||
4315 | return; | ||
4316 | |||
4317 | /* new transaction! time to close last one and free blocks for | ||
4318 | * committed transaction. we know that only transaction can be | ||
4319 | * active, so previos transaction can be being logged and we | ||
4320 | * know that transaction before previous is known to be already | ||
4321 | * logged. this means that now we may free blocks freed in all | ||
4322 | * transactions before previous one. hope I'm clear enough ... */ | ||
4323 | |||
4324 | spin_lock(&sbi->s_md_lock); | ||
4325 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | ||
4326 | mb_debug("new transaction %lu, old %lu\n", | ||
4327 | (unsigned long) handle->h_transaction->t_tid, | ||
4328 | (unsigned long) sbi->s_last_transaction); | ||
4329 | list_splice_init(&sbi->s_closed_transaction, | ||
4330 | &sbi->s_committed_transaction); | ||
4331 | list_splice_init(&sbi->s_active_transaction, | ||
4332 | &sbi->s_closed_transaction); | ||
4333 | sbi->s_last_transaction = handle->h_transaction->t_tid; | ||
4334 | } | ||
4335 | spin_unlock(&sbi->s_md_lock); | ||
4336 | |||
4337 | ext4_mb_free_committed_blocks(sb); | ||
4338 | } | ||
4339 | |||
4340 | static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | ||
4341 | ext4_group_t group, ext4_grpblk_t block, int count) | ||
4342 | { | ||
4343 | struct ext4_group_info *db = e4b->bd_info; | ||
4344 | struct super_block *sb = e4b->bd_sb; | ||
4345 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4346 | struct ext4_free_metadata *md; | ||
4347 | int i; | ||
4348 | |||
4349 | BUG_ON(e4b->bd_bitmap_page == NULL); | ||
4350 | BUG_ON(e4b->bd_buddy_page == NULL); | ||
4351 | |||
4352 | ext4_lock_group(sb, group); | ||
4353 | for (i = 0; i < count; i++) { | ||
4354 | md = db->bb_md_cur; | ||
4355 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | ||
4356 | db->bb_md_cur = NULL; | ||
4357 | md = NULL; | ||
4358 | } | ||
4359 | |||
4360 | if (md == NULL) { | ||
4361 | ext4_unlock_group(sb, group); | ||
4362 | md = kmalloc(sizeof(*md), GFP_NOFS); | ||
4363 | if (md == NULL) | ||
4364 | return -ENOMEM; | ||
4365 | md->num = 0; | ||
4366 | md->group = group; | ||
4367 | |||
4368 | ext4_lock_group(sb, group); | ||
4369 | if (db->bb_md_cur == NULL) { | ||
4370 | spin_lock(&sbi->s_md_lock); | ||
4371 | list_add(&md->list, &sbi->s_active_transaction); | ||
4372 | spin_unlock(&sbi->s_md_lock); | ||
4373 | /* protect buddy cache from being freed, | ||
4374 | * otherwise we'll refresh it from | ||
4375 | * on-disk bitmap and lose not-yet-available | ||
4376 | * blocks */ | ||
4377 | page_cache_get(e4b->bd_buddy_page); | ||
4378 | page_cache_get(e4b->bd_bitmap_page); | ||
4379 | db->bb_md_cur = md; | ||
4380 | db->bb_tid = handle->h_transaction->t_tid; | ||
4381 | mb_debug("new md 0x%p for group %lu\n", | ||
4382 | md, md->group); | ||
4383 | } else { | ||
4384 | kfree(md); | ||
4385 | md = db->bb_md_cur; | ||
4386 | } | ||
4387 | } | ||
4388 | |||
4389 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | ||
4390 | md->blocks[md->num] = block + i; | ||
4391 | md->num++; | ||
4392 | if (md->num == EXT4_BB_MAX_BLOCKS) { | ||
4393 | /* no more space, put full container on a sb's list */ | ||
4394 | db->bb_md_cur = NULL; | ||
4395 | } | ||
4396 | } | ||
4397 | ext4_unlock_group(sb, group); | ||
4398 | return 0; | ||
4399 | } | ||
4400 | |||
4401 | /* | ||
4402 | * Main entry point into mballoc to free blocks | ||
4403 | */ | ||
4404 | void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | ||
4405 | unsigned long block, unsigned long count, | ||
4406 | int metadata, unsigned long *freed) | ||
4407 | { | ||
4408 | struct buffer_head *bitmap_bh = 0; | ||
4409 | struct super_block *sb = inode->i_sb; | ||
4410 | struct ext4_allocation_context ac; | ||
4411 | struct ext4_group_desc *gdp; | ||
4412 | struct ext4_super_block *es; | ||
4413 | unsigned long overflow; | ||
4414 | ext4_grpblk_t bit; | ||
4415 | struct buffer_head *gd_bh; | ||
4416 | ext4_group_t block_group; | ||
4417 | struct ext4_sb_info *sbi; | ||
4418 | struct ext4_buddy e4b; | ||
4419 | int err = 0; | ||
4420 | int ret; | ||
4421 | |||
4422 | *freed = 0; | ||
4423 | |||
4424 | ext4_mb_poll_new_transaction(sb, handle); | ||
4425 | |||
4426 | sbi = EXT4_SB(sb); | ||
4427 | es = EXT4_SB(sb)->s_es; | ||
4428 | if (block < le32_to_cpu(es->s_first_data_block) || | ||
4429 | block + count < block || | ||
4430 | block + count > ext4_blocks_count(es)) { | ||
4431 | ext4_error(sb, __FUNCTION__, | ||
4432 | "Freeing blocks not in datazone - " | ||
4433 | "block = %lu, count = %lu", block, count); | ||
4434 | goto error_return; | ||
4435 | } | ||
4436 | |||
4437 | ext4_debug("freeing block %lu\n", block); | ||
4438 | |||
4439 | ac.ac_op = EXT4_MB_HISTORY_FREE; | ||
4440 | ac.ac_inode = inode; | ||
4441 | ac.ac_sb = sb; | ||
4442 | |||
4443 | do_more: | ||
4444 | overflow = 0; | ||
4445 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
4446 | |||
4447 | /* | ||
4448 | * Check to see if we are freeing blocks across a group | ||
4449 | * boundary. | ||
4450 | */ | ||
4451 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | ||
4452 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | ||
4453 | count -= overflow; | ||
4454 | } | ||
4455 | bitmap_bh = read_block_bitmap(sb, block_group); | ||
4456 | if (!bitmap_bh) | ||
4457 | goto error_return; | ||
4458 | gdp = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
4459 | if (!gdp) | ||
4460 | goto error_return; | ||
4461 | |||
4462 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || | ||
4463 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || | ||
4464 | in_range(block, ext4_inode_table(sb, gdp), | ||
4465 | EXT4_SB(sb)->s_itb_per_group) || | ||
4466 | in_range(block + count - 1, ext4_inode_table(sb, gdp), | ||
4467 | EXT4_SB(sb)->s_itb_per_group)) { | ||
4468 | |||
4469 | ext4_error(sb, __FUNCTION__, | ||
4470 | "Freeing blocks in system zone - " | ||
4471 | "Block = %lu, count = %lu", block, count); | ||
4472 | } | ||
4473 | |||
4474 | BUFFER_TRACE(bitmap_bh, "getting write access"); | ||
4475 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
4476 | if (err) | ||
4477 | goto error_return; | ||
4478 | |||
4479 | /* | ||
4480 | * We are about to modify some metadata. Call the journal APIs | ||
4481 | * to unshare ->b_data if a currently-committing transaction is | ||
4482 | * using it | ||
4483 | */ | ||
4484 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
4485 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
4486 | if (err) | ||
4487 | goto error_return; | ||
4488 | |||
4489 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4490 | if (err) | ||
4491 | goto error_return; | ||
4492 | |||
4493 | #ifdef AGGRESSIVE_CHECK | ||
4494 | { | ||
4495 | int i; | ||
4496 | for (i = 0; i < count; i++) | ||
4497 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | ||
4498 | } | ||
4499 | #endif | ||
4500 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | ||
4501 | bit, count); | ||
4502 | |||
4503 | /* We dirtied the bitmap block */ | ||
4504 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4505 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
4506 | |||
4507 | ac.ac_b_ex.fe_group = block_group; | ||
4508 | ac.ac_b_ex.fe_start = bit; | ||
4509 | ac.ac_b_ex.fe_len = count; | ||
4510 | ext4_mb_store_history(&ac); | ||
4511 | |||
4512 | if (metadata) { | ||
4513 | /* blocks being freed are metadata. these blocks shouldn't | ||
4514 | * be used until this transaction is committed */ | ||
4515 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); | ||
4516 | } else { | ||
4517 | ext4_lock_group(sb, block_group); | ||
4518 | err = mb_free_blocks(inode, &e4b, bit, count); | ||
4519 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | ||
4520 | ext4_unlock_group(sb, block_group); | ||
4521 | BUG_ON(err != 0); | ||
4522 | } | ||
4523 | |||
4524 | spin_lock(sb_bgl_lock(sbi, block_group)); | ||
4525 | gdp->bg_free_blocks_count = | ||
4526 | cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); | ||
4527 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | ||
4528 | spin_unlock(sb_bgl_lock(sbi, block_group)); | ||
4529 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | ||
4530 | |||
4531 | ext4_mb_release_desc(&e4b); | ||
4532 | |||
4533 | *freed += count; | ||
4534 | |||
4535 | /* And the group descriptor block */ | ||
4536 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
4537 | ret = ext4_journal_dirty_metadata(handle, gd_bh); | ||
4538 | if (!err) | ||
4539 | err = ret; | ||
4540 | |||
4541 | if (overflow && !err) { | ||
4542 | block += count; | ||
4543 | count = overflow; | ||
4544 | put_bh(bitmap_bh); | ||
4545 | goto do_more; | ||
4546 | } | ||
4547 | sb->s_dirt = 1; | ||
4548 | error_return: | ||
4549 | brelse(bitmap_bh); | ||
4550 | ext4_std_error(sb, err); | ||
4551 | return; | ||
4552 | } | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c new file mode 100644 index 000000000000..3ebc2332f52e --- /dev/null +++ b/fs/ext4/migrate.c | |||
@@ -0,0 +1,560 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corporation, 2007 | ||
3 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/ext4_jbd2.h> | ||
17 | #include <linux/ext4_fs_extents.h> | ||
18 | |||
19 | /* | ||
20 | * The contiguous blocks details which can be | ||
21 | * represented by a single extent | ||
22 | */ | ||
23 | struct list_blocks_struct { | ||
24 | ext4_lblk_t first_block, last_block; | ||
25 | ext4_fsblk_t first_pblock, last_pblock; | ||
26 | }; | ||
27 | |||
28 | static int finish_range(handle_t *handle, struct inode *inode, | ||
29 | struct list_blocks_struct *lb) | ||
30 | |||
31 | { | ||
32 | int retval = 0, needed; | ||
33 | struct ext4_extent newext; | ||
34 | struct ext4_ext_path *path; | ||
35 | if (lb->first_pblock == 0) | ||
36 | return 0; | ||
37 | |||
38 | /* Add the extent to temp inode*/ | ||
39 | newext.ee_block = cpu_to_le32(lb->first_block); | ||
40 | newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); | ||
41 | ext4_ext_store_pblock(&newext, lb->first_pblock); | ||
42 | path = ext4_ext_find_extent(inode, lb->first_block, NULL); | ||
43 | |||
44 | if (IS_ERR(path)) { | ||
45 | retval = PTR_ERR(path); | ||
46 | goto err_out; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * Calculate the credit needed to inserting this extent | ||
51 | * Since we are doing this in loop we may accumalate extra | ||
52 | * credit. But below we try to not accumalate too much | ||
53 | * of them by restarting the journal. | ||
54 | */ | ||
55 | needed = ext4_ext_calc_credits_for_insert(inode, path); | ||
56 | |||
57 | /* | ||
58 | * Make sure the credit we accumalated is not really high | ||
59 | */ | ||
60 | if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { | ||
61 | retval = ext4_journal_restart(handle, needed); | ||
62 | if (retval) | ||
63 | goto err_out; | ||
64 | } | ||
65 | if (needed) { | ||
66 | retval = ext4_journal_extend(handle, needed); | ||
67 | if (retval != 0) { | ||
68 | /* | ||
69 | * IF not able to extend the journal restart the journal | ||
70 | */ | ||
71 | retval = ext4_journal_restart(handle, needed); | ||
72 | if (retval) | ||
73 | goto err_out; | ||
74 | } | ||
75 | } | ||
76 | retval = ext4_ext_insert_extent(handle, inode, path, &newext); | ||
77 | err_out: | ||
78 | lb->first_pblock = 0; | ||
79 | return retval; | ||
80 | } | ||
81 | |||
82 | static int update_extent_range(handle_t *handle, struct inode *inode, | ||
83 | ext4_fsblk_t pblock, ext4_lblk_t blk_num, | ||
84 | struct list_blocks_struct *lb) | ||
85 | { | ||
86 | int retval; | ||
87 | /* | ||
88 | * See if we can add on to the existing range (if it exists) | ||
89 | */ | ||
90 | if (lb->first_pblock && | ||
91 | (lb->last_pblock+1 == pblock) && | ||
92 | (lb->last_block+1 == blk_num)) { | ||
93 | lb->last_pblock = pblock; | ||
94 | lb->last_block = blk_num; | ||
95 | return 0; | ||
96 | } | ||
97 | /* | ||
98 | * Start a new range. | ||
99 | */ | ||
100 | retval = finish_range(handle, inode, lb); | ||
101 | lb->first_pblock = lb->last_pblock = pblock; | ||
102 | lb->first_block = lb->last_block = blk_num; | ||
103 | |||
104 | return retval; | ||
105 | } | ||
106 | |||
107 | static int update_ind_extent_range(handle_t *handle, struct inode *inode, | ||
108 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | ||
109 | struct list_blocks_struct *lb) | ||
110 | { | ||
111 | struct buffer_head *bh; | ||
112 | __le32 *i_data; | ||
113 | int i, retval = 0; | ||
114 | ext4_lblk_t blk_count = *blk_nump; | ||
115 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | ||
116 | |||
117 | if (!pblock) { | ||
118 | /* Only update the file block number */ | ||
119 | *blk_nump += max_entries; | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | bh = sb_bread(inode->i_sb, pblock); | ||
124 | if (!bh) | ||
125 | return -EIO; | ||
126 | |||
127 | i_data = (__le32 *)bh->b_data; | ||
128 | for (i = 0; i < max_entries; i++, blk_count++) { | ||
129 | if (i_data[i]) { | ||
130 | retval = update_extent_range(handle, inode, | ||
131 | le32_to_cpu(i_data[i]), | ||
132 | blk_count, lb); | ||
133 | if (retval) | ||
134 | break; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | /* Update the file block number */ | ||
139 | *blk_nump = blk_count; | ||
140 | put_bh(bh); | ||
141 | return retval; | ||
142 | |||
143 | } | ||
144 | |||
145 | static int update_dind_extent_range(handle_t *handle, struct inode *inode, | ||
146 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | ||
147 | struct list_blocks_struct *lb) | ||
148 | { | ||
149 | struct buffer_head *bh; | ||
150 | __le32 *i_data; | ||
151 | int i, retval = 0; | ||
152 | ext4_lblk_t blk_count = *blk_nump; | ||
153 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | ||
154 | |||
155 | if (!pblock) { | ||
156 | /* Only update the file block number */ | ||
157 | *blk_nump += max_entries * max_entries; | ||
158 | return 0; | ||
159 | } | ||
160 | bh = sb_bread(inode->i_sb, pblock); | ||
161 | if (!bh) | ||
162 | return -EIO; | ||
163 | |||
164 | i_data = (__le32 *)bh->b_data; | ||
165 | for (i = 0; i < max_entries; i++) { | ||
166 | if (i_data[i]) { | ||
167 | retval = update_ind_extent_range(handle, inode, | ||
168 | le32_to_cpu(i_data[i]), | ||
169 | &blk_count, lb); | ||
170 | if (retval) | ||
171 | break; | ||
172 | } else { | ||
173 | /* Only update the file block number */ | ||
174 | blk_count += max_entries; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | /* Update the file block number */ | ||
179 | *blk_nump = blk_count; | ||
180 | put_bh(bh); | ||
181 | return retval; | ||
182 | |||
183 | } | ||
184 | |||
185 | static int update_tind_extent_range(handle_t *handle, struct inode *inode, | ||
186 | ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, | ||
187 | struct list_blocks_struct *lb) | ||
188 | { | ||
189 | struct buffer_head *bh; | ||
190 | __le32 *i_data; | ||
191 | int i, retval = 0; | ||
192 | ext4_lblk_t blk_count = *blk_nump; | ||
193 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | ||
194 | |||
195 | if (!pblock) { | ||
196 | /* Only update the file block number */ | ||
197 | *blk_nump += max_entries * max_entries * max_entries; | ||
198 | return 0; | ||
199 | } | ||
200 | bh = sb_bread(inode->i_sb, pblock); | ||
201 | if (!bh) | ||
202 | return -EIO; | ||
203 | |||
204 | i_data = (__le32 *)bh->b_data; | ||
205 | for (i = 0; i < max_entries; i++) { | ||
206 | if (i_data[i]) { | ||
207 | retval = update_dind_extent_range(handle, inode, | ||
208 | le32_to_cpu(i_data[i]), | ||
209 | &blk_count, lb); | ||
210 | if (retval) | ||
211 | break; | ||
212 | } else | ||
213 | /* Only update the file block number */ | ||
214 | blk_count += max_entries * max_entries; | ||
215 | } | ||
216 | /* Update the file block number */ | ||
217 | *blk_nump = blk_count; | ||
218 | put_bh(bh); | ||
219 | return retval; | ||
220 | |||
221 | } | ||
222 | |||
223 | static int free_dind_blocks(handle_t *handle, | ||
224 | struct inode *inode, __le32 i_data) | ||
225 | { | ||
226 | int i; | ||
227 | __le32 *tmp_idata; | ||
228 | struct buffer_head *bh; | ||
229 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | ||
230 | |||
231 | bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); | ||
232 | if (!bh) | ||
233 | return -EIO; | ||
234 | |||
235 | tmp_idata = (__le32 *)bh->b_data; | ||
236 | for (i = 0; i < max_entries; i++) { | ||
237 | if (tmp_idata[i]) | ||
238 | ext4_free_blocks(handle, inode, | ||
239 | le32_to_cpu(tmp_idata[i]), 1, 1); | ||
240 | } | ||
241 | put_bh(bh); | ||
242 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int free_tind_blocks(handle_t *handle, | ||
247 | struct inode *inode, __le32 i_data) | ||
248 | { | ||
249 | int i, retval = 0; | ||
250 | __le32 *tmp_idata; | ||
251 | struct buffer_head *bh; | ||
252 | unsigned long max_entries = inode->i_sb->s_blocksize >> 2; | ||
253 | |||
254 | bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); | ||
255 | if (!bh) | ||
256 | return -EIO; | ||
257 | |||
258 | tmp_idata = (__le32 *)bh->b_data; | ||
259 | for (i = 0; i < max_entries; i++) { | ||
260 | if (tmp_idata[i]) { | ||
261 | retval = free_dind_blocks(handle, | ||
262 | inode, tmp_idata[i]); | ||
263 | if (retval) { | ||
264 | put_bh(bh); | ||
265 | return retval; | ||
266 | } | ||
267 | } | ||
268 | } | ||
269 | put_bh(bh); | ||
270 | ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | ||
271 | return 0; | ||
272 | } | ||
273 | |||
274 | static int free_ind_block(handle_t *handle, struct inode *inode) | ||
275 | { | ||
276 | int retval; | ||
277 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
278 | |||
279 | if (ei->i_data[EXT4_IND_BLOCK]) | ||
280 | ext4_free_blocks(handle, inode, | ||
281 | le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); | ||
282 | |||
283 | if (ei->i_data[EXT4_DIND_BLOCK]) { | ||
284 | retval = free_dind_blocks(handle, inode, | ||
285 | ei->i_data[EXT4_DIND_BLOCK]); | ||
286 | if (retval) | ||
287 | return retval; | ||
288 | } | ||
289 | |||
290 | if (ei->i_data[EXT4_TIND_BLOCK]) { | ||
291 | retval = free_tind_blocks(handle, inode, | ||
292 | ei->i_data[EXT4_TIND_BLOCK]); | ||
293 | if (retval) | ||
294 | return retval; | ||
295 | } | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | ||
300 | struct inode *tmp_inode, int retval) | ||
301 | { | ||
302 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
303 | struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); | ||
304 | |||
305 | retval = free_ind_block(handle, inode); | ||
306 | if (retval) | ||
307 | goto err_out; | ||
308 | |||
309 | /* | ||
310 | * One credit accounted for writing the | ||
311 | * i_data field of the original inode | ||
312 | */ | ||
313 | retval = ext4_journal_extend(handle, 1); | ||
314 | if (retval != 0) { | ||
315 | retval = ext4_journal_restart(handle, 1); | ||
316 | if (retval) | ||
317 | goto err_out; | ||
318 | } | ||
319 | |||
320 | /* | ||
321 | * We have the extent map build with the tmp inode. | ||
322 | * Now copy the i_data across | ||
323 | */ | ||
324 | ei->i_flags |= EXT4_EXTENTS_FL; | ||
325 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); | ||
326 | |||
327 | /* | ||
328 | * Update i_blocks with the new blocks that got | ||
329 | * allocated while adding extents for extent index | ||
330 | * blocks. | ||
331 | * | ||
332 | * While converting to extents we need not | ||
333 | * update the orignal inode i_blocks for extent blocks | ||
334 | * via quota APIs. The quota update happened via tmp_inode already. | ||
335 | */ | ||
336 | spin_lock(&inode->i_lock); | ||
337 | inode->i_blocks += tmp_inode->i_blocks; | ||
338 | spin_unlock(&inode->i_lock); | ||
339 | |||
340 | ext4_mark_inode_dirty(handle, inode); | ||
341 | err_out: | ||
342 | return retval; | ||
343 | } | ||
344 | |||
345 | static int free_ext_idx(handle_t *handle, struct inode *inode, | ||
346 | struct ext4_extent_idx *ix) | ||
347 | { | ||
348 | int i, retval = 0; | ||
349 | ext4_fsblk_t block; | ||
350 | struct buffer_head *bh; | ||
351 | struct ext4_extent_header *eh; | ||
352 | |||
353 | block = idx_pblock(ix); | ||
354 | bh = sb_bread(inode->i_sb, block); | ||
355 | if (!bh) | ||
356 | return -EIO; | ||
357 | |||
358 | eh = (struct ext4_extent_header *)bh->b_data; | ||
359 | if (eh->eh_depth != 0) { | ||
360 | ix = EXT_FIRST_INDEX(eh); | ||
361 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { | ||
362 | retval = free_ext_idx(handle, inode, ix); | ||
363 | if (retval) | ||
364 | break; | ||
365 | } | ||
366 | } | ||
367 | put_bh(bh); | ||
368 | ext4_free_blocks(handle, inode, block, 1, 1); | ||
369 | return retval; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Free the extent meta data blocks only | ||
374 | */ | ||
375 | static int free_ext_block(handle_t *handle, struct inode *inode) | ||
376 | { | ||
377 | int i, retval = 0; | ||
378 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
379 | struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; | ||
380 | struct ext4_extent_idx *ix; | ||
381 | if (eh->eh_depth == 0) | ||
382 | /* | ||
383 | * No extra blocks allocated for extent meta data | ||
384 | */ | ||
385 | return 0; | ||
386 | ix = EXT_FIRST_INDEX(eh); | ||
387 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { | ||
388 | retval = free_ext_idx(handle, inode, ix); | ||
389 | if (retval) | ||
390 | return retval; | ||
391 | } | ||
392 | return retval; | ||
393 | |||
394 | } | ||
395 | |||
396 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | ||
397 | unsigned int cmd, unsigned long arg) | ||
398 | { | ||
399 | handle_t *handle; | ||
400 | int retval = 0, i; | ||
401 | __le32 *i_data; | ||
402 | ext4_lblk_t blk_count = 0; | ||
403 | struct ext4_inode_info *ei; | ||
404 | struct inode *tmp_inode = NULL; | ||
405 | struct list_blocks_struct lb; | ||
406 | unsigned long max_entries; | ||
407 | |||
408 | if (!test_opt(inode->i_sb, EXTENTS)) | ||
409 | /* | ||
410 | * if mounted with noextents we don't allow the migrate | ||
411 | */ | ||
412 | return -EINVAL; | ||
413 | |||
414 | if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
415 | return -EINVAL; | ||
416 | |||
417 | down_write(&EXT4_I(inode)->i_data_sem); | ||
418 | handle = ext4_journal_start(inode, | ||
419 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + | ||
420 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
421 | 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) | ||
422 | + 1); | ||
423 | if (IS_ERR(handle)) { | ||
424 | retval = PTR_ERR(handle); | ||
425 | goto err_out; | ||
426 | } | ||
427 | tmp_inode = ext4_new_inode(handle, | ||
428 | inode->i_sb->s_root->d_inode, | ||
429 | S_IFREG); | ||
430 | if (IS_ERR(tmp_inode)) { | ||
431 | retval = -ENOMEM; | ||
432 | ext4_journal_stop(handle); | ||
433 | tmp_inode = NULL; | ||
434 | goto err_out; | ||
435 | } | ||
436 | i_size_write(tmp_inode, i_size_read(inode)); | ||
437 | /* | ||
438 | * We don't want the inode to be reclaimed | ||
439 | * if we got interrupted in between. We have | ||
440 | * this tmp inode carrying reference to the | ||
441 | * data blocks of the original file. We set | ||
442 | * the i_nlink to zero at the last stage after | ||
443 | * switching the original file to extent format | ||
444 | */ | ||
445 | tmp_inode->i_nlink = 1; | ||
446 | |||
447 | ext4_ext_tree_init(handle, tmp_inode); | ||
448 | ext4_orphan_add(handle, tmp_inode); | ||
449 | ext4_journal_stop(handle); | ||
450 | |||
451 | ei = EXT4_I(inode); | ||
452 | i_data = ei->i_data; | ||
453 | memset(&lb, 0, sizeof(lb)); | ||
454 | |||
455 | /* 32 bit block address 4 bytes */ | ||
456 | max_entries = inode->i_sb->s_blocksize >> 2; | ||
457 | |||
458 | /* | ||
459 | * start with one credit accounted for | ||
460 | * superblock modification. | ||
461 | * | ||
462 | * For the tmp_inode we already have commited the | ||
463 | * trascation that created the inode. Later as and | ||
464 | * when we add extents we extent the journal | ||
465 | */ | ||
466 | handle = ext4_journal_start(inode, 1); | ||
467 | for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { | ||
468 | if (i_data[i]) { | ||
469 | retval = update_extent_range(handle, tmp_inode, | ||
470 | le32_to_cpu(i_data[i]), | ||
471 | blk_count, &lb); | ||
472 | if (retval) | ||
473 | goto err_out; | ||
474 | } | ||
475 | } | ||
476 | if (i_data[EXT4_IND_BLOCK]) { | ||
477 | retval = update_ind_extent_range(handle, tmp_inode, | ||
478 | le32_to_cpu(i_data[EXT4_IND_BLOCK]), | ||
479 | &blk_count, &lb); | ||
480 | if (retval) | ||
481 | goto err_out; | ||
482 | } else | ||
483 | blk_count += max_entries; | ||
484 | if (i_data[EXT4_DIND_BLOCK]) { | ||
485 | retval = update_dind_extent_range(handle, tmp_inode, | ||
486 | le32_to_cpu(i_data[EXT4_DIND_BLOCK]), | ||
487 | &blk_count, &lb); | ||
488 | if (retval) | ||
489 | goto err_out; | ||
490 | } else | ||
491 | blk_count += max_entries * max_entries; | ||
492 | if (i_data[EXT4_TIND_BLOCK]) { | ||
493 | retval = update_tind_extent_range(handle, tmp_inode, | ||
494 | le32_to_cpu(i_data[EXT4_TIND_BLOCK]), | ||
495 | &blk_count, &lb); | ||
496 | if (retval) | ||
497 | goto err_out; | ||
498 | } | ||
499 | /* | ||
500 | * Build the last extent | ||
501 | */ | ||
502 | retval = finish_range(handle, tmp_inode, &lb); | ||
503 | err_out: | ||
504 | /* | ||
505 | * We are either freeing extent information or indirect | ||
506 | * blocks. During this we touch superblock, group descriptor | ||
507 | * and block bitmap. Later we mark the tmp_inode dirty | ||
508 | * via ext4_ext_tree_init. So allocate a credit of 4 | ||
509 | * We may update quota (user and group). | ||
510 | * | ||
511 | * FIXME!! we may be touching bitmaps in different block groups. | ||
512 | */ | ||
513 | if (ext4_journal_extend(handle, | ||
514 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) | ||
515 | ext4_journal_restart(handle, | ||
516 | 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | ||
517 | if (retval) | ||
518 | /* | ||
519 | * Failure case delete the extent information with the | ||
520 | * tmp_inode | ||
521 | */ | ||
522 | free_ext_block(handle, tmp_inode); | ||
523 | else | ||
524 | retval = ext4_ext_swap_inode_data(handle, inode, | ||
525 | tmp_inode, retval); | ||
526 | |||
527 | /* | ||
528 | * Mark the tmp_inode as of size zero | ||
529 | */ | ||
530 | i_size_write(tmp_inode, 0); | ||
531 | |||
532 | /* | ||
533 | * set the i_blocks count to zero | ||
534 | * so that the ext4_delete_inode does the | ||
535 | * right job | ||
536 | * | ||
537 | * We don't need to take the i_lock because | ||
538 | * the inode is not visible to user space. | ||
539 | */ | ||
540 | tmp_inode->i_blocks = 0; | ||
541 | |||
542 | /* Reset the extent details */ | ||
543 | ext4_ext_tree_init(handle, tmp_inode); | ||
544 | |||
545 | /* | ||
546 | * Set the i_nlink to zero so that | ||
547 | * generic_drop_inode really deletes the | ||
548 | * inode | ||
549 | */ | ||
550 | tmp_inode->i_nlink = 0; | ||
551 | |||
552 | ext4_journal_stop(handle); | ||
553 | |||
554 | up_write(&EXT4_I(inode)->i_data_sem); | ||
555 | |||
556 | if (tmp_inode) | ||
557 | iput(tmp_inode); | ||
558 | |||
559 | return retval; | ||
560 | } | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 94ee6f315dc1..67b6d8a1ceff 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -51,7 +51,7 @@ | |||
51 | 51 | ||
52 | static struct buffer_head *ext4_append(handle_t *handle, | 52 | static struct buffer_head *ext4_append(handle_t *handle, |
53 | struct inode *inode, | 53 | struct inode *inode, |
54 | u32 *block, int *err) | 54 | ext4_lblk_t *block, int *err) |
55 | { | 55 | { |
56 | struct buffer_head *bh; | 56 | struct buffer_head *bh; |
57 | 57 | ||
@@ -144,8 +144,8 @@ struct dx_map_entry | |||
144 | u16 size; | 144 | u16 size; |
145 | }; | 145 | }; |
146 | 146 | ||
147 | static inline unsigned dx_get_block (struct dx_entry *entry); | 147 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
148 | static void dx_set_block (struct dx_entry *entry, unsigned value); | 148 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
149 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 149 | static inline unsigned dx_get_hash (struct dx_entry *entry); |
150 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 150 | static void dx_set_hash (struct dx_entry *entry, unsigned value); |
151 | static unsigned dx_get_count (struct dx_entry *entries); | 151 | static unsigned dx_get_count (struct dx_entry *entries); |
@@ -166,7 +166,8 @@ static void dx_sort_map(struct dx_map_entry *map, unsigned count); | |||
166 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 166 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, |
167 | struct dx_map_entry *offsets, int count); | 167 | struct dx_map_entry *offsets, int count); |
168 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 168 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); |
169 | static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); | 169 | static void dx_insert_block(struct dx_frame *frame, |
170 | u32 hash, ext4_lblk_t block); | ||
170 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 171 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
171 | struct dx_frame *frame, | 172 | struct dx_frame *frame, |
172 | struct dx_frame *frames, | 173 | struct dx_frame *frames, |
@@ -181,12 +182,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
181 | * Mask them off for now. | 182 | * Mask them off for now. |
182 | */ | 183 | */ |
183 | 184 | ||
184 | static inline unsigned dx_get_block (struct dx_entry *entry) | 185 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) |
185 | { | 186 | { |
186 | return le32_to_cpu(entry->block) & 0x00ffffff; | 187 | return le32_to_cpu(entry->block) & 0x00ffffff; |
187 | } | 188 | } |
188 | 189 | ||
189 | static inline void dx_set_block (struct dx_entry *entry, unsigned value) | 190 | static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) |
190 | { | 191 | { |
191 | entry->block = cpu_to_le32(value); | 192 | entry->block = cpu_to_le32(value); |
192 | } | 193 | } |
@@ -243,8 +244,8 @@ static void dx_show_index (char * label, struct dx_entry *entries) | |||
243 | int i, n = dx_get_count (entries); | 244 | int i, n = dx_get_count (entries); |
244 | printk("%s index ", label); | 245 | printk("%s index ", label); |
245 | for (i = 0; i < n; i++) { | 246 | for (i = 0; i < n; i++) { |
246 | printk("%x->%u ", i? dx_get_hash(entries + i) : | 247 | printk("%x->%lu ", i? dx_get_hash(entries + i) : |
247 | 0, dx_get_block(entries + i)); | 248 | 0, (unsigned long)dx_get_block(entries + i)); |
248 | } | 249 | } |
249 | printk("\n"); | 250 | printk("\n"); |
250 | } | 251 | } |
@@ -280,7 +281,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent | |||
280 | space += EXT4_DIR_REC_LEN(de->name_len); | 281 | space += EXT4_DIR_REC_LEN(de->name_len); |
281 | names++; | 282 | names++; |
282 | } | 283 | } |
283 | de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); | 284 | de = ext4_next_entry(de); |
284 | } | 285 | } |
285 | printk("(%i)\n", names); | 286 | printk("(%i)\n", names); |
286 | return (struct stats) { names, space, 1 }; | 287 | return (struct stats) { names, space, 1 }; |
@@ -297,7 +298,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
297 | printk("%i indexed blocks...\n", count); | 298 | printk("%i indexed blocks...\n", count); |
298 | for (i = 0; i < count; i++, entries++) | 299 | for (i = 0; i < count; i++, entries++) |
299 | { | 300 | { |
300 | u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; | 301 | ext4_lblk_t block = dx_get_block(entries); |
302 | ext4_lblk_t hash = i ? dx_get_hash(entries): 0; | ||
301 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; | 303 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; |
302 | struct stats stats; | 304 | struct stats stats; |
303 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); | 305 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); |
@@ -551,7 +553,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
551 | */ | 553 | */ |
552 | static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) | 554 | static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) |
553 | { | 555 | { |
554 | return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); | 556 | return (struct ext4_dir_entry_2 *)((char *)p + |
557 | ext4_rec_len_from_disk(p->rec_len)); | ||
555 | } | 558 | } |
556 | 559 | ||
557 | /* | 560 | /* |
@@ -560,7 +563,7 @@ static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 * | |||
560 | * into the tree. If there is an error it is returned in err. | 563 | * into the tree. If there is an error it is returned in err. |
561 | */ | 564 | */ |
562 | static int htree_dirblock_to_tree(struct file *dir_file, | 565 | static int htree_dirblock_to_tree(struct file *dir_file, |
563 | struct inode *dir, int block, | 566 | struct inode *dir, ext4_lblk_t block, |
564 | struct dx_hash_info *hinfo, | 567 | struct dx_hash_info *hinfo, |
565 | __u32 start_hash, __u32 start_minor_hash) | 568 | __u32 start_hash, __u32 start_minor_hash) |
566 | { | 569 | { |
@@ -568,7 +571,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
568 | struct ext4_dir_entry_2 *de, *top; | 571 | struct ext4_dir_entry_2 *de, *top; |
569 | int err, count = 0; | 572 | int err, count = 0; |
570 | 573 | ||
571 | dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); | 574 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", |
575 | (unsigned long)block)); | ||
572 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) | 576 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) |
573 | return err; | 577 | return err; |
574 | 578 | ||
@@ -620,9 +624,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
620 | struct ext4_dir_entry_2 *de; | 624 | struct ext4_dir_entry_2 *de; |
621 | struct dx_frame frames[2], *frame; | 625 | struct dx_frame frames[2], *frame; |
622 | struct inode *dir; | 626 | struct inode *dir; |
623 | int block, err; | 627 | ext4_lblk_t block; |
624 | int count = 0; | 628 | int count = 0; |
625 | int ret; | 629 | int ret, err; |
626 | __u32 hashval; | 630 | __u32 hashval; |
627 | 631 | ||
628 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 632 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, |
@@ -720,7 +724,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | |||
720 | cond_resched(); | 724 | cond_resched(); |
721 | } | 725 | } |
722 | /* XXX: do we need to check rec_len == 0 case? -Chris */ | 726 | /* XXX: do we need to check rec_len == 0 case? -Chris */ |
723 | de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); | 727 | de = ext4_next_entry(de); |
724 | } | 728 | } |
725 | return count; | 729 | return count; |
726 | } | 730 | } |
@@ -752,7 +756,7 @@ static void dx_sort_map (struct dx_map_entry *map, unsigned count) | |||
752 | } while(more); | 756 | } while(more); |
753 | } | 757 | } |
754 | 758 | ||
755 | static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) | 759 | static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) |
756 | { | 760 | { |
757 | struct dx_entry *entries = frame->entries; | 761 | struct dx_entry *entries = frame->entries; |
758 | struct dx_entry *old = frame->at, *new = old + 1; | 762 | struct dx_entry *old = frame->at, *new = old + 1; |
@@ -820,7 +824,7 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
820 | return 1; | 824 | return 1; |
821 | } | 825 | } |
822 | /* prevent looping on a bad block */ | 826 | /* prevent looping on a bad block */ |
823 | de_len = le16_to_cpu(de->rec_len); | 827 | de_len = ext4_rec_len_from_disk(de->rec_len); |
824 | if (de_len <= 0) | 828 | if (de_len <= 0) |
825 | return -1; | 829 | return -1; |
826 | offset += de_len; | 830 | offset += de_len; |
@@ -847,23 +851,20 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
847 | struct super_block * sb; | 851 | struct super_block * sb; |
848 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 852 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; |
849 | struct buffer_head * bh, *ret = NULL; | 853 | struct buffer_head * bh, *ret = NULL; |
850 | unsigned long start, block, b; | 854 | ext4_lblk_t start, block, b; |
851 | int ra_max = 0; /* Number of bh's in the readahead | 855 | int ra_max = 0; /* Number of bh's in the readahead |
852 | buffer, bh_use[] */ | 856 | buffer, bh_use[] */ |
853 | int ra_ptr = 0; /* Current index into readahead | 857 | int ra_ptr = 0; /* Current index into readahead |
854 | buffer */ | 858 | buffer */ |
855 | int num = 0; | 859 | int num = 0; |
856 | int nblocks, i, err; | 860 | ext4_lblk_t nblocks; |
861 | int i, err; | ||
857 | struct inode *dir = dentry->d_parent->d_inode; | 862 | struct inode *dir = dentry->d_parent->d_inode; |
858 | int namelen; | 863 | int namelen; |
859 | const u8 *name; | ||
860 | unsigned blocksize; | ||
861 | 864 | ||
862 | *res_dir = NULL; | 865 | *res_dir = NULL; |
863 | sb = dir->i_sb; | 866 | sb = dir->i_sb; |
864 | blocksize = sb->s_blocksize; | ||
865 | namelen = dentry->d_name.len; | 867 | namelen = dentry->d_name.len; |
866 | name = dentry->d_name.name; | ||
867 | if (namelen > EXT4_NAME_LEN) | 868 | if (namelen > EXT4_NAME_LEN) |
868 | return NULL; | 869 | return NULL; |
869 | if (is_dx(dir)) { | 870 | if (is_dx(dir)) { |
@@ -914,7 +915,8 @@ restart: | |||
914 | if (!buffer_uptodate(bh)) { | 915 | if (!buffer_uptodate(bh)) { |
915 | /* read error, skip block & hope for the best */ | 916 | /* read error, skip block & hope for the best */ |
916 | ext4_error(sb, __FUNCTION__, "reading directory #%lu " | 917 | ext4_error(sb, __FUNCTION__, "reading directory #%lu " |
917 | "offset %lu", dir->i_ino, block); | 918 | "offset %lu", dir->i_ino, |
919 | (unsigned long)block); | ||
918 | brelse(bh); | 920 | brelse(bh); |
919 | goto next; | 921 | goto next; |
920 | } | 922 | } |
@@ -961,7 +963,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
961 | struct dx_frame frames[2], *frame; | 963 | struct dx_frame frames[2], *frame; |
962 | struct ext4_dir_entry_2 *de, *top; | 964 | struct ext4_dir_entry_2 *de, *top; |
963 | struct buffer_head *bh; | 965 | struct buffer_head *bh; |
964 | unsigned long block; | 966 | ext4_lblk_t block; |
965 | int retval; | 967 | int retval; |
966 | int namelen = dentry->d_name.len; | 968 | int namelen = dentry->d_name.len; |
967 | const u8 *name = dentry->d_name.name; | 969 | const u8 *name = dentry->d_name.name; |
@@ -1128,7 +1130,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) | |||
1128 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1130 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1129 | memcpy (to, de, rec_len); | 1131 | memcpy (to, de, rec_len); |
1130 | ((struct ext4_dir_entry_2 *) to)->rec_len = | 1132 | ((struct ext4_dir_entry_2 *) to)->rec_len = |
1131 | cpu_to_le16(rec_len); | 1133 | ext4_rec_len_to_disk(rec_len); |
1132 | de->inode = 0; | 1134 | de->inode = 0; |
1133 | map++; | 1135 | map++; |
1134 | to += rec_len; | 1136 | to += rec_len; |
@@ -1147,13 +1149,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) | |||
1147 | 1149 | ||
1148 | prev = to = de; | 1150 | prev = to = de; |
1149 | while ((char*)de < base + size) { | 1151 | while ((char*)de < base + size) { |
1150 | next = (struct ext4_dir_entry_2 *) ((char *) de + | 1152 | next = ext4_next_entry(de); |
1151 | le16_to_cpu(de->rec_len)); | ||
1152 | if (de->inode && de->name_len) { | 1153 | if (de->inode && de->name_len) { |
1153 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1154 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1154 | if (de > to) | 1155 | if (de > to) |
1155 | memmove(to, de, rec_len); | 1156 | memmove(to, de, rec_len); |
1156 | to->rec_len = cpu_to_le16(rec_len); | 1157 | to->rec_len = ext4_rec_len_to_disk(rec_len); |
1157 | prev = to; | 1158 | prev = to; |
1158 | to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); | 1159 | to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); |
1159 | } | 1160 | } |
@@ -1174,7 +1175,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1174 | unsigned blocksize = dir->i_sb->s_blocksize; | 1175 | unsigned blocksize = dir->i_sb->s_blocksize; |
1175 | unsigned count, continued; | 1176 | unsigned count, continued; |
1176 | struct buffer_head *bh2; | 1177 | struct buffer_head *bh2; |
1177 | u32 newblock; | 1178 | ext4_lblk_t newblock; |
1178 | u32 hash2; | 1179 | u32 hash2; |
1179 | struct dx_map_entry *map; | 1180 | struct dx_map_entry *map; |
1180 | char *data1 = (*bh)->b_data, *data2; | 1181 | char *data1 = (*bh)->b_data, *data2; |
@@ -1221,14 +1222,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1221 | split = count - move; | 1222 | split = count - move; |
1222 | hash2 = map[split].hash; | 1223 | hash2 = map[split].hash; |
1223 | continued = hash2 == map[split - 1].hash; | 1224 | continued = hash2 == map[split - 1].hash; |
1224 | dxtrace(printk("Split block %i at %x, %i/%i\n", | 1225 | dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", |
1225 | dx_get_block(frame->at), hash2, split, count-split)); | 1226 | (unsigned long)dx_get_block(frame->at), |
1227 | hash2, split, count-split)); | ||
1226 | 1228 | ||
1227 | /* Fancy dance to stay within two buffers */ | 1229 | /* Fancy dance to stay within two buffers */ |
1228 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1230 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
1229 | de = dx_pack_dirents(data1,blocksize); | 1231 | de = dx_pack_dirents(data1,blocksize); |
1230 | de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); | 1232 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1231 | de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); | 1233 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
1232 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1234 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
1233 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); | 1235 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); |
1234 | 1236 | ||
@@ -1297,7 +1299,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1297 | return -EEXIST; | 1299 | return -EEXIST; |
1298 | } | 1300 | } |
1299 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1301 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
1300 | rlen = le16_to_cpu(de->rec_len); | 1302 | rlen = ext4_rec_len_from_disk(de->rec_len); |
1301 | if ((de->inode? rlen - nlen: rlen) >= reclen) | 1303 | if ((de->inode? rlen - nlen: rlen) >= reclen) |
1302 | break; | 1304 | break; |
1303 | de = (struct ext4_dir_entry_2 *)((char *)de + rlen); | 1305 | de = (struct ext4_dir_entry_2 *)((char *)de + rlen); |
@@ -1316,11 +1318,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1316 | 1318 | ||
1317 | /* By now the buffer is marked for journaling */ | 1319 | /* By now the buffer is marked for journaling */ |
1318 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1320 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
1319 | rlen = le16_to_cpu(de->rec_len); | 1321 | rlen = ext4_rec_len_from_disk(de->rec_len); |
1320 | if (de->inode) { | 1322 | if (de->inode) { |
1321 | struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); | 1323 | struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); |
1322 | de1->rec_len = cpu_to_le16(rlen - nlen); | 1324 | de1->rec_len = ext4_rec_len_to_disk(rlen - nlen); |
1323 | de->rec_len = cpu_to_le16(nlen); | 1325 | de->rec_len = ext4_rec_len_to_disk(nlen); |
1324 | de = de1; | 1326 | de = de1; |
1325 | } | 1327 | } |
1326 | de->file_type = EXT4_FT_UNKNOWN; | 1328 | de->file_type = EXT4_FT_UNKNOWN; |
@@ -1374,7 +1376,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1374 | int retval; | 1376 | int retval; |
1375 | unsigned blocksize; | 1377 | unsigned blocksize; |
1376 | struct dx_hash_info hinfo; | 1378 | struct dx_hash_info hinfo; |
1377 | u32 block; | 1379 | ext4_lblk_t block; |
1378 | struct fake_dirent *fde; | 1380 | struct fake_dirent *fde; |
1379 | 1381 | ||
1380 | blocksize = dir->i_sb->s_blocksize; | 1382 | blocksize = dir->i_sb->s_blocksize; |
@@ -1397,17 +1399,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1397 | 1399 | ||
1398 | /* The 0th block becomes the root, move the dirents out */ | 1400 | /* The 0th block becomes the root, move the dirents out */ |
1399 | fde = &root->dotdot; | 1401 | fde = &root->dotdot; |
1400 | de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); | 1402 | de = (struct ext4_dir_entry_2 *)((char *)fde + |
1403 | ext4_rec_len_from_disk(fde->rec_len)); | ||
1401 | len = ((char *) root) + blocksize - (char *) de; | 1404 | len = ((char *) root) + blocksize - (char *) de; |
1402 | memcpy (data1, de, len); | 1405 | memcpy (data1, de, len); |
1403 | de = (struct ext4_dir_entry_2 *) data1; | 1406 | de = (struct ext4_dir_entry_2 *) data1; |
1404 | top = data1 + len; | 1407 | top = data1 + len; |
1405 | while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) | 1408 | while ((char *)(de2 = ext4_next_entry(de)) < top) |
1406 | de = de2; | 1409 | de = de2; |
1407 | de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); | 1410 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
1408 | /* Initialize the root; the dot dirents already exist */ | 1411 | /* Initialize the root; the dot dirents already exist */ |
1409 | de = (struct ext4_dir_entry_2 *) (&root->dotdot); | 1412 | de = (struct ext4_dir_entry_2 *) (&root->dotdot); |
1410 | de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); | 1413 | de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2)); |
1411 | memset (&root->info, 0, sizeof(root->info)); | 1414 | memset (&root->info, 0, sizeof(root->info)); |
1412 | root->info.info_length = sizeof(root->info); | 1415 | root->info.info_length = sizeof(root->info); |
1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1416 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
@@ -1454,7 +1457,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | |||
1454 | int retval; | 1457 | int retval; |
1455 | int dx_fallback=0; | 1458 | int dx_fallback=0; |
1456 | unsigned blocksize; | 1459 | unsigned blocksize; |
1457 | u32 block, blocks; | 1460 | ext4_lblk_t block, blocks; |
1458 | 1461 | ||
1459 | sb = dir->i_sb; | 1462 | sb = dir->i_sb; |
1460 | blocksize = sb->s_blocksize; | 1463 | blocksize = sb->s_blocksize; |
@@ -1487,7 +1490,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | |||
1487 | return retval; | 1490 | return retval; |
1488 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1491 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1489 | de->inode = 0; | 1492 | de->inode = 0; |
1490 | de->rec_len = cpu_to_le16(blocksize); | 1493 | de->rec_len = ext4_rec_len_to_disk(blocksize); |
1491 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1494 | return add_dirent_to_buf(handle, dentry, inode, de, bh); |
1492 | } | 1495 | } |
1493 | 1496 | ||
@@ -1531,7 +1534,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1531 | dx_get_count(entries), dx_get_limit(entries))); | 1534 | dx_get_count(entries), dx_get_limit(entries))); |
1532 | /* Need to split index? */ | 1535 | /* Need to split index? */ |
1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1536 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
1534 | u32 newblock; | 1537 | ext4_lblk_t newblock; |
1535 | unsigned icount = dx_get_count(entries); | 1538 | unsigned icount = dx_get_count(entries); |
1536 | int levels = frame - frames; | 1539 | int levels = frame - frames; |
1537 | struct dx_entry *entries2; | 1540 | struct dx_entry *entries2; |
@@ -1550,7 +1553,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1550 | goto cleanup; | 1553 | goto cleanup; |
1551 | node2 = (struct dx_node *)(bh2->b_data); | 1554 | node2 = (struct dx_node *)(bh2->b_data); |
1552 | entries2 = node2->entries; | 1555 | entries2 = node2->entries; |
1553 | node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); | 1556 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize); |
1554 | node2->fake.inode = 0; | 1557 | node2->fake.inode = 0; |
1555 | BUFFER_TRACE(frame->bh, "get_write_access"); | 1558 | BUFFER_TRACE(frame->bh, "get_write_access"); |
1556 | err = ext4_journal_get_write_access(handle, frame->bh); | 1559 | err = ext4_journal_get_write_access(handle, frame->bh); |
@@ -1648,9 +1651,9 @@ static int ext4_delete_entry (handle_t *handle, | |||
1648 | BUFFER_TRACE(bh, "get_write_access"); | 1651 | BUFFER_TRACE(bh, "get_write_access"); |
1649 | ext4_journal_get_write_access(handle, bh); | 1652 | ext4_journal_get_write_access(handle, bh); |
1650 | if (pde) | 1653 | if (pde) |
1651 | pde->rec_len = | 1654 | pde->rec_len = ext4_rec_len_to_disk( |
1652 | cpu_to_le16(le16_to_cpu(pde->rec_len) + | 1655 | ext4_rec_len_from_disk(pde->rec_len) + |
1653 | le16_to_cpu(de->rec_len)); | 1656 | ext4_rec_len_from_disk(de->rec_len)); |
1654 | else | 1657 | else |
1655 | de->inode = 0; | 1658 | de->inode = 0; |
1656 | dir->i_version++; | 1659 | dir->i_version++; |
@@ -1658,10 +1661,9 @@ static int ext4_delete_entry (handle_t *handle, | |||
1658 | ext4_journal_dirty_metadata(handle, bh); | 1661 | ext4_journal_dirty_metadata(handle, bh); |
1659 | return 0; | 1662 | return 0; |
1660 | } | 1663 | } |
1661 | i += le16_to_cpu(de->rec_len); | 1664 | i += ext4_rec_len_from_disk(de->rec_len); |
1662 | pde = de; | 1665 | pde = de; |
1663 | de = (struct ext4_dir_entry_2 *) | 1666 | de = ext4_next_entry(de); |
1664 | ((char *) de + le16_to_cpu(de->rec_len)); | ||
1665 | } | 1667 | } |
1666 | return -ENOENT; | 1668 | return -ENOENT; |
1667 | } | 1669 | } |
@@ -1824,13 +1826,13 @@ retry: | |||
1824 | de = (struct ext4_dir_entry_2 *) dir_block->b_data; | 1826 | de = (struct ext4_dir_entry_2 *) dir_block->b_data; |
1825 | de->inode = cpu_to_le32(inode->i_ino); | 1827 | de->inode = cpu_to_le32(inode->i_ino); |
1826 | de->name_len = 1; | 1828 | de->name_len = 1; |
1827 | de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); | 1829 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
1828 | strcpy (de->name, "."); | 1830 | strcpy (de->name, "."); |
1829 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1830 | de = (struct ext4_dir_entry_2 *) | 1832 | de = ext4_next_entry(de); |
1831 | ((char *) de + le16_to_cpu(de->rec_len)); | ||
1832 | de->inode = cpu_to_le32(dir->i_ino); | 1833 | de->inode = cpu_to_le32(dir->i_ino); |
1833 | de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); | 1834 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
1835 | EXT4_DIR_REC_LEN(1)); | ||
1834 | de->name_len = 2; | 1836 | de->name_len = 2; |
1835 | strcpy (de->name, ".."); | 1837 | strcpy (de->name, ".."); |
1836 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1838 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
@@ -1882,8 +1884,7 @@ static int empty_dir (struct inode * inode) | |||
1882 | return 1; | 1884 | return 1; |
1883 | } | 1885 | } |
1884 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1886 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1885 | de1 = (struct ext4_dir_entry_2 *) | 1887 | de1 = ext4_next_entry(de); |
1886 | ((char *) de + le16_to_cpu(de->rec_len)); | ||
1887 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1888 | if (le32_to_cpu(de->inode) != inode->i_ino || |
1888 | !le32_to_cpu(de1->inode) || | 1889 | !le32_to_cpu(de1->inode) || |
1889 | strcmp (".", de->name) || | 1890 | strcmp (".", de->name) || |
@@ -1894,9 +1895,9 @@ static int empty_dir (struct inode * inode) | |||
1894 | brelse (bh); | 1895 | brelse (bh); |
1895 | return 1; | 1896 | return 1; |
1896 | } | 1897 | } |
1897 | offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); | 1898 | offset = ext4_rec_len_from_disk(de->rec_len) + |
1898 | de = (struct ext4_dir_entry_2 *) | 1899 | ext4_rec_len_from_disk(de1->rec_len); |
1899 | ((char *) de1 + le16_to_cpu(de1->rec_len)); | 1900 | de = ext4_next_entry(de1); |
1900 | while (offset < inode->i_size ) { | 1901 | while (offset < inode->i_size ) { |
1901 | if (!bh || | 1902 | if (!bh || |
1902 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1903 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
@@ -1925,9 +1926,8 @@ static int empty_dir (struct inode * inode) | |||
1925 | brelse (bh); | 1926 | brelse (bh); |
1926 | return 0; | 1927 | return 0; |
1927 | } | 1928 | } |
1928 | offset += le16_to_cpu(de->rec_len); | 1929 | offset += ext4_rec_len_from_disk(de->rec_len); |
1929 | de = (struct ext4_dir_entry_2 *) | 1930 | de = ext4_next_entry(de); |
1930 | ((char *) de + le16_to_cpu(de->rec_len)); | ||
1931 | } | 1931 | } |
1932 | brelse (bh); | 1932 | brelse (bh); |
1933 | return 1; | 1933 | return 1; |
@@ -2282,8 +2282,7 @@ retry: | |||
2282 | } | 2282 | } |
2283 | 2283 | ||
2284 | #define PARENT_INO(buffer) \ | 2284 | #define PARENT_INO(buffer) \ |
2285 | ((struct ext4_dir_entry_2 *) ((char *) buffer + \ | 2285 | (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode) |
2286 | le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode | ||
2287 | 2286 | ||
2288 | /* | 2287 | /* |
2289 | * Anybody can rename anything with this: the permission checks are left to the | 2288 | * Anybody can rename anything with this: the permission checks are left to the |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bd8a52bb3999..4fbba60816f4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -28,7 +28,7 @@ static int verify_group_input(struct super_block *sb, | |||
28 | struct ext4_super_block *es = sbi->s_es; | 28 | struct ext4_super_block *es = sbi->s_es; |
29 | ext4_fsblk_t start = ext4_blocks_count(es); | 29 | ext4_fsblk_t start = ext4_blocks_count(es); |
30 | ext4_fsblk_t end = start + input->blocks_count; | 30 | ext4_fsblk_t end = start + input->blocks_count; |
31 | unsigned group = input->group; | 31 | ext4_group_t group = input->group; |
32 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 32 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
33 | unsigned overhead = ext4_bg_has_super(sb, group) ? | 33 | unsigned overhead = ext4_bg_has_super(sb, group) ? |
34 | (1 + ext4_bg_num_gdb(sb, group) + | 34 | (1 + ext4_bg_num_gdb(sb, group) + |
@@ -206,7 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
206 | } | 206 | } |
207 | 207 | ||
208 | if (ext4_bg_has_super(sb, input->group)) { | 208 | if (ext4_bg_has_super(sb, input->group)) { |
209 | ext4_debug("mark backup superblock %#04lx (+0)\n", start); | 209 | ext4_debug("mark backup superblock %#04llx (+0)\n", start); |
210 | ext4_set_bit(0, bh->b_data); | 210 | ext4_set_bit(0, bh->b_data); |
211 | } | 211 | } |
212 | 212 | ||
@@ -215,7 +215,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
215 | i < gdblocks; i++, block++, bit++) { | 215 | i < gdblocks; i++, block++, bit++) { |
216 | struct buffer_head *gdb; | 216 | struct buffer_head *gdb; |
217 | 217 | ||
218 | ext4_debug("update backup group %#04lx (+%d)\n", block, bit); | 218 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); |
219 | 219 | ||
220 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 220 | if ((err = extend_or_restart_transaction(handle, 1, bh))) |
221 | goto exit_bh; | 221 | goto exit_bh; |
@@ -243,7 +243,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
243 | i < reserved_gdb; i++, block++, bit++) { | 243 | i < reserved_gdb; i++, block++, bit++) { |
244 | struct buffer_head *gdb; | 244 | struct buffer_head *gdb; |
245 | 245 | ||
246 | ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); | 246 | ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); |
247 | 247 | ||
248 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 248 | if ((err = extend_or_restart_transaction(handle, 1, bh))) |
249 | goto exit_bh; | 249 | goto exit_bh; |
@@ -256,10 +256,10 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
256 | ext4_set_bit(bit, bh->b_data); | 256 | ext4_set_bit(bit, bh->b_data); |
257 | brelse(gdb); | 257 | brelse(gdb); |
258 | } | 258 | } |
259 | ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, | 259 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
260 | input->block_bitmap - start); | 260 | input->block_bitmap - start); |
261 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 261 | ext4_set_bit(input->block_bitmap - start, bh->b_data); |
262 | ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, | 262 | ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, |
263 | input->inode_bitmap - start); | 263 | input->inode_bitmap - start); |
264 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 264 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); |
265 | 265 | ||
@@ -268,7 +268,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
268 | i < sbi->s_itb_per_group; i++, bit++, block++) { | 268 | i < sbi->s_itb_per_group; i++, bit++, block++) { |
269 | struct buffer_head *it; | 269 | struct buffer_head *it; |
270 | 270 | ||
271 | ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); | 271 | ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); |
272 | 272 | ||
273 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 273 | if ((err = extend_or_restart_transaction(handle, 1, bh))) |
274 | goto exit_bh; | 274 | goto exit_bh; |
@@ -291,7 +291,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
291 | brelse(bh); | 291 | brelse(bh); |
292 | 292 | ||
293 | /* Mark unused entries in inode bitmap used */ | 293 | /* Mark unused entries in inode bitmap used */ |
294 | ext4_debug("clear inode bitmap %#04x (+%ld)\n", | 294 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", |
295 | input->inode_bitmap, input->inode_bitmap - start); | 295 | input->inode_bitmap, input->inode_bitmap - start); |
296 | if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { | 296 | if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { |
297 | err = PTR_ERR(bh); | 297 | err = PTR_ERR(bh); |
@@ -357,7 +357,7 @@ static int verify_reserved_gdb(struct super_block *sb, | |||
357 | struct buffer_head *primary) | 357 | struct buffer_head *primary) |
358 | { | 358 | { |
359 | const ext4_fsblk_t blk = primary->b_blocknr; | 359 | const ext4_fsblk_t blk = primary->b_blocknr; |
360 | const unsigned long end = EXT4_SB(sb)->s_groups_count; | 360 | const ext4_group_t end = EXT4_SB(sb)->s_groups_count; |
361 | unsigned three = 1; | 361 | unsigned three = 1; |
362 | unsigned five = 5; | 362 | unsigned five = 5; |
363 | unsigned seven = 7; | 363 | unsigned seven = 7; |
@@ -656,12 +656,12 @@ static void update_backups(struct super_block *sb, | |||
656 | int blk_off, char *data, int size) | 656 | int blk_off, char *data, int size) |
657 | { | 657 | { |
658 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 658 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
659 | const unsigned long last = sbi->s_groups_count; | 659 | const ext4_group_t last = sbi->s_groups_count; |
660 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); | 660 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); |
661 | unsigned three = 1; | 661 | unsigned three = 1; |
662 | unsigned five = 5; | 662 | unsigned five = 5; |
663 | unsigned seven = 7; | 663 | unsigned seven = 7; |
664 | unsigned group; | 664 | ext4_group_t group; |
665 | int rest = sb->s_blocksize - size; | 665 | int rest = sb->s_blocksize - size; |
666 | handle_t *handle; | 666 | handle_t *handle; |
667 | int err = 0, err2; | 667 | int err = 0, err2; |
@@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb, | |||
716 | exit_err: | 716 | exit_err: |
717 | if (err) { | 717 | if (err) { |
718 | ext4_warning(sb, __FUNCTION__, | 718 | ext4_warning(sb, __FUNCTION__, |
719 | "can't update backup for group %d (err %d), " | 719 | "can't update backup for group %lu (err %d), " |
720 | "forcing fsck on next reboot", group, err); | 720 | "forcing fsck on next reboot", group, err); |
721 | sbi->s_mount_state &= ~EXT4_VALID_FS; | 721 | sbi->s_mount_state &= ~EXT4_VALID_FS; |
722 | sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); | 722 | sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); |
@@ -952,7 +952,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
952 | ext4_fsblk_t n_blocks_count) | 952 | ext4_fsblk_t n_blocks_count) |
953 | { | 953 | { |
954 | ext4_fsblk_t o_blocks_count; | 954 | ext4_fsblk_t o_blocks_count; |
955 | unsigned long o_groups_count; | 955 | ext4_group_t o_groups_count; |
956 | ext4_grpblk_t last; | 956 | ext4_grpblk_t last; |
957 | ext4_grpblk_t add; | 957 | ext4_grpblk_t add; |
958 | struct buffer_head * bh; | 958 | struct buffer_head * bh; |
@@ -1054,7 +1054,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1054 | ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); | 1054 | ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); |
1055 | sb->s_dirt = 1; | 1055 | sb->s_dirt = 1; |
1056 | unlock_super(sb); | 1056 | unlock_super(sb); |
1057 | ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, | 1057 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1058 | o_blocks_count + add); | 1058 | o_blocks_count + add); |
1059 | ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); | 1059 | ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); |
1060 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | 1060 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8031dc0e24e5..055a0cd0168e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -373,6 +373,66 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
373 | */ | 373 | */ |
374 | } | 374 | } |
375 | 375 | ||
376 | int ext4_update_compat_feature(handle_t *handle, | ||
377 | struct super_block *sb, __u32 compat) | ||
378 | { | ||
379 | int err = 0; | ||
380 | if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { | ||
381 | err = ext4_journal_get_write_access(handle, | ||
382 | EXT4_SB(sb)->s_sbh); | ||
383 | if (err) | ||
384 | return err; | ||
385 | EXT4_SET_COMPAT_FEATURE(sb, compat); | ||
386 | sb->s_dirt = 1; | ||
387 | handle->h_sync = 1; | ||
388 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
389 | "call ext4_journal_dirty_met adata"); | ||
390 | err = ext4_journal_dirty_metadata(handle, | ||
391 | EXT4_SB(sb)->s_sbh); | ||
392 | } | ||
393 | return err; | ||
394 | } | ||
395 | |||
396 | int ext4_update_rocompat_feature(handle_t *handle, | ||
397 | struct super_block *sb, __u32 rocompat) | ||
398 | { | ||
399 | int err = 0; | ||
400 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { | ||
401 | err = ext4_journal_get_write_access(handle, | ||
402 | EXT4_SB(sb)->s_sbh); | ||
403 | if (err) | ||
404 | return err; | ||
405 | EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); | ||
406 | sb->s_dirt = 1; | ||
407 | handle->h_sync = 1; | ||
408 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
409 | "call ext4_journal_dirty_met adata"); | ||
410 | err = ext4_journal_dirty_metadata(handle, | ||
411 | EXT4_SB(sb)->s_sbh); | ||
412 | } | ||
413 | return err; | ||
414 | } | ||
415 | |||
416 | int ext4_update_incompat_feature(handle_t *handle, | ||
417 | struct super_block *sb, __u32 incompat) | ||
418 | { | ||
419 | int err = 0; | ||
420 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { | ||
421 | err = ext4_journal_get_write_access(handle, | ||
422 | EXT4_SB(sb)->s_sbh); | ||
423 | if (err) | ||
424 | return err; | ||
425 | EXT4_SET_INCOMPAT_FEATURE(sb, incompat); | ||
426 | sb->s_dirt = 1; | ||
427 | handle->h_sync = 1; | ||
428 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
429 | "call ext4_journal_dirty_met adata"); | ||
430 | err = ext4_journal_dirty_metadata(handle, | ||
431 | EXT4_SB(sb)->s_sbh); | ||
432 | } | ||
433 | return err; | ||
434 | } | ||
435 | |||
376 | /* | 436 | /* |
377 | * Open the external journal device | 437 | * Open the external journal device |
378 | */ | 438 | */ |
@@ -443,6 +503,7 @@ static void ext4_put_super (struct super_block * sb) | |||
443 | struct ext4_super_block *es = sbi->s_es; | 503 | struct ext4_super_block *es = sbi->s_es; |
444 | int i; | 504 | int i; |
445 | 505 | ||
506 | ext4_mb_release(sb); | ||
446 | ext4_ext_release(sb); | 507 | ext4_ext_release(sb); |
447 | ext4_xattr_put_super(sb); | 508 | ext4_xattr_put_super(sb); |
448 | jbd2_journal_destroy(sbi->s_journal); | 509 | jbd2_journal_destroy(sbi->s_journal); |
@@ -509,6 +570,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
509 | ei->i_block_alloc_info = NULL; | 570 | ei->i_block_alloc_info = NULL; |
510 | ei->vfs_inode.i_version = 1; | 571 | ei->vfs_inode.i_version = 1; |
511 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
573 | INIT_LIST_HEAD(&ei->i_prealloc_list); | ||
574 | spin_lock_init(&ei->i_prealloc_lock); | ||
512 | return &ei->vfs_inode; | 575 | return &ei->vfs_inode; |
513 | } | 576 | } |
514 | 577 | ||
@@ -533,7 +596,7 @@ static void init_once(struct kmem_cache *cachep, void *foo) | |||
533 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 596 | #ifdef CONFIG_EXT4DEV_FS_XATTR |
534 | init_rwsem(&ei->xattr_sem); | 597 | init_rwsem(&ei->xattr_sem); |
535 | #endif | 598 | #endif |
536 | mutex_init(&ei->truncate_mutex); | 599 | init_rwsem(&ei->i_data_sem); |
537 | inode_init_once(&ei->vfs_inode); | 600 | inode_init_once(&ei->vfs_inode); |
538 | } | 601 | } |
539 | 602 | ||
@@ -605,18 +668,20 @@ static inline void ext4_show_quota_options(struct seq_file *seq, struct super_bl | |||
605 | */ | 668 | */ |
606 | static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | 669 | static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) |
607 | { | 670 | { |
671 | int def_errors; | ||
672 | unsigned long def_mount_opts; | ||
608 | struct super_block *sb = vfs->mnt_sb; | 673 | struct super_block *sb = vfs->mnt_sb; |
609 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 674 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
610 | struct ext4_super_block *es = sbi->s_es; | 675 | struct ext4_super_block *es = sbi->s_es; |
611 | unsigned long def_mount_opts; | ||
612 | 676 | ||
613 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 677 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
678 | def_errors = le16_to_cpu(es->s_errors); | ||
614 | 679 | ||
615 | if (sbi->s_sb_block != 1) | 680 | if (sbi->s_sb_block != 1) |
616 | seq_printf(seq, ",sb=%llu", sbi->s_sb_block); | 681 | seq_printf(seq, ",sb=%llu", sbi->s_sb_block); |
617 | if (test_opt(sb, MINIX_DF)) | 682 | if (test_opt(sb, MINIX_DF)) |
618 | seq_puts(seq, ",minixdf"); | 683 | seq_puts(seq, ",minixdf"); |
619 | if (test_opt(sb, GRPID)) | 684 | if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) |
620 | seq_puts(seq, ",grpid"); | 685 | seq_puts(seq, ",grpid"); |
621 | if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) | 686 | if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) |
622 | seq_puts(seq, ",nogrpid"); | 687 | seq_puts(seq, ",nogrpid"); |
@@ -628,34 +693,33 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
628 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { | 693 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { |
629 | seq_printf(seq, ",resgid=%u", sbi->s_resgid); | 694 | seq_printf(seq, ",resgid=%u", sbi->s_resgid); |
630 | } | 695 | } |
631 | if (test_opt(sb, ERRORS_CONT)) { | 696 | if (test_opt(sb, ERRORS_RO)) { |
632 | int def_errors = le16_to_cpu(es->s_errors); | ||
633 | |||
634 | if (def_errors == EXT4_ERRORS_PANIC || | 697 | if (def_errors == EXT4_ERRORS_PANIC || |
635 | def_errors == EXT4_ERRORS_RO) { | 698 | def_errors == EXT4_ERRORS_CONTINUE) { |
636 | seq_puts(seq, ",errors=continue"); | 699 | seq_puts(seq, ",errors=remount-ro"); |
637 | } | 700 | } |
638 | } | 701 | } |
639 | if (test_opt(sb, ERRORS_RO)) | 702 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) |
640 | seq_puts(seq, ",errors=remount-ro"); | 703 | seq_puts(seq, ",errors=continue"); |
641 | if (test_opt(sb, ERRORS_PANIC)) | 704 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) |
642 | seq_puts(seq, ",errors=panic"); | 705 | seq_puts(seq, ",errors=panic"); |
643 | if (test_opt(sb, NO_UID32)) | 706 | if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) |
644 | seq_puts(seq, ",nouid32"); | 707 | seq_puts(seq, ",nouid32"); |
645 | if (test_opt(sb, DEBUG)) | 708 | if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) |
646 | seq_puts(seq, ",debug"); | 709 | seq_puts(seq, ",debug"); |
647 | if (test_opt(sb, OLDALLOC)) | 710 | if (test_opt(sb, OLDALLOC)) |
648 | seq_puts(seq, ",oldalloc"); | 711 | seq_puts(seq, ",oldalloc"); |
649 | #ifdef CONFIG_EXT4_FS_XATTR | 712 | #ifdef CONFIG_EXT4DEV_FS_XATTR |
650 | if (test_opt(sb, XATTR_USER)) | 713 | if (test_opt(sb, XATTR_USER) && |
714 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | ||
651 | seq_puts(seq, ",user_xattr"); | 715 | seq_puts(seq, ",user_xattr"); |
652 | if (!test_opt(sb, XATTR_USER) && | 716 | if (!test_opt(sb, XATTR_USER) && |
653 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { | 717 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { |
654 | seq_puts(seq, ",nouser_xattr"); | 718 | seq_puts(seq, ",nouser_xattr"); |
655 | } | 719 | } |
656 | #endif | 720 | #endif |
657 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 721 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL |
658 | if (test_opt(sb, POSIX_ACL)) | 722 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
659 | seq_puts(seq, ",acl"); | 723 | seq_puts(seq, ",acl"); |
660 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 724 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
661 | seq_puts(seq, ",noacl"); | 725 | seq_puts(seq, ",noacl"); |
@@ -672,7 +736,17 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
672 | seq_puts(seq, ",nobh"); | 736 | seq_puts(seq, ",nobh"); |
673 | if (!test_opt(sb, EXTENTS)) | 737 | if (!test_opt(sb, EXTENTS)) |
674 | seq_puts(seq, ",noextents"); | 738 | seq_puts(seq, ",noextents"); |
739 | if (!test_opt(sb, MBALLOC)) | ||
740 | seq_puts(seq, ",nomballoc"); | ||
741 | if (test_opt(sb, I_VERSION)) | ||
742 | seq_puts(seq, ",i_version"); | ||
675 | 743 | ||
744 | if (sbi->s_stripe) | ||
745 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | ||
746 | /* | ||
747 | * journal mode get enabled in different ways | ||
748 | * So just print the value even if we didn't specify it | ||
749 | */ | ||
676 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 750 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
677 | seq_puts(seq, ",data=journal"); | 751 | seq_puts(seq, ",data=journal"); |
678 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | 752 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) |
@@ -681,7 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
681 | seq_puts(seq, ",data=writeback"); | 755 | seq_puts(seq, ",data=writeback"); |
682 | 756 | ||
683 | ext4_show_quota_options(seq, sb); | 757 | ext4_show_quota_options(seq, sb); |
684 | |||
685 | return 0; | 758 | return 0; |
686 | } | 759 | } |
687 | 760 | ||
@@ -809,11 +882,13 @@ enum { | |||
809 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 882 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
810 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 883 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
811 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 884 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
885 | Opt_journal_checksum, Opt_journal_async_commit, | ||
812 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 886 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
813 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 887 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
814 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 888 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
815 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 889 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
816 | Opt_grpquota, Opt_extents, Opt_noextents, | 890 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
891 | Opt_mballoc, Opt_nomballoc, Opt_stripe, | ||
817 | }; | 892 | }; |
818 | 893 | ||
819 | static match_table_t tokens = { | 894 | static match_table_t tokens = { |
@@ -848,6 +923,8 @@ static match_table_t tokens = { | |||
848 | {Opt_journal_update, "journal=update"}, | 923 | {Opt_journal_update, "journal=update"}, |
849 | {Opt_journal_inum, "journal=%u"}, | 924 | {Opt_journal_inum, "journal=%u"}, |
850 | {Opt_journal_dev, "journal_dev=%u"}, | 925 | {Opt_journal_dev, "journal_dev=%u"}, |
926 | {Opt_journal_checksum, "journal_checksum"}, | ||
927 | {Opt_journal_async_commit, "journal_async_commit"}, | ||
851 | {Opt_abort, "abort"}, | 928 | {Opt_abort, "abort"}, |
852 | {Opt_data_journal, "data=journal"}, | 929 | {Opt_data_journal, "data=journal"}, |
853 | {Opt_data_ordered, "data=ordered"}, | 930 | {Opt_data_ordered, "data=ordered"}, |
@@ -865,6 +942,10 @@ static match_table_t tokens = { | |||
865 | {Opt_barrier, "barrier=%u"}, | 942 | {Opt_barrier, "barrier=%u"}, |
866 | {Opt_extents, "extents"}, | 943 | {Opt_extents, "extents"}, |
867 | {Opt_noextents, "noextents"}, | 944 | {Opt_noextents, "noextents"}, |
945 | {Opt_i_version, "i_version"}, | ||
946 | {Opt_mballoc, "mballoc"}, | ||
947 | {Opt_nomballoc, "nomballoc"}, | ||
948 | {Opt_stripe, "stripe=%u"}, | ||
868 | {Opt_err, NULL}, | 949 | {Opt_err, NULL}, |
869 | {Opt_resize, "resize"}, | 950 | {Opt_resize, "resize"}, |
870 | }; | 951 | }; |
@@ -1035,6 +1116,13 @@ static int parse_options (char *options, struct super_block *sb, | |||
1035 | return 0; | 1116 | return 0; |
1036 | *journal_devnum = option; | 1117 | *journal_devnum = option; |
1037 | break; | 1118 | break; |
1119 | case Opt_journal_checksum: | ||
1120 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1121 | break; | ||
1122 | case Opt_journal_async_commit: | ||
1123 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | ||
1124 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1125 | break; | ||
1038 | case Opt_noload: | 1126 | case Opt_noload: |
1039 | set_opt (sbi->s_mount_opt, NOLOAD); | 1127 | set_opt (sbi->s_mount_opt, NOLOAD); |
1040 | break; | 1128 | break; |
@@ -1203,6 +1291,23 @@ clear_qf_name: | |||
1203 | case Opt_noextents: | 1291 | case Opt_noextents: |
1204 | clear_opt (sbi->s_mount_opt, EXTENTS); | 1292 | clear_opt (sbi->s_mount_opt, EXTENTS); |
1205 | break; | 1293 | break; |
1294 | case Opt_i_version: | ||
1295 | set_opt(sbi->s_mount_opt, I_VERSION); | ||
1296 | sb->s_flags |= MS_I_VERSION; | ||
1297 | break; | ||
1298 | case Opt_mballoc: | ||
1299 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1300 | break; | ||
1301 | case Opt_nomballoc: | ||
1302 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
1303 | break; | ||
1304 | case Opt_stripe: | ||
1305 | if (match_int(&args[0], &option)) | ||
1306 | return 0; | ||
1307 | if (option < 0) | ||
1308 | return 0; | ||
1309 | sbi->s_stripe = option; | ||
1310 | break; | ||
1206 | default: | 1311 | default: |
1207 | printk (KERN_ERR | 1312 | printk (KERN_ERR |
1208 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1313 | "EXT4-fs: Unrecognized mount option \"%s\" " |
@@ -1364,7 +1469,7 @@ static int ext4_check_descriptors (struct super_block * sb) | |||
1364 | struct ext4_group_desc * gdp = NULL; | 1469 | struct ext4_group_desc * gdp = NULL; |
1365 | int desc_block = 0; | 1470 | int desc_block = 0; |
1366 | int flexbg_flag = 0; | 1471 | int flexbg_flag = 0; |
1367 | int i; | 1472 | ext4_group_t i; |
1368 | 1473 | ||
1369 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1474 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1370 | flexbg_flag = 1; | 1475 | flexbg_flag = 1; |
@@ -1386,7 +1491,7 @@ static int ext4_check_descriptors (struct super_block * sb) | |||
1386 | if (block_bitmap < first_block || block_bitmap > last_block) | 1491 | if (block_bitmap < first_block || block_bitmap > last_block) |
1387 | { | 1492 | { |
1388 | ext4_error (sb, "ext4_check_descriptors", | 1493 | ext4_error (sb, "ext4_check_descriptors", |
1389 | "Block bitmap for group %d" | 1494 | "Block bitmap for group %lu" |
1390 | " not in group (block %llu)!", | 1495 | " not in group (block %llu)!", |
1391 | i, block_bitmap); | 1496 | i, block_bitmap); |
1392 | return 0; | 1497 | return 0; |
@@ -1395,7 +1500,7 @@ static int ext4_check_descriptors (struct super_block * sb) | |||
1395 | if (inode_bitmap < first_block || inode_bitmap > last_block) | 1500 | if (inode_bitmap < first_block || inode_bitmap > last_block) |
1396 | { | 1501 | { |
1397 | ext4_error (sb, "ext4_check_descriptors", | 1502 | ext4_error (sb, "ext4_check_descriptors", |
1398 | "Inode bitmap for group %d" | 1503 | "Inode bitmap for group %lu" |
1399 | " not in group (block %llu)!", | 1504 | " not in group (block %llu)!", |
1400 | i, inode_bitmap); | 1505 | i, inode_bitmap); |
1401 | return 0; | 1506 | return 0; |
@@ -1405,17 +1510,16 @@ static int ext4_check_descriptors (struct super_block * sb) | |||
1405 | inode_table + sbi->s_itb_per_group - 1 > last_block) | 1510 | inode_table + sbi->s_itb_per_group - 1 > last_block) |
1406 | { | 1511 | { |
1407 | ext4_error (sb, "ext4_check_descriptors", | 1512 | ext4_error (sb, "ext4_check_descriptors", |
1408 | "Inode table for group %d" | 1513 | "Inode table for group %lu" |
1409 | " not in group (block %llu)!", | 1514 | " not in group (block %llu)!", |
1410 | i, inode_table); | 1515 | i, inode_table); |
1411 | return 0; | 1516 | return 0; |
1412 | } | 1517 | } |
1413 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { | 1518 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { |
1414 | ext4_error(sb, __FUNCTION__, | 1519 | ext4_error(sb, __FUNCTION__, |
1415 | "Checksum for group %d failed (%u!=%u)\n", i, | 1520 | "Checksum for group %lu failed (%u!=%u)\n", |
1416 | le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1521 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
1417 | gdp)), | 1522 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
1418 | le16_to_cpu(gdp->bg_checksum)); | ||
1419 | return 0; | 1523 | return 0; |
1420 | } | 1524 | } |
1421 | if (!flexbg_flag) | 1525 | if (!flexbg_flag) |
@@ -1429,7 +1533,6 @@ static int ext4_check_descriptors (struct super_block * sb) | |||
1429 | return 1; | 1533 | return 1; |
1430 | } | 1534 | } |
1431 | 1535 | ||
1432 | |||
1433 | /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at | 1536 | /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at |
1434 | * the superblock) which were deleted from all directories, but held open by | 1537 | * the superblock) which were deleted from all directories, but held open by |
1435 | * a process at the time of a crash. We walk the list and try to delete these | 1538 | * a process at the time of a crash. We walk the list and try to delete these |
@@ -1542,20 +1645,95 @@ static void ext4_orphan_cleanup (struct super_block * sb, | |||
1542 | #endif | 1645 | #endif |
1543 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 1646 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
1544 | } | 1647 | } |
1648 | /* | ||
1649 | * Maximal extent format file size. | ||
1650 | * Resulting logical blkno at s_maxbytes must fit in our on-disk | ||
1651 | * extent format containers, within a sector_t, and within i_blocks | ||
1652 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, | ||
1653 | * so that won't be a limiting factor. | ||
1654 | * | ||
1655 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | ||
1656 | */ | ||
1657 | static loff_t ext4_max_size(int blkbits) | ||
1658 | { | ||
1659 | loff_t res; | ||
1660 | loff_t upper_limit = MAX_LFS_FILESIZE; | ||
1661 | |||
1662 | /* small i_blocks in vfs inode? */ | ||
1663 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
1664 | /* | ||
1665 | * CONFIG_LSF is not enabled implies the inode | ||
1666 | * i_block represent total blocks in 512 bytes | ||
1667 | * 32 == size of vfs inode i_blocks * 8 | ||
1668 | */ | ||
1669 | upper_limit = (1LL << 32) - 1; | ||
1670 | |||
1671 | /* total blocks in file system block size */ | ||
1672 | upper_limit >>= (blkbits - 9); | ||
1673 | upper_limit <<= blkbits; | ||
1674 | } | ||
1675 | |||
1676 | /* 32-bit extent-start container, ee_block */ | ||
1677 | res = 1LL << 32; | ||
1678 | res <<= blkbits; | ||
1679 | res -= 1; | ||
1680 | |||
1681 | /* Sanity check against vm- & vfs- imposed limits */ | ||
1682 | if (res > upper_limit) | ||
1683 | res = upper_limit; | ||
1684 | |||
1685 | return res; | ||
1686 | } | ||
1545 | 1687 | ||
1546 | /* | 1688 | /* |
1547 | * Maximal file size. There is a direct, and {,double-,triple-}indirect | 1689 | * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect |
1548 | * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. | 1690 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
1549 | * We need to be 1 filesystem block less than the 2^32 sector limit. | 1691 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
1550 | */ | 1692 | */ |
1551 | static loff_t ext4_max_size(int bits) | 1693 | static loff_t ext4_max_bitmap_size(int bits) |
1552 | { | 1694 | { |
1553 | loff_t res = EXT4_NDIR_BLOCKS; | 1695 | loff_t res = EXT4_NDIR_BLOCKS; |
1554 | /* This constant is calculated to be the largest file size for a | 1696 | int meta_blocks; |
1555 | * dense, 4k-blocksize file such that the total number of | 1697 | loff_t upper_limit; |
1698 | /* This is calculated to be the largest file size for a | ||
1699 | * dense, bitmapped file such that the total number of | ||
1556 | * sectors in the file, including data and all indirect blocks, | 1700 | * sectors in the file, including data and all indirect blocks, |
1557 | * does not exceed 2^32. */ | 1701 | * does not exceed 2^48 -1 |
1558 | const loff_t upper_limit = 0x1ff7fffd000LL; | 1702 | * __u32 i_blocks_lo and _u16 i_blocks_high representing the |
1703 | * total number of 512 bytes blocks of the file | ||
1704 | */ | ||
1705 | |||
1706 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
1707 | /* | ||
1708 | * CONFIG_LSF is not enabled implies the inode | ||
1709 | * i_block represent total blocks in 512 bytes | ||
1710 | * 32 == size of vfs inode i_blocks * 8 | ||
1711 | */ | ||
1712 | upper_limit = (1LL << 32) - 1; | ||
1713 | |||
1714 | /* total blocks in file system block size */ | ||
1715 | upper_limit >>= (bits - 9); | ||
1716 | |||
1717 | } else { | ||
1718 | /* | ||
1719 | * We use 48 bit ext4_inode i_blocks | ||
1720 | * With EXT4_HUGE_FILE_FL set the i_blocks | ||
1721 | * represent total number of blocks in | ||
1722 | * file system block size | ||
1723 | */ | ||
1724 | upper_limit = (1LL << 48) - 1; | ||
1725 | |||
1726 | } | ||
1727 | |||
1728 | /* indirect blocks */ | ||
1729 | meta_blocks = 1; | ||
1730 | /* double indirect blocks */ | ||
1731 | meta_blocks += 1 + (1LL << (bits-2)); | ||
1732 | /* tripple indirect blocks */ | ||
1733 | meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); | ||
1734 | |||
1735 | upper_limit -= meta_blocks; | ||
1736 | upper_limit <<= bits; | ||
1559 | 1737 | ||
1560 | res += 1LL << (bits-2); | 1738 | res += 1LL << (bits-2); |
1561 | res += 1LL << (2*(bits-2)); | 1739 | res += 1LL << (2*(bits-2)); |
@@ -1563,6 +1741,10 @@ static loff_t ext4_max_size(int bits) | |||
1563 | res <<= bits; | 1741 | res <<= bits; |
1564 | if (res > upper_limit) | 1742 | if (res > upper_limit) |
1565 | res = upper_limit; | 1743 | res = upper_limit; |
1744 | |||
1745 | if (res > MAX_LFS_FILESIZE) | ||
1746 | res = MAX_LFS_FILESIZE; | ||
1747 | |||
1566 | return res; | 1748 | return res; |
1567 | } | 1749 | } |
1568 | 1750 | ||
@@ -1570,7 +1752,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, | |||
1570 | ext4_fsblk_t logical_sb_block, int nr) | 1752 | ext4_fsblk_t logical_sb_block, int nr) |
1571 | { | 1753 | { |
1572 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1754 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1573 | unsigned long bg, first_meta_bg; | 1755 | ext4_group_t bg, first_meta_bg; |
1574 | int has_super = 0; | 1756 | int has_super = 0; |
1575 | 1757 | ||
1576 | first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); | 1758 | first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); |
@@ -1584,8 +1766,39 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, | |||
1584 | return (has_super + ext4_group_first_block_no(sb, bg)); | 1766 | return (has_super + ext4_group_first_block_no(sb, bg)); |
1585 | } | 1767 | } |
1586 | 1768 | ||
1769 | /** | ||
1770 | * ext4_get_stripe_size: Get the stripe size. | ||
1771 | * @sbi: In memory super block info | ||
1772 | * | ||
1773 | * If we have specified it via mount option, then | ||
1774 | * use the mount option value. If the value specified at mount time is | ||
1775 | * greater than the blocks per group use the super block value. | ||
1776 | * If the super block value is greater than blocks per group return 0. | ||
1777 | * Allocator needs it be less than blocks per group. | ||
1778 | * | ||
1779 | */ | ||
1780 | static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | ||
1781 | { | ||
1782 | unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); | ||
1783 | unsigned long stripe_width = | ||
1784 | le32_to_cpu(sbi->s_es->s_raid_stripe_width); | ||
1785 | |||
1786 | if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) | ||
1787 | return sbi->s_stripe; | ||
1788 | |||
1789 | if (stripe_width <= sbi->s_blocks_per_group) | ||
1790 | return stripe_width; | ||
1791 | |||
1792 | if (stride <= sbi->s_blocks_per_group) | ||
1793 | return stride; | ||
1794 | |||
1795 | return 0; | ||
1796 | } | ||
1587 | 1797 | ||
1588 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) | 1798 | static int ext4_fill_super (struct super_block *sb, void *data, int silent) |
1799 | __releases(kernel_sem) | ||
1800 | __acquires(kernel_sem) | ||
1801 | |||
1589 | { | 1802 | { |
1590 | struct buffer_head * bh; | 1803 | struct buffer_head * bh; |
1591 | struct ext4_super_block *es = NULL; | 1804 | struct ext4_super_block *es = NULL; |
@@ -1599,7 +1812,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1599 | unsigned long def_mount_opts; | 1812 | unsigned long def_mount_opts; |
1600 | struct inode *root; | 1813 | struct inode *root; |
1601 | int blocksize; | 1814 | int blocksize; |
1602 | int hblock; | ||
1603 | int db_count; | 1815 | int db_count; |
1604 | int i; | 1816 | int i; |
1605 | int needs_recovery; | 1817 | int needs_recovery; |
@@ -1624,6 +1836,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1624 | goto out_fail; | 1836 | goto out_fail; |
1625 | } | 1837 | } |
1626 | 1838 | ||
1839 | if (!sb_set_blocksize(sb, blocksize)) { | ||
1840 | printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize); | ||
1841 | goto out_fail; | ||
1842 | } | ||
1843 | |||
1627 | /* | 1844 | /* |
1628 | * The ext4 superblock will not be buffer aligned for other than 1kB | 1845 | * The ext4 superblock will not be buffer aligned for other than 1kB |
1629 | * block sizes. We need to calculate the offset from buffer start. | 1846 | * block sizes. We need to calculate the offset from buffer start. |
@@ -1674,10 +1891,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1674 | 1891 | ||
1675 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) | 1892 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) |
1676 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); | 1893 | set_opt(sbi->s_mount_opt, ERRORS_PANIC); |
1677 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) | 1894 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) |
1678 | set_opt(sbi->s_mount_opt, ERRORS_RO); | ||
1679 | else | ||
1680 | set_opt(sbi->s_mount_opt, ERRORS_CONT); | 1895 | set_opt(sbi->s_mount_opt, ERRORS_CONT); |
1896 | else | ||
1897 | set_opt(sbi->s_mount_opt, ERRORS_RO); | ||
1681 | 1898 | ||
1682 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | 1899 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); |
1683 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | 1900 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); |
@@ -1689,6 +1906,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1689 | * User -o noextents to turn it off | 1906 | * User -o noextents to turn it off |
1690 | */ | 1907 | */ |
1691 | set_opt(sbi->s_mount_opt, EXTENTS); | 1908 | set_opt(sbi->s_mount_opt, EXTENTS); |
1909 | /* | ||
1910 | * turn on mballoc feature by default in ext4 filesystem | ||
1911 | * User -o nomballoc to turn it off | ||
1912 | */ | ||
1913 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
1692 | 1914 | ||
1693 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, | 1915 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, |
1694 | NULL, 0)) | 1916 | NULL, 0)) |
@@ -1723,6 +1945,19 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1723 | sb->s_id, le32_to_cpu(features)); | 1945 | sb->s_id, le32_to_cpu(features)); |
1724 | goto failed_mount; | 1946 | goto failed_mount; |
1725 | } | 1947 | } |
1948 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
1949 | /* | ||
1950 | * Large file size enabled file system can only be | ||
1951 | * mount if kernel is build with CONFIG_LSF | ||
1952 | */ | ||
1953 | if (sizeof(root->i_blocks) < sizeof(u64) && | ||
1954 | !(sb->s_flags & MS_RDONLY)) { | ||
1955 | printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " | ||
1956 | "files cannot be mounted read-write " | ||
1957 | "without CONFIG_LSF.\n", sb->s_id); | ||
1958 | goto failed_mount; | ||
1959 | } | ||
1960 | } | ||
1726 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 1961 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
1727 | 1962 | ||
1728 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 1963 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
@@ -1733,20 +1968,16 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1733 | goto failed_mount; | 1968 | goto failed_mount; |
1734 | } | 1969 | } |
1735 | 1970 | ||
1736 | hblock = bdev_hardsect_size(sb->s_bdev); | ||
1737 | if (sb->s_blocksize != blocksize) { | 1971 | if (sb->s_blocksize != blocksize) { |
1738 | /* | 1972 | |
1739 | * Make sure the blocksize for the filesystem is larger | 1973 | /* Validate the filesystem blocksize */ |
1740 | * than the hardware sectorsize for the machine. | 1974 | if (!sb_set_blocksize(sb, blocksize)) { |
1741 | */ | 1975 | printk(KERN_ERR "EXT4-fs: bad block size %d.\n", |
1742 | if (blocksize < hblock) { | 1976 | blocksize); |
1743 | printk(KERN_ERR "EXT4-fs: blocksize %d too small for " | ||
1744 | "device blocksize %d.\n", blocksize, hblock); | ||
1745 | goto failed_mount; | 1977 | goto failed_mount; |
1746 | } | 1978 | } |
1747 | 1979 | ||
1748 | brelse (bh); | 1980 | brelse (bh); |
1749 | sb_set_blocksize(sb, blocksize); | ||
1750 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; | 1981 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; |
1751 | offset = do_div(logical_sb_block, blocksize); | 1982 | offset = do_div(logical_sb_block, blocksize); |
1752 | bh = sb_bread(sb, logical_sb_block); | 1983 | bh = sb_bread(sb, logical_sb_block); |
@@ -1764,6 +1995,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1764 | } | 1995 | } |
1765 | } | 1996 | } |
1766 | 1997 | ||
1998 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); | ||
1767 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); | 1999 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); |
1768 | 2000 | ||
1769 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 2001 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
@@ -1797,7 +2029,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1797 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; | 2029 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; |
1798 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | 2030 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); |
1799 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | 2031 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); |
1800 | if (EXT4_INODE_SIZE(sb) == 0) | 2032 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) |
1801 | goto cantfind_ext4; | 2033 | goto cantfind_ext4; |
1802 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); | 2034 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); |
1803 | if (sbi->s_inodes_per_block == 0) | 2035 | if (sbi->s_inodes_per_block == 0) |
@@ -1838,6 +2070,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1838 | 2070 | ||
1839 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) | 2071 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) |
1840 | goto cantfind_ext4; | 2072 | goto cantfind_ext4; |
2073 | |||
2074 | /* ensure blocks_count calculation below doesn't sign-extend */ | ||
2075 | if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < | ||
2076 | le32_to_cpu(es->s_first_data_block) + 1) { | ||
2077 | printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " | ||
2078 | "first data block %u, blocks per group %lu\n", | ||
2079 | ext4_blocks_count(es), | ||
2080 | le32_to_cpu(es->s_first_data_block), | ||
2081 | EXT4_BLOCKS_PER_GROUP(sb)); | ||
2082 | goto failed_mount; | ||
2083 | } | ||
1841 | blocks_count = (ext4_blocks_count(es) - | 2084 | blocks_count = (ext4_blocks_count(es) - |
1842 | le32_to_cpu(es->s_first_data_block) + | 2085 | le32_to_cpu(es->s_first_data_block) + |
1843 | EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2086 | EXT4_BLOCKS_PER_GROUP(sb) - 1); |
@@ -1900,6 +2143,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1900 | sbi->s_rsv_window_head.rsv_goal_size = 0; | 2143 | sbi->s_rsv_window_head.rsv_goal_size = 0; |
1901 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | 2144 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); |
1902 | 2145 | ||
2146 | sbi->s_stripe = ext4_get_stripe_size(sbi); | ||
2147 | |||
1903 | /* | 2148 | /* |
1904 | * set up enough so that it can read an inode | 2149 | * set up enough so that it can read an inode |
1905 | */ | 2150 | */ |
@@ -1944,6 +2189,21 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
1944 | goto failed_mount4; | 2189 | goto failed_mount4; |
1945 | } | 2190 | } |
1946 | 2191 | ||
2192 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | ||
2193 | jbd2_journal_set_features(sbi->s_journal, | ||
2194 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2195 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2196 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | ||
2197 | jbd2_journal_set_features(sbi->s_journal, | ||
2198 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
2199 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | ||
2200 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2201 | } else { | ||
2202 | jbd2_journal_clear_features(sbi->s_journal, | ||
2203 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2204 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2205 | } | ||
2206 | |||
1947 | /* We have now updated the journal if required, so we can | 2207 | /* We have now updated the journal if required, so we can |
1948 | * validate the data journaling mode. */ | 2208 | * validate the data journaling mode. */ |
1949 | switch (test_opt(sb, DATA_FLAGS)) { | 2209 | switch (test_opt(sb, DATA_FLAGS)) { |
@@ -2044,6 +2304,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
2044 | "writeback"); | 2304 | "writeback"); |
2045 | 2305 | ||
2046 | ext4_ext_init(sb); | 2306 | ext4_ext_init(sb); |
2307 | ext4_mb_init(sb, needs_recovery); | ||
2047 | 2308 | ||
2048 | lock_kernel(); | 2309 | lock_kernel(); |
2049 | return 0; | 2310 | return 0; |
@@ -2673,7 +2934,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2673 | if (test_opt(sb, MINIX_DF)) { | 2934 | if (test_opt(sb, MINIX_DF)) { |
2674 | sbi->s_overhead_last = 0; | 2935 | sbi->s_overhead_last = 0; |
2675 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { | 2936 | } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { |
2676 | unsigned long ngroups = sbi->s_groups_count, i; | 2937 | ext4_group_t ngroups = sbi->s_groups_count, i; |
2677 | ext4_fsblk_t overhead = 0; | 2938 | ext4_fsblk_t overhead = 0; |
2678 | smp_rmb(); | 2939 | smp_rmb(); |
2679 | 2940 | ||
@@ -2909,7 +3170,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
2909 | size_t len, loff_t off) | 3170 | size_t len, loff_t off) |
2910 | { | 3171 | { |
2911 | struct inode *inode = sb_dqopt(sb)->files[type]; | 3172 | struct inode *inode = sb_dqopt(sb)->files[type]; |
2912 | sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 3173 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
2913 | int err = 0; | 3174 | int err = 0; |
2914 | int offset = off & (sb->s_blocksize - 1); | 3175 | int offset = off & (sb->s_blocksize - 1); |
2915 | int tocopy; | 3176 | int tocopy; |
@@ -2947,7 +3208,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
2947 | const char *data, size_t len, loff_t off) | 3208 | const char *data, size_t len, loff_t off) |
2948 | { | 3209 | { |
2949 | struct inode *inode = sb_dqopt(sb)->files[type]; | 3210 | struct inode *inode = sb_dqopt(sb)->files[type]; |
2950 | sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 3211 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
2951 | int err = 0; | 3212 | int err = 0; |
2952 | int offset = off & (sb->s_blocksize - 1); | 3213 | int offset = off & (sb->s_blocksize - 1); |
2953 | int tocopy; | 3214 | int tocopy; |
@@ -3002,7 +3263,6 @@ out: | |||
3002 | i_size_write(inode, off+len-towrite); | 3263 | i_size_write(inode, off+len-towrite); |
3003 | EXT4_I(inode)->i_disksize = inode->i_size; | 3264 | EXT4_I(inode)->i_disksize = inode->i_size; |
3004 | } | 3265 | } |
3005 | inode->i_version++; | ||
3006 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 3266 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
3007 | ext4_mark_inode_dirty(handle, inode); | 3267 | ext4_mark_inode_dirty(handle, inode); |
3008 | mutex_unlock(&inode->i_mutex); | 3268 | mutex_unlock(&inode->i_mutex); |
@@ -3027,9 +3287,15 @@ static struct file_system_type ext4dev_fs_type = { | |||
3027 | 3287 | ||
3028 | static int __init init_ext4_fs(void) | 3288 | static int __init init_ext4_fs(void) |
3029 | { | 3289 | { |
3030 | int err = init_ext4_xattr(); | 3290 | int err; |
3291 | |||
3292 | err = init_ext4_mballoc(); | ||
3031 | if (err) | 3293 | if (err) |
3032 | return err; | 3294 | return err; |
3295 | |||
3296 | err = init_ext4_xattr(); | ||
3297 | if (err) | ||
3298 | goto out2; | ||
3033 | err = init_inodecache(); | 3299 | err = init_inodecache(); |
3034 | if (err) | 3300 | if (err) |
3035 | goto out1; | 3301 | goto out1; |
@@ -3041,6 +3307,8 @@ out: | |||
3041 | destroy_inodecache(); | 3307 | destroy_inodecache(); |
3042 | out1: | 3308 | out1: |
3043 | exit_ext4_xattr(); | 3309 | exit_ext4_xattr(); |
3310 | out2: | ||
3311 | exit_ext4_mballoc(); | ||
3044 | return err; | 3312 | return err; |
3045 | } | 3313 | } |
3046 | 3314 | ||
@@ -3049,6 +3317,7 @@ static void __exit exit_ext4_fs(void) | |||
3049 | unregister_filesystem(&ext4dev_fs_type); | 3317 | unregister_filesystem(&ext4dev_fs_type); |
3050 | destroy_inodecache(); | 3318 | destroy_inodecache(); |
3051 | exit_ext4_xattr(); | 3319 | exit_ext4_xattr(); |
3320 | exit_ext4_mballoc(); | ||
3052 | } | 3321 | } |
3053 | 3322 | ||
3054 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3323 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 86387302c2a9..d7962139c010 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
480 | ea_bdebug(bh, "refcount now=0; freeing"); | 480 | ea_bdebug(bh, "refcount now=0; freeing"); |
481 | if (ce) | 481 | if (ce) |
482 | mb_cache_entry_free(ce); | 482 | mb_cache_entry_free(ce); |
483 | ext4_free_blocks(handle, inode, bh->b_blocknr, 1); | 483 | ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); |
484 | get_bh(bh); | 484 | get_bh(bh); |
485 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | 485 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); |
486 | } else { | 486 | } else { |
@@ -821,7 +821,7 @@ inserted: | |||
821 | new_bh = sb_getblk(sb, block); | 821 | new_bh = sb_getblk(sb, block); |
822 | if (!new_bh) { | 822 | if (!new_bh) { |
823 | getblk_failed: | 823 | getblk_failed: |
824 | ext4_free_blocks(handle, inode, block, 1); | 824 | ext4_free_blocks(handle, inode, block, 1, 1); |
825 | error = -EIO; | 825 | error = -EIO; |
826 | goto cleanup; | 826 | goto cleanup; |
827 | } | 827 | } |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 2c1b73fb82ae..5fb366992b73 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -590,21 +590,49 @@ error: | |||
590 | 590 | ||
591 | EXPORT_SYMBOL_GPL(fat_free_clusters); | 591 | EXPORT_SYMBOL_GPL(fat_free_clusters); |
592 | 592 | ||
593 | /* 128kb is the whole sectors for FAT12 and FAT16 */ | ||
594 | #define FAT_READA_SIZE (128 * 1024) | ||
595 | |||
596 | static void fat_ent_reada(struct super_block *sb, struct fat_entry *fatent, | ||
597 | unsigned long reada_blocks) | ||
598 | { | ||
599 | struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; | ||
600 | sector_t blocknr; | ||
601 | int i, offset; | ||
602 | |||
603 | ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); | ||
604 | |||
605 | for (i = 0; i < reada_blocks; i++) | ||
606 | sb_breadahead(sb, blocknr + i); | ||
607 | } | ||
608 | |||
593 | int fat_count_free_clusters(struct super_block *sb) | 609 | int fat_count_free_clusters(struct super_block *sb) |
594 | { | 610 | { |
595 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 611 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
596 | struct fatent_operations *ops = sbi->fatent_ops; | 612 | struct fatent_operations *ops = sbi->fatent_ops; |
597 | struct fat_entry fatent; | 613 | struct fat_entry fatent; |
614 | unsigned long reada_blocks, reada_mask, cur_block; | ||
598 | int err = 0, free; | 615 | int err = 0, free; |
599 | 616 | ||
600 | lock_fat(sbi); | 617 | lock_fat(sbi); |
601 | if (sbi->free_clusters != -1) | 618 | if (sbi->free_clusters != -1) |
602 | goto out; | 619 | goto out; |
603 | 620 | ||
621 | reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits; | ||
622 | reada_mask = reada_blocks - 1; | ||
623 | cur_block = 0; | ||
624 | |||
604 | free = 0; | 625 | free = 0; |
605 | fatent_init(&fatent); | 626 | fatent_init(&fatent); |
606 | fatent_set_entry(&fatent, FAT_START_ENT); | 627 | fatent_set_entry(&fatent, FAT_START_ENT); |
607 | while (fatent.entry < sbi->max_cluster) { | 628 | while (fatent.entry < sbi->max_cluster) { |
629 | /* readahead of fat blocks */ | ||
630 | if ((cur_block & reada_mask) == 0) { | ||
631 | unsigned long rest = sbi->fat_length - cur_block; | ||
632 | fat_ent_reada(sb, &fatent, min(reada_blocks, rest)); | ||
633 | } | ||
634 | cur_block++; | ||
635 | |||
608 | err = fat_ent_read_block(sb, &fatent); | 636 | err = fat_ent_read_block(sb, &fatent); |
609 | if (err) | 637 | if (err) |
610 | goto out; | 638 | goto out; |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0fca82021d76..300324bd563c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -482,8 +482,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | |||
482 | if (wbc->nr_to_write <= 0) | 482 | if (wbc->nr_to_write <= 0) |
483 | break; | 483 | break; |
484 | } | 484 | } |
485 | if (!list_empty(&sb->s_more_io)) | ||
486 | wbc->more_io = 1; | ||
487 | return; /* Leave any unwritten inodes on s_io */ | 485 | return; /* Leave any unwritten inodes on s_io */ |
488 | } | 486 | } |
489 | 487 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 84f9f7dfdf5b..e5e80d1a4687 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -744,9 +744,6 @@ static inline void unregister_fuseblk(void) | |||
744 | } | 744 | } |
745 | #endif | 745 | #endif |
746 | 746 | ||
747 | static decl_subsys(fuse, NULL, NULL); | ||
748 | static decl_subsys(connections, NULL, NULL); | ||
749 | |||
750 | static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) | 747 | static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) |
751 | { | 748 | { |
752 | struct inode * inode = foo; | 749 | struct inode * inode = foo; |
@@ -791,32 +788,37 @@ static void fuse_fs_cleanup(void) | |||
791 | kmem_cache_destroy(fuse_inode_cachep); | 788 | kmem_cache_destroy(fuse_inode_cachep); |
792 | } | 789 | } |
793 | 790 | ||
791 | static struct kobject *fuse_kobj; | ||
792 | static struct kobject *connections_kobj; | ||
793 | |||
794 | static int fuse_sysfs_init(void) | 794 | static int fuse_sysfs_init(void) |
795 | { | 795 | { |
796 | int err; | 796 | int err; |
797 | 797 | ||
798 | kobj_set_kset_s(&fuse_subsys, fs_subsys); | 798 | fuse_kobj = kobject_create_and_add("fuse", fs_kobj); |
799 | err = subsystem_register(&fuse_subsys); | 799 | if (!fuse_kobj) { |
800 | if (err) | 800 | err = -ENOMEM; |
801 | goto out_err; | 801 | goto out_err; |
802 | } | ||
802 | 803 | ||
803 | kobj_set_kset_s(&connections_subsys, fuse_subsys); | 804 | connections_kobj = kobject_create_and_add("connections", fuse_kobj); |
804 | err = subsystem_register(&connections_subsys); | 805 | if (!connections_kobj) { |
805 | if (err) | 806 | err = -ENOMEM; |
806 | goto out_fuse_unregister; | 807 | goto out_fuse_unregister; |
808 | } | ||
807 | 809 | ||
808 | return 0; | 810 | return 0; |
809 | 811 | ||
810 | out_fuse_unregister: | 812 | out_fuse_unregister: |
811 | subsystem_unregister(&fuse_subsys); | 813 | kobject_put(fuse_kobj); |
812 | out_err: | 814 | out_err: |
813 | return err; | 815 | return err; |
814 | } | 816 | } |
815 | 817 | ||
816 | static void fuse_sysfs_cleanup(void) | 818 | static void fuse_sysfs_cleanup(void) |
817 | { | 819 | { |
818 | subsystem_unregister(&connections_subsys); | 820 | kobject_put(connections_kobj); |
819 | subsystem_unregister(&fuse_subsys); | 821 | kobject_put(fuse_kobj); |
820 | } | 822 | } |
821 | 823 | ||
822 | static int __init fuse_init(void) | 824 | static int __init fuse_init(void) |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 04ad0caebedb..8fff11058cee 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
@@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o | |||
2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | 2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ |
3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ | 3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ |
4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ | 4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ |
5 | ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
7 | 7 | ||
8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ | 8 | obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93fa427bb5f5..e4effc47abfc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -59,7 +59,6 @@ struct strip_mine { | |||
59 | static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | 59 | static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, |
60 | u64 block, struct page *page) | 60 | u64 block, struct page *page) |
61 | { | 61 | { |
62 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
63 | struct inode *inode = &ip->i_inode; | 62 | struct inode *inode = &ip->i_inode; |
64 | struct buffer_head *bh; | 63 | struct buffer_head *bh; |
65 | int release = 0; | 64 | int release = 0; |
@@ -95,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
95 | set_buffer_uptodate(bh); | 94 | set_buffer_uptodate(bh); |
96 | if (!gfs2_is_jdata(ip)) | 95 | if (!gfs2_is_jdata(ip)) |
97 | mark_buffer_dirty(bh); | 96 | mark_buffer_dirty(bh); |
98 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 97 | if (!gfs2_is_writeback(ip)) |
99 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 98 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
100 | 99 | ||
101 | if (release) { | 100 | if (release) { |
@@ -453,8 +452,8 @@ static inline void bmap_unlock(struct inode *inode, int create) | |||
453 | * Returns: errno | 452 | * Returns: errno |
454 | */ | 453 | */ |
455 | 454 | ||
456 | int gfs2_block_map(struct inode *inode, u64 lblock, int create, | 455 | int gfs2_block_map(struct inode *inode, sector_t lblock, |
457 | struct buffer_head *bh_map) | 456 | struct buffer_head *bh_map, int create) |
458 | { | 457 | { |
459 | struct gfs2_inode *ip = GFS2_I(inode); | 458 | struct gfs2_inode *ip = GFS2_I(inode); |
460 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 459 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -470,6 +469,7 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
470 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; | 469 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; |
471 | struct metapath mp; | 470 | struct metapath mp; |
472 | u64 size; | 471 | u64 size; |
472 | struct buffer_head *dibh = NULL; | ||
473 | 473 | ||
474 | BUG_ON(maxlen == 0); | 474 | BUG_ON(maxlen == 0); |
475 | 475 | ||
@@ -500,6 +500,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
500 | error = gfs2_meta_inode_buffer(ip, &bh); | 500 | error = gfs2_meta_inode_buffer(ip, &bh); |
501 | if (error) | 501 | if (error) |
502 | goto out_fail; | 502 | goto out_fail; |
503 | dibh = bh; | ||
504 | get_bh(dibh); | ||
503 | 505 | ||
504 | for (x = 0; x < end_of_metadata; x++) { | 506 | for (x = 0; x < end_of_metadata; x++) { |
505 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); | 507 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); |
@@ -518,13 +520,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create, | |||
518 | if (boundary) | 520 | if (boundary) |
519 | set_buffer_boundary(bh_map); | 521 | set_buffer_boundary(bh_map); |
520 | if (new) { | 522 | if (new) { |
521 | struct buffer_head *dibh; | 523 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
522 | error = gfs2_meta_inode_buffer(ip, &dibh); | 524 | gfs2_dinode_out(ip, dibh->b_data); |
523 | if (!error) { | ||
524 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
525 | gfs2_dinode_out(ip, dibh->b_data); | ||
526 | brelse(dibh); | ||
527 | } | ||
528 | set_buffer_new(bh_map); | 525 | set_buffer_new(bh_map); |
529 | goto out_brelse; | 526 | goto out_brelse; |
530 | } | 527 | } |
@@ -545,6 +542,8 @@ out_brelse: | |||
545 | out_ok: | 542 | out_ok: |
546 | error = 0; | 543 | error = 0; |
547 | out_fail: | 544 | out_fail: |
545 | if (dibh) | ||
546 | brelse(dibh); | ||
548 | bmap_unlock(inode, create); | 547 | bmap_unlock(inode, create); |
549 | return error; | 548 | return error; |
550 | } | 549 | } |
@@ -560,7 +559,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi | |||
560 | BUG_ON(!new); | 559 | BUG_ON(!new); |
561 | 560 | ||
562 | bh.b_size = 1 << (inode->i_blkbits + 5); | 561 | bh.b_size = 1 << (inode->i_blkbits + 5); |
563 | ret = gfs2_block_map(inode, lblock, create, &bh); | 562 | ret = gfs2_block_map(inode, lblock, &bh, create); |
564 | *extlen = bh.b_size >> inode->i_blkbits; | 563 | *extlen = bh.b_size >> inode->i_blkbits; |
565 | *dblock = bh.b_blocknr; | 564 | *dblock = bh.b_blocknr; |
566 | if (buffer_new(&bh)) | 565 | if (buffer_new(&bh)) |
@@ -684,7 +683,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
684 | if (metadata) | 683 | if (metadata) |
685 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 684 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
686 | 685 | ||
687 | error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh); | 686 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); |
688 | if (error) | 687 | if (error) |
689 | return error; | 688 | return error; |
690 | 689 | ||
@@ -786,7 +785,7 @@ out_rg_gunlock: | |||
786 | out_rlist: | 785 | out_rlist: |
787 | gfs2_rlist_free(&rlist); | 786 | gfs2_rlist_free(&rlist); |
788 | out: | 787 | out: |
789 | gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh); | 788 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); |
790 | return error; | 789 | return error; |
791 | } | 790 | } |
792 | 791 | ||
@@ -879,7 +878,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
879 | { | 878 | { |
880 | struct inode *inode = mapping->host; | 879 | struct inode *inode = mapping->host; |
881 | struct gfs2_inode *ip = GFS2_I(inode); | 880 | struct gfs2_inode *ip = GFS2_I(inode); |
882 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
883 | loff_t from = inode->i_size; | 881 | loff_t from = inode->i_size; |
884 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 882 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
885 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 883 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
@@ -911,7 +909,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
911 | err = 0; | 909 | err = 0; |
912 | 910 | ||
913 | if (!buffer_mapped(bh)) { | 911 | if (!buffer_mapped(bh)) { |
914 | gfs2_get_block(inode, iblock, bh, 0); | 912 | gfs2_block_map(inode, iblock, bh, 0); |
915 | /* unmapped? It's a hole - nothing to do */ | 913 | /* unmapped? It's a hole - nothing to do */ |
916 | if (!buffer_mapped(bh)) | 914 | if (!buffer_mapped(bh)) |
917 | goto unlock; | 915 | goto unlock; |
@@ -931,7 +929,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
931 | err = 0; | 929 | err = 0; |
932 | } | 930 | } |
933 | 931 | ||
934 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 932 | if (!gfs2_is_writeback(ip)) |
935 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 933 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
936 | 934 | ||
937 | zero_user_page(page, offset, length, KM_USER0); | 935 | zero_user_page(page, offset, length, KM_USER0); |
@@ -1224,8 +1222,13 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1224 | do_div(lblock_stop, bsize); | 1222 | do_div(lblock_stop, bsize); |
1225 | } else { | 1223 | } else { |
1226 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | 1224 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; |
1225 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; | ||
1227 | lblock = offset >> shift; | 1226 | lblock = offset >> shift; |
1228 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1227 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
1228 | if (lblock_stop > end_of_file) { | ||
1229 | *alloc_required = 1; | ||
1230 | return 0; | ||
1231 | } | ||
1229 | } | 1232 | } |
1230 | 1233 | ||
1231 | for (; lblock < lblock_stop; lblock += extlen) { | 1234 | for (; lblock < lblock_stop; lblock += extlen) { |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index ac2fd04370dc..4e6cde2943bd 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
@@ -15,7 +15,7 @@ struct gfs2_inode; | |||
15 | struct page; | 15 | struct page; |
16 | 16 | ||
17 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 17 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
18 | int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh); | 18 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); |
19 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 19 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); |
20 | 20 | ||
21 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 21 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); |
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 3731ab0771d5..e51991947d2c 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c | |||
@@ -83,56 +83,6 @@ int gfs2_recoverd(void *data) | |||
83 | } | 83 | } |
84 | 84 | ||
85 | /** | 85 | /** |
86 | * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks | ||
87 | * @sdp: Pointer to GFS2 superblock | ||
88 | * | ||
89 | * Also, periodically check to make sure that we're using the most recent | ||
90 | * journal index. | ||
91 | */ | ||
92 | |||
93 | int gfs2_logd(void *data) | ||
94 | { | ||
95 | struct gfs2_sbd *sdp = data; | ||
96 | struct gfs2_holder ji_gh; | ||
97 | unsigned long t; | ||
98 | int need_flush; | ||
99 | |||
100 | while (!kthread_should_stop()) { | ||
101 | /* Advance the log tail */ | ||
102 | |||
103 | t = sdp->sd_log_flush_time + | ||
104 | gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; | ||
105 | |||
106 | gfs2_ail1_empty(sdp, DIO_ALL); | ||
107 | gfs2_log_lock(sdp); | ||
108 | need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); | ||
109 | gfs2_log_unlock(sdp); | ||
110 | if (need_flush || time_after_eq(jiffies, t)) { | ||
111 | gfs2_log_flush(sdp, NULL); | ||
112 | sdp->sd_log_flush_time = jiffies; | ||
113 | } | ||
114 | |||
115 | /* Check for latest journal index */ | ||
116 | |||
117 | t = sdp->sd_jindex_refresh_time + | ||
118 | gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; | ||
119 | |||
120 | if (time_after_eq(jiffies, t)) { | ||
121 | if (!gfs2_jindex_hold(sdp, &ji_gh)) | ||
122 | gfs2_glock_dq_uninit(&ji_gh); | ||
123 | sdp->sd_jindex_refresh_time = jiffies; | ||
124 | } | ||
125 | |||
126 | t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; | ||
127 | if (freezing(current)) | ||
128 | refrigerator(); | ||
129 | schedule_timeout_interruptible(t); | ||
130 | } | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * gfs2_quotad - Write cached quota changes into the quota file | 86 | * gfs2_quotad - Write cached quota changes into the quota file |
137 | * @sdp: Pointer to GFS2 superblock | 87 | * @sdp: Pointer to GFS2 superblock |
138 | * | 88 | * |
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h index 0de9b3557955..4be084fb6a62 100644 --- a/fs/gfs2/daemon.h +++ b/fs/gfs2/daemon.h | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | int gfs2_glockd(void *data); | 13 | int gfs2_glockd(void *data); |
14 | int gfs2_recoverd(void *data); | 14 | int gfs2_recoverd(void *data); |
15 | int gfs2_logd(void *data); | ||
16 | int gfs2_quotad(void *data); | 15 | int gfs2_quotad(void *data); |
17 | 16 | ||
18 | #endif /* __DAEMON_DOT_H__ */ | 17 | #endif /* __DAEMON_DOT_H__ */ |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 9949bb746a52..57e2ed932adc 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1876,7 +1876,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1876 | if (error) | 1876 | if (error) |
1877 | goto out; | 1877 | goto out; |
1878 | 1878 | ||
1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh); | 1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); |
1880 | if (error) | 1880 | if (error) |
1881 | goto out_qs; | 1881 | goto out_qs; |
1882 | 1882 | ||
@@ -1949,7 +1949,7 @@ out_rg_gunlock: | |||
1949 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); | 1949 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); |
1950 | out_rlist: | 1950 | out_rlist: |
1951 | gfs2_rlist_free(&rlist); | 1951 | gfs2_rlist_free(&rlist); |
1952 | gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh); | 1952 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); |
1953 | out_qs: | 1953 | out_qs: |
1954 | gfs2_quota_unhold(dip); | 1954 | gfs2_quota_unhold(dip); |
1955 | out: | 1955 | out: |
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index aa8dbf303f6d..f114ba2b3557 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c | |||
@@ -56,46 +56,6 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) | |||
56 | return type; | 56 | return type; |
57 | } | 57 | } |
58 | 58 | ||
59 | static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
60 | { | ||
61 | struct inode *inode = &ip->i_inode; | ||
62 | int error = permission(inode, MAY_READ, NULL); | ||
63 | if (error) | ||
64 | return error; | ||
65 | |||
66 | return gfs2_ea_get_i(ip, er); | ||
67 | } | ||
68 | |||
69 | static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
70 | { | ||
71 | struct inode *inode = &ip->i_inode; | ||
72 | |||
73 | if (S_ISREG(inode->i_mode) || | ||
74 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
75 | int error = permission(inode, MAY_WRITE, NULL); | ||
76 | if (error) | ||
77 | return error; | ||
78 | } else | ||
79 | return -EPERM; | ||
80 | |||
81 | return gfs2_ea_set_i(ip, er); | ||
82 | } | ||
83 | |||
84 | static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
85 | { | ||
86 | struct inode *inode = &ip->i_inode; | ||
87 | |||
88 | if (S_ISREG(inode->i_mode) || | ||
89 | (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { | ||
90 | int error = permission(inode, MAY_WRITE, NULL); | ||
91 | if (error) | ||
92 | return error; | ||
93 | } else | ||
94 | return -EPERM; | ||
95 | |||
96 | return gfs2_ea_remove_i(ip, er); | ||
97 | } | ||
98 | |||
99 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | 59 | static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) |
100 | { | 60 | { |
101 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && | 61 | if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && |
@@ -108,8 +68,6 @@ static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
108 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) | 68 | GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) |
109 | return -EOPNOTSUPP; | 69 | return -EOPNOTSUPP; |
110 | 70 | ||
111 | |||
112 | |||
113 | return gfs2_ea_get_i(ip, er); | 71 | return gfs2_ea_get_i(ip, er); |
114 | } | 72 | } |
115 | 73 | ||
@@ -170,40 +128,10 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | |||
170 | return gfs2_ea_remove_i(ip, er); | 128 | return gfs2_ea_remove_i(ip, er); |
171 | } | 129 | } |
172 | 130 | ||
173 | static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
174 | { | ||
175 | struct inode *inode = &ip->i_inode; | ||
176 | int error = permission(inode, MAY_READ, NULL); | ||
177 | if (error) | ||
178 | return error; | ||
179 | |||
180 | return gfs2_ea_get_i(ip, er); | ||
181 | } | ||
182 | |||
183 | static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
184 | { | ||
185 | struct inode *inode = &ip->i_inode; | ||
186 | int error = permission(inode, MAY_WRITE, NULL); | ||
187 | if (error) | ||
188 | return error; | ||
189 | |||
190 | return gfs2_ea_set_i(ip, er); | ||
191 | } | ||
192 | |||
193 | static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) | ||
194 | { | ||
195 | struct inode *inode = &ip->i_inode; | ||
196 | int error = permission(inode, MAY_WRITE, NULL); | ||
197 | if (error) | ||
198 | return error; | ||
199 | |||
200 | return gfs2_ea_remove_i(ip, er); | ||
201 | } | ||
202 | |||
203 | static const struct gfs2_eattr_operations gfs2_user_eaops = { | 131 | static const struct gfs2_eattr_operations gfs2_user_eaops = { |
204 | .eo_get = user_eo_get, | 132 | .eo_get = gfs2_ea_get_i, |
205 | .eo_set = user_eo_set, | 133 | .eo_set = gfs2_ea_set_i, |
206 | .eo_remove = user_eo_remove, | 134 | .eo_remove = gfs2_ea_remove_i, |
207 | .eo_name = "user", | 135 | .eo_name = "user", |
208 | }; | 136 | }; |
209 | 137 | ||
@@ -215,9 +143,9 @@ const struct gfs2_eattr_operations gfs2_system_eaops = { | |||
215 | }; | 143 | }; |
216 | 144 | ||
217 | static const struct gfs2_eattr_operations gfs2_security_eaops = { | 145 | static const struct gfs2_eattr_operations gfs2_security_eaops = { |
218 | .eo_get = security_eo_get, | 146 | .eo_get = gfs2_ea_get_i, |
219 | .eo_set = security_eo_set, | 147 | .eo_set = gfs2_ea_set_i, |
220 | .eo_remove = security_eo_remove, | 148 | .eo_remove = gfs2_ea_remove_i, |
221 | .eo_name = "security", | 149 | .eo_name = "security", |
222 | }; | 150 | }; |
223 | 151 | ||
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 2a7435b5c4dc..bee99704ea10 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
@@ -1418,7 +1418,7 @@ out: | |||
1418 | static int ea_dealloc_block(struct gfs2_inode *ip) | 1418 | static int ea_dealloc_block(struct gfs2_inode *ip) |
1419 | { | 1419 | { |
1420 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1420 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1421 | struct gfs2_alloc *al = &ip->i_alloc; | 1421 | struct gfs2_alloc *al = ip->i_alloc; |
1422 | struct gfs2_rgrpd *rgd; | 1422 | struct gfs2_rgrpd *rgd; |
1423 | struct buffer_head *dibh; | 1423 | struct buffer_head *dibh; |
1424 | int error; | 1424 | int error; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a37efe4aae6f..80e09c50590a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -217,7 +217,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) | |||
217 | if (atomic_dec_and_test(&gl->gl_ref)) { | 217 | if (atomic_dec_and_test(&gl->gl_ref)) { |
218 | hlist_del(&gl->gl_list); | 218 | hlist_del(&gl->gl_list); |
219 | write_unlock(gl_lock_addr(gl->gl_hash)); | 219 | write_unlock(gl_lock_addr(gl->gl_hash)); |
220 | BUG_ON(spin_is_locked(&gl->gl_spin)); | ||
221 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); | 220 | gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); |
222 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); | 221 | gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); |
223 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); | 222 | gfs2_assert(sdp, list_empty(&gl->gl_holders)); |
@@ -346,7 +345,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
346 | gl->gl_object = NULL; | 345 | gl->gl_object = NULL; |
347 | gl->gl_sbd = sdp; | 346 | gl->gl_sbd = sdp; |
348 | gl->gl_aspace = NULL; | 347 | gl->gl_aspace = NULL; |
349 | lops_init_le(&gl->gl_le, &gfs2_glock_lops); | ||
350 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); | 348 | INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); |
351 | 349 | ||
352 | /* If this glock protects actual on-disk data or metadata blocks, | 350 | /* If this glock protects actual on-disk data or metadata blocks, |
@@ -461,7 +459,6 @@ static void wait_on_holder(struct gfs2_holder *gh) | |||
461 | 459 | ||
462 | static void gfs2_demote_wake(struct gfs2_glock *gl) | 460 | static void gfs2_demote_wake(struct gfs2_glock *gl) |
463 | { | 461 | { |
464 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | ||
465 | gl->gl_demote_state = LM_ST_EXCLUSIVE; | 462 | gl->gl_demote_state = LM_ST_EXCLUSIVE; |
466 | clear_bit(GLF_DEMOTE, &gl->gl_flags); | 463 | clear_bit(GLF_DEMOTE, &gl->gl_flags); |
467 | smp_mb__after_clear_bit(); | 464 | smp_mb__after_clear_bit(); |
@@ -507,21 +504,12 @@ static int rq_mutex(struct gfs2_holder *gh) | |||
507 | static int rq_promote(struct gfs2_holder *gh) | 504 | static int rq_promote(struct gfs2_holder *gh) |
508 | { | 505 | { |
509 | struct gfs2_glock *gl = gh->gh_gl; | 506 | struct gfs2_glock *gl = gh->gh_gl; |
510 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
511 | 507 | ||
512 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { | 508 | if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { |
513 | if (list_empty(&gl->gl_holders)) { | 509 | if (list_empty(&gl->gl_holders)) { |
514 | gl->gl_req_gh = gh; | 510 | gl->gl_req_gh = gh; |
515 | set_bit(GLF_LOCK, &gl->gl_flags); | 511 | set_bit(GLF_LOCK, &gl->gl_flags); |
516 | spin_unlock(&gl->gl_spin); | 512 | spin_unlock(&gl->gl_spin); |
517 | |||
518 | if (atomic_read(&sdp->sd_reclaim_count) > | ||
519 | gfs2_tune_get(sdp, gt_reclaim_limit) && | ||
520 | !(gh->gh_flags & LM_FLAG_PRIORITY)) { | ||
521 | gfs2_reclaim_glock(sdp); | ||
522 | gfs2_reclaim_glock(sdp); | ||
523 | } | ||
524 | |||
525 | gfs2_glock_xmote_th(gh->gh_gl, gh); | 513 | gfs2_glock_xmote_th(gh->gh_gl, gh); |
526 | spin_lock(&gl->gl_spin); | 514 | spin_lock(&gl->gl_spin); |
527 | } | 515 | } |
@@ -567,7 +555,10 @@ static int rq_demote(struct gfs2_glock *gl) | |||
567 | gfs2_demote_wake(gl); | 555 | gfs2_demote_wake(gl); |
568 | return 0; | 556 | return 0; |
569 | } | 557 | } |
558 | |||
570 | set_bit(GLF_LOCK, &gl->gl_flags); | 559 | set_bit(GLF_LOCK, &gl->gl_flags); |
560 | set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
561 | |||
571 | if (gl->gl_demote_state == LM_ST_UNLOCKED || | 562 | if (gl->gl_demote_state == LM_ST_UNLOCKED || |
572 | gl->gl_state != LM_ST_EXCLUSIVE) { | 563 | gl->gl_state != LM_ST_EXCLUSIVE) { |
573 | spin_unlock(&gl->gl_spin); | 564 | spin_unlock(&gl->gl_spin); |
@@ -576,7 +567,9 @@ static int rq_demote(struct gfs2_glock *gl) | |||
576 | spin_unlock(&gl->gl_spin); | 567 | spin_unlock(&gl->gl_spin); |
577 | gfs2_glock_xmote_th(gl, NULL); | 568 | gfs2_glock_xmote_th(gl, NULL); |
578 | } | 569 | } |
570 | |||
579 | spin_lock(&gl->gl_spin); | 571 | spin_lock(&gl->gl_spin); |
572 | clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); | ||
580 | 573 | ||
581 | return 0; | 574 | return 0; |
582 | } | 575 | } |
@@ -598,23 +591,18 @@ static void run_queue(struct gfs2_glock *gl) | |||
598 | if (!list_empty(&gl->gl_waiters1)) { | 591 | if (!list_empty(&gl->gl_waiters1)) { |
599 | gh = list_entry(gl->gl_waiters1.next, | 592 | gh = list_entry(gl->gl_waiters1.next, |
600 | struct gfs2_holder, gh_list); | 593 | struct gfs2_holder, gh_list); |
601 | 594 | blocked = rq_mutex(gh); | |
602 | if (test_bit(HIF_MUTEX, &gh->gh_iflags)) | ||
603 | blocked = rq_mutex(gh); | ||
604 | else | ||
605 | gfs2_assert_warn(gl->gl_sbd, 0); | ||
606 | |||
607 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | 595 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { |
608 | blocked = rq_demote(gl); | 596 | blocked = rq_demote(gl); |
597 | if (gl->gl_waiters2 && !blocked) { | ||
598 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
599 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
600 | } | ||
601 | gl->gl_waiters2 = 0; | ||
609 | } else if (!list_empty(&gl->gl_waiters3)) { | 602 | } else if (!list_empty(&gl->gl_waiters3)) { |
610 | gh = list_entry(gl->gl_waiters3.next, | 603 | gh = list_entry(gl->gl_waiters3.next, |
611 | struct gfs2_holder, gh_list); | 604 | struct gfs2_holder, gh_list); |
612 | 605 | blocked = rq_promote(gh); | |
613 | if (test_bit(HIF_PROMOTE, &gh->gh_iflags)) | ||
614 | blocked = rq_promote(gh); | ||
615 | else | ||
616 | gfs2_assert_warn(gl->gl_sbd, 0); | ||
617 | |||
618 | } else | 606 | } else |
619 | break; | 607 | break; |
620 | 608 | ||
@@ -632,27 +620,21 @@ static void run_queue(struct gfs2_glock *gl) | |||
632 | 620 | ||
633 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) | 621 | static void gfs2_glmutex_lock(struct gfs2_glock *gl) |
634 | { | 622 | { |
635 | struct gfs2_holder gh; | ||
636 | |||
637 | gfs2_holder_init(gl, 0, 0, &gh); | ||
638 | set_bit(HIF_MUTEX, &gh.gh_iflags); | ||
639 | if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) | ||
640 | BUG(); | ||
641 | |||
642 | spin_lock(&gl->gl_spin); | 623 | spin_lock(&gl->gl_spin); |
643 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 624 | if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
625 | struct gfs2_holder gh; | ||
626 | |||
627 | gfs2_holder_init(gl, 0, 0, &gh); | ||
628 | set_bit(HIF_WAIT, &gh.gh_iflags); | ||
644 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); | 629 | list_add_tail(&gh.gh_list, &gl->gl_waiters1); |
630 | spin_unlock(&gl->gl_spin); | ||
631 | wait_on_holder(&gh); | ||
632 | gfs2_holder_uninit(&gh); | ||
645 | } else { | 633 | } else { |
646 | gl->gl_owner_pid = current->pid; | 634 | gl->gl_owner_pid = current->pid; |
647 | gl->gl_ip = (unsigned long)__builtin_return_address(0); | 635 | gl->gl_ip = (unsigned long)__builtin_return_address(0); |
648 | clear_bit(HIF_WAIT, &gh.gh_iflags); | 636 | spin_unlock(&gl->gl_spin); |
649 | smp_mb(); | ||
650 | wake_up_bit(&gh.gh_iflags, HIF_WAIT); | ||
651 | } | 637 | } |
652 | spin_unlock(&gl->gl_spin); | ||
653 | |||
654 | wait_on_holder(&gh); | ||
655 | gfs2_holder_uninit(&gh); | ||
656 | } | 638 | } |
657 | 639 | ||
658 | /** | 640 | /** |
@@ -691,7 +673,6 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) | |||
691 | gl->gl_owner_pid = 0; | 673 | gl->gl_owner_pid = 0; |
692 | gl->gl_ip = 0; | 674 | gl->gl_ip = 0; |
693 | run_queue(gl); | 675 | run_queue(gl); |
694 | BUG_ON(!spin_is_locked(&gl->gl_spin)); | ||
695 | spin_unlock(&gl->gl_spin); | 676 | spin_unlock(&gl->gl_spin); |
696 | } | 677 | } |
697 | 678 | ||
@@ -722,7 +703,10 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
722 | } | 703 | } |
723 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 704 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
724 | gl->gl_demote_state != state) { | 705 | gl->gl_demote_state != state) { |
725 | gl->gl_demote_state = LM_ST_UNLOCKED; | 706 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) |
707 | gl->gl_waiters2 = 1; | ||
708 | else | ||
709 | gl->gl_demote_state = LM_ST_UNLOCKED; | ||
726 | } | 710 | } |
727 | spin_unlock(&gl->gl_spin); | 711 | spin_unlock(&gl->gl_spin); |
728 | } | 712 | } |
@@ -943,8 +927,8 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
943 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 927 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
944 | unsigned int ret; | 928 | unsigned int ret; |
945 | 929 | ||
946 | if (glops->go_drop_th) | 930 | if (glops->go_xmote_th) |
947 | glops->go_drop_th(gl); | 931 | glops->go_xmote_th(gl); |
948 | 932 | ||
949 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 933 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
950 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 934 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
@@ -1156,8 +1140,6 @@ restart: | |||
1156 | return -EIO; | 1140 | return -EIO; |
1157 | } | 1141 | } |
1158 | 1142 | ||
1159 | set_bit(HIF_PROMOTE, &gh->gh_iflags); | ||
1160 | |||
1161 | spin_lock(&gl->gl_spin); | 1143 | spin_lock(&gl->gl_spin); |
1162 | add_to_queue(gh); | 1144 | add_to_queue(gh); |
1163 | run_queue(gl); | 1145 | run_queue(gl); |
@@ -1248,12 +1230,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1248 | list_del_init(&gh->gh_list); | 1230 | list_del_init(&gh->gh_list); |
1249 | 1231 | ||
1250 | if (list_empty(&gl->gl_holders)) { | 1232 | if (list_empty(&gl->gl_holders)) { |
1251 | spin_unlock(&gl->gl_spin); | 1233 | if (glops->go_unlock) { |
1252 | 1234 | spin_unlock(&gl->gl_spin); | |
1253 | if (glops->go_unlock) | ||
1254 | glops->go_unlock(gh); | 1235 | glops->go_unlock(gh); |
1255 | 1236 | spin_lock(&gl->gl_spin); | |
1256 | spin_lock(&gl->gl_spin); | 1237 | } |
1257 | gl->gl_stamp = jiffies; | 1238 | gl->gl_stamp = jiffies; |
1258 | } | 1239 | } |
1259 | 1240 | ||
@@ -1910,8 +1891,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | |||
1910 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | 1891 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); |
1911 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | 1892 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); |
1912 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | 1893 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); |
1913 | print_dbg(gi, " le = %s\n", | ||
1914 | (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); | ||
1915 | print_dbg(gi, " reclaim = %s\n", | 1894 | print_dbg(gi, " reclaim = %s\n", |
1916 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); | 1895 | (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); |
1917 | if (gl->gl_aspace) | 1896 | if (gl->gl_aspace) |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4670dcb2a877..c663b7a0f410 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -56,7 +56,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
56 | bd = list_entry(head->next, struct gfs2_bufdata, | 56 | bd = list_entry(head->next, struct gfs2_bufdata, |
57 | bd_ail_gl_list); | 57 | bd_ail_gl_list); |
58 | bh = bd->bd_bh; | 58 | bh = bd->bd_bh; |
59 | gfs2_remove_from_ail(NULL, bd); | 59 | gfs2_remove_from_ail(bd); |
60 | bd->bd_bh = NULL; | 60 | bd->bd_bh = NULL; |
61 | bh->b_private = NULL; | 61 | bh->b_private = NULL; |
62 | bd->bd_blkno = bh->b_blocknr; | 62 | bd->bd_blkno = bh->b_blocknr; |
@@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) | |||
86 | if (!ip || !S_ISREG(inode->i_mode)) | 86 | if (!ip || !S_ISREG(inode->i_mode)) |
87 | return; | 87 | return; |
88 | 88 | ||
89 | if (!test_bit(GIF_PAGED, &ip->i_flags)) | ||
90 | return; | ||
91 | |||
92 | unmap_shared_mapping_range(inode->i_mapping, 0, 0); | 89 | unmap_shared_mapping_range(inode->i_mapping, 0, 0); |
93 | |||
94 | if (test_bit(GIF_SW_PAGED, &ip->i_flags)) | 90 | if (test_bit(GIF_SW_PAGED, &ip->i_flags)) |
95 | set_bit(GLF_DIRTY, &gl->gl_flags); | 91 | set_bit(GLF_DIRTY, &gl->gl_flags); |
96 | 92 | ||
97 | clear_bit(GIF_SW_PAGED, &ip->i_flags); | ||
98 | } | 93 | } |
99 | 94 | ||
100 | /** | 95 | /** |
@@ -143,44 +138,34 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
143 | static void inode_go_sync(struct gfs2_glock *gl) | 138 | static void inode_go_sync(struct gfs2_glock *gl) |
144 | { | 139 | { |
145 | struct gfs2_inode *ip = gl->gl_object; | 140 | struct gfs2_inode *ip = gl->gl_object; |
141 | struct address_space *metamapping = gl->gl_aspace->i_mapping; | ||
142 | int error; | ||
143 | |||
144 | if (gl->gl_state != LM_ST_UNLOCKED) | ||
145 | gfs2_pte_inval(gl); | ||
146 | if (gl->gl_state != LM_ST_EXCLUSIVE) | ||
147 | return; | ||
146 | 148 | ||
147 | if (ip && !S_ISREG(ip->i_inode.i_mode)) | 149 | if (ip && !S_ISREG(ip->i_inode.i_mode)) |
148 | ip = NULL; | 150 | ip = NULL; |
149 | 151 | ||
150 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { | 152 | if (test_bit(GLF_DIRTY, &gl->gl_flags)) { |
151 | if (ip && !gfs2_is_jdata(ip)) | ||
152 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
153 | gfs2_log_flush(gl->gl_sbd, gl); | 153 | gfs2_log_flush(gl->gl_sbd, gl); |
154 | if (ip && gfs2_is_jdata(ip)) | 154 | filemap_fdatawrite(metamapping); |
155 | filemap_fdatawrite(ip->i_inode.i_mapping); | ||
156 | gfs2_meta_sync(gl); | ||
157 | if (ip) { | 155 | if (ip) { |
158 | struct address_space *mapping = ip->i_inode.i_mapping; | 156 | struct address_space *mapping = ip->i_inode.i_mapping; |
159 | int error = filemap_fdatawait(mapping); | 157 | filemap_fdatawrite(mapping); |
158 | error = filemap_fdatawait(mapping); | ||
160 | mapping_set_error(mapping, error); | 159 | mapping_set_error(mapping, error); |
161 | } | 160 | } |
161 | error = filemap_fdatawait(metamapping); | ||
162 | mapping_set_error(metamapping, error); | ||
162 | clear_bit(GLF_DIRTY, &gl->gl_flags); | 163 | clear_bit(GLF_DIRTY, &gl->gl_flags); |
163 | gfs2_ail_empty_gl(gl); | 164 | gfs2_ail_empty_gl(gl); |
164 | } | 165 | } |
165 | } | 166 | } |
166 | 167 | ||
167 | /** | 168 | /** |
168 | * inode_go_xmote_th - promote/demote a glock | ||
169 | * @gl: the glock | ||
170 | * @state: the requested state | ||
171 | * @flags: | ||
172 | * | ||
173 | */ | ||
174 | |||
175 | static void inode_go_xmote_th(struct gfs2_glock *gl) | ||
176 | { | ||
177 | if (gl->gl_state != LM_ST_UNLOCKED) | ||
178 | gfs2_pte_inval(gl); | ||
179 | if (gl->gl_state == LM_ST_EXCLUSIVE) | ||
180 | inode_go_sync(gl); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * inode_go_xmote_bh - After promoting/demoting a glock | 169 | * inode_go_xmote_bh - After promoting/demoting a glock |
185 | * @gl: the glock | 170 | * @gl: the glock |
186 | * | 171 | * |
@@ -201,22 +186,6 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) | |||
201 | } | 186 | } |
202 | 187 | ||
203 | /** | 188 | /** |
204 | * inode_go_drop_th - unlock a glock | ||
205 | * @gl: the glock | ||
206 | * | ||
207 | * Invoked from rq_demote(). | ||
208 | * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) | ||
209 | * is being purged from our node's glock cache; we're dropping lock. | ||
210 | */ | ||
211 | |||
212 | static void inode_go_drop_th(struct gfs2_glock *gl) | ||
213 | { | ||
214 | gfs2_pte_inval(gl); | ||
215 | if (gl->gl_state == LM_ST_EXCLUSIVE) | ||
216 | inode_go_sync(gl); | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * inode_go_inval - prepare a inode glock to be released | 189 | * inode_go_inval - prepare a inode glock to be released |
221 | * @gl: the glock | 190 | * @gl: the glock |
222 | * @flags: | 191 | * @flags: |
@@ -234,10 +203,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) | |||
234 | set_bit(GIF_INVALID, &ip->i_flags); | 203 | set_bit(GIF_INVALID, &ip->i_flags); |
235 | } | 204 | } |
236 | 205 | ||
237 | if (ip && S_ISREG(ip->i_inode.i_mode)) { | 206 | if (ip && S_ISREG(ip->i_inode.i_mode)) |
238 | truncate_inode_pages(ip->i_inode.i_mapping, 0); | 207 | truncate_inode_pages(ip->i_inode.i_mapping, 0); |
239 | clear_bit(GIF_PAGED, &ip->i_flags); | ||
240 | } | ||
241 | } | 208 | } |
242 | 209 | ||
243 | /** | 210 | /** |
@@ -294,23 +261,6 @@ static int inode_go_lock(struct gfs2_holder *gh) | |||
294 | } | 261 | } |
295 | 262 | ||
296 | /** | 263 | /** |
297 | * inode_go_unlock - operation done before an inode lock is unlocked by a | ||
298 | * process | ||
299 | * @gl: the glock | ||
300 | * @flags: | ||
301 | * | ||
302 | */ | ||
303 | |||
304 | static void inode_go_unlock(struct gfs2_holder *gh) | ||
305 | { | ||
306 | struct gfs2_glock *gl = gh->gh_gl; | ||
307 | struct gfs2_inode *ip = gl->gl_object; | ||
308 | |||
309 | if (ip) | ||
310 | gfs2_meta_cache_flush(ip); | ||
311 | } | ||
312 | |||
313 | /** | ||
314 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | 264 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock |
315 | * @gl: the glock | 265 | * @gl: the glock |
316 | * | 266 | * |
@@ -350,14 +300,14 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) | |||
350 | } | 300 | } |
351 | 301 | ||
352 | /** | 302 | /** |
353 | * trans_go_xmote_th - promote/demote the transaction glock | 303 | * trans_go_sync - promote/demote the transaction glock |
354 | * @gl: the glock | 304 | * @gl: the glock |
355 | * @state: the requested state | 305 | * @state: the requested state |
356 | * @flags: | 306 | * @flags: |
357 | * | 307 | * |
358 | */ | 308 | */ |
359 | 309 | ||
360 | static void trans_go_xmote_th(struct gfs2_glock *gl) | 310 | static void trans_go_sync(struct gfs2_glock *gl) |
361 | { | 311 | { |
362 | struct gfs2_sbd *sdp = gl->gl_sbd; | 312 | struct gfs2_sbd *sdp = gl->gl_sbd; |
363 | 313 | ||
@@ -384,7 +334,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
384 | 334 | ||
385 | if (gl->gl_state != LM_ST_UNLOCKED && | 335 | if (gl->gl_state != LM_ST_UNLOCKED && |
386 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | 336 | test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { |
387 | gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode)); | ||
388 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 337 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
389 | 338 | ||
390 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 339 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -402,24 +351,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) | |||
402 | } | 351 | } |
403 | 352 | ||
404 | /** | 353 | /** |
405 | * trans_go_drop_th - unlock the transaction glock | ||
406 | * @gl: the glock | ||
407 | * | ||
408 | * We want to sync the device even with localcaching. Remember | ||
409 | * that localcaching journal replay only marks buffers dirty. | ||
410 | */ | ||
411 | |||
412 | static void trans_go_drop_th(struct gfs2_glock *gl) | ||
413 | { | ||
414 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
415 | |||
416 | if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { | ||
417 | gfs2_meta_syncfs(sdp); | ||
418 | gfs2_log_shutdown(sdp); | ||
419 | } | ||
420 | } | ||
421 | |||
422 | /** | ||
423 | * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock | 354 | * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock |
424 | * @gl: the glock | 355 | * @gl: the glock |
425 | * | 356 | * |
@@ -433,25 +364,21 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) | |||
433 | 364 | ||
434 | const struct gfs2_glock_operations gfs2_meta_glops = { | 365 | const struct gfs2_glock_operations gfs2_meta_glops = { |
435 | .go_xmote_th = meta_go_sync, | 366 | .go_xmote_th = meta_go_sync, |
436 | .go_drop_th = meta_go_sync, | ||
437 | .go_type = LM_TYPE_META, | 367 | .go_type = LM_TYPE_META, |
438 | }; | 368 | }; |
439 | 369 | ||
440 | const struct gfs2_glock_operations gfs2_inode_glops = { | 370 | const struct gfs2_glock_operations gfs2_inode_glops = { |
441 | .go_xmote_th = inode_go_xmote_th, | 371 | .go_xmote_th = inode_go_sync, |
442 | .go_xmote_bh = inode_go_xmote_bh, | 372 | .go_xmote_bh = inode_go_xmote_bh, |
443 | .go_drop_th = inode_go_drop_th, | ||
444 | .go_inval = inode_go_inval, | 373 | .go_inval = inode_go_inval, |
445 | .go_demote_ok = inode_go_demote_ok, | 374 | .go_demote_ok = inode_go_demote_ok, |
446 | .go_lock = inode_go_lock, | 375 | .go_lock = inode_go_lock, |
447 | .go_unlock = inode_go_unlock, | ||
448 | .go_type = LM_TYPE_INODE, | 376 | .go_type = LM_TYPE_INODE, |
449 | .go_min_hold_time = HZ / 10, | 377 | .go_min_hold_time = HZ / 10, |
450 | }; | 378 | }; |
451 | 379 | ||
452 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 380 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
453 | .go_xmote_th = meta_go_sync, | 381 | .go_xmote_th = meta_go_sync, |
454 | .go_drop_th = meta_go_sync, | ||
455 | .go_inval = meta_go_inval, | 382 | .go_inval = meta_go_inval, |
456 | .go_demote_ok = rgrp_go_demote_ok, | 383 | .go_demote_ok = rgrp_go_demote_ok, |
457 | .go_lock = rgrp_go_lock, | 384 | .go_lock = rgrp_go_lock, |
@@ -461,9 +388,8 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { | |||
461 | }; | 388 | }; |
462 | 389 | ||
463 | const struct gfs2_glock_operations gfs2_trans_glops = { | 390 | const struct gfs2_glock_operations gfs2_trans_glops = { |
464 | .go_xmote_th = trans_go_xmote_th, | 391 | .go_xmote_th = trans_go_sync, |
465 | .go_xmote_bh = trans_go_xmote_bh, | 392 | .go_xmote_bh = trans_go_xmote_bh, |
466 | .go_drop_th = trans_go_drop_th, | ||
467 | .go_type = LM_TYPE_NONDISK, | 393 | .go_type = LM_TYPE_NONDISK, |
468 | }; | 394 | }; |
469 | 395 | ||
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eaddfb5a8e6f..513aaf0dc0ab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -131,7 +131,6 @@ struct gfs2_bufdata { | |||
131 | struct gfs2_glock_operations { | 131 | struct gfs2_glock_operations { |
132 | void (*go_xmote_th) (struct gfs2_glock *gl); | 132 | void (*go_xmote_th) (struct gfs2_glock *gl); |
133 | void (*go_xmote_bh) (struct gfs2_glock *gl); | 133 | void (*go_xmote_bh) (struct gfs2_glock *gl); |
134 | void (*go_drop_th) (struct gfs2_glock *gl); | ||
135 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 134 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
136 | int (*go_demote_ok) (struct gfs2_glock *gl); | 135 | int (*go_demote_ok) (struct gfs2_glock *gl); |
137 | int (*go_lock) (struct gfs2_holder *gh); | 136 | int (*go_lock) (struct gfs2_holder *gh); |
@@ -141,10 +140,6 @@ struct gfs2_glock_operations { | |||
141 | }; | 140 | }; |
142 | 141 | ||
143 | enum { | 142 | enum { |
144 | /* Actions */ | ||
145 | HIF_MUTEX = 0, | ||
146 | HIF_PROMOTE = 1, | ||
147 | |||
148 | /* States */ | 143 | /* States */ |
149 | HIF_HOLDER = 6, | 144 | HIF_HOLDER = 6, |
150 | HIF_FIRST = 7, | 145 | HIF_FIRST = 7, |
@@ -171,6 +166,8 @@ enum { | |||
171 | GLF_DEMOTE = 3, | 166 | GLF_DEMOTE = 3, |
172 | GLF_PENDING_DEMOTE = 4, | 167 | GLF_PENDING_DEMOTE = 4, |
173 | GLF_DIRTY = 5, | 168 | GLF_DIRTY = 5, |
169 | GLF_DEMOTE_IN_PROGRESS = 6, | ||
170 | GLF_LFLUSH = 7, | ||
174 | }; | 171 | }; |
175 | 172 | ||
176 | struct gfs2_glock { | 173 | struct gfs2_glock { |
@@ -190,6 +187,7 @@ struct gfs2_glock { | |||
190 | struct list_head gl_holders; | 187 | struct list_head gl_holders; |
191 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
192 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
190 | int gl_waiters2; /* GIF_DEMOTE */ | ||
193 | 191 | ||
194 | const struct gfs2_glock_operations *gl_ops; | 192 | const struct gfs2_glock_operations *gl_ops; |
195 | 193 | ||
@@ -210,7 +208,6 @@ struct gfs2_glock { | |||
210 | struct gfs2_sbd *gl_sbd; | 208 | struct gfs2_sbd *gl_sbd; |
211 | 209 | ||
212 | struct inode *gl_aspace; | 210 | struct inode *gl_aspace; |
213 | struct gfs2_log_element gl_le; | ||
214 | struct list_head gl_ail_list; | 211 | struct list_head gl_ail_list; |
215 | atomic_t gl_ail_count; | 212 | atomic_t gl_ail_count; |
216 | struct delayed_work gl_work; | 213 | struct delayed_work gl_work; |
@@ -239,7 +236,6 @@ struct gfs2_alloc { | |||
239 | enum { | 236 | enum { |
240 | GIF_INVALID = 0, | 237 | GIF_INVALID = 0, |
241 | GIF_QD_LOCKED = 1, | 238 | GIF_QD_LOCKED = 1, |
242 | GIF_PAGED = 2, | ||
243 | GIF_SW_PAGED = 3, | 239 | GIF_SW_PAGED = 3, |
244 | }; | 240 | }; |
245 | 241 | ||
@@ -268,14 +264,10 @@ struct gfs2_inode { | |||
268 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 264 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
269 | struct gfs2_holder i_iopen_gh; | 265 | struct gfs2_holder i_iopen_gh; |
270 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ | 266 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ |
271 | struct gfs2_alloc i_alloc; | 267 | struct gfs2_alloc *i_alloc; |
272 | u64 i_last_rg_alloc; | 268 | u64 i_last_rg_alloc; |
273 | 269 | ||
274 | spinlock_t i_spin; | ||
275 | struct rw_semaphore i_rw_mutex; | 270 | struct rw_semaphore i_rw_mutex; |
276 | unsigned long i_last_pfault; | ||
277 | |||
278 | struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; | ||
279 | }; | 271 | }; |
280 | 272 | ||
281 | /* | 273 | /* |
@@ -287,19 +279,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode) | |||
287 | return container_of(inode, struct gfs2_inode, i_inode); | 279 | return container_of(inode, struct gfs2_inode, i_inode); |
288 | } | 280 | } |
289 | 281 | ||
290 | /* To be removed? */ | 282 | static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode) |
291 | static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) | ||
292 | { | 283 | { |
293 | return inode->i_sb->s_fs_info; | 284 | return inode->i_sb->s_fs_info; |
294 | } | 285 | } |
295 | 286 | ||
296 | enum { | ||
297 | GFF_DID_DIRECT_ALLOC = 0, | ||
298 | GFF_EXLOCK = 1, | ||
299 | }; | ||
300 | |||
301 | struct gfs2_file { | 287 | struct gfs2_file { |
302 | unsigned long f_flags; /* GFF_... */ | ||
303 | struct mutex f_fl_mutex; | 288 | struct mutex f_fl_mutex; |
304 | struct gfs2_holder f_fl_gh; | 289 | struct gfs2_holder f_fl_gh; |
305 | }; | 290 | }; |
@@ -373,8 +358,17 @@ struct gfs2_ail { | |||
373 | u64 ai_sync_gen; | 358 | u64 ai_sync_gen; |
374 | }; | 359 | }; |
375 | 360 | ||
361 | struct gfs2_journal_extent { | ||
362 | struct list_head extent_list; | ||
363 | |||
364 | unsigned int lblock; /* First logical block */ | ||
365 | u64 dblock; /* First disk block */ | ||
366 | u64 blocks; | ||
367 | }; | ||
368 | |||
376 | struct gfs2_jdesc { | 369 | struct gfs2_jdesc { |
377 | struct list_head jd_list; | 370 | struct list_head jd_list; |
371 | struct list_head extent_list; | ||
378 | 372 | ||
379 | struct inode *jd_inode; | 373 | struct inode *jd_inode; |
380 | unsigned int jd_jid; | 374 | unsigned int jd_jid; |
@@ -421,13 +415,9 @@ struct gfs2_args { | |||
421 | struct gfs2_tune { | 415 | struct gfs2_tune { |
422 | spinlock_t gt_spin; | 416 | spinlock_t gt_spin; |
423 | 417 | ||
424 | unsigned int gt_ilimit; | ||
425 | unsigned int gt_ilimit_tries; | ||
426 | unsigned int gt_ilimit_min; | ||
427 | unsigned int gt_demote_secs; /* Cache retention for unheld glock */ | 418 | unsigned int gt_demote_secs; /* Cache retention for unheld glock */ |
428 | unsigned int gt_incore_log_blocks; | 419 | unsigned int gt_incore_log_blocks; |
429 | unsigned int gt_log_flush_secs; | 420 | unsigned int gt_log_flush_secs; |
430 | unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ | ||
431 | 421 | ||
432 | unsigned int gt_recoverd_secs; | 422 | unsigned int gt_recoverd_secs; |
433 | unsigned int gt_logd_secs; | 423 | unsigned int gt_logd_secs; |
@@ -443,10 +433,8 @@ struct gfs2_tune { | |||
443 | unsigned int gt_new_files_jdata; | 433 | unsigned int gt_new_files_jdata; |
444 | unsigned int gt_new_files_directio; | 434 | unsigned int gt_new_files_directio; |
445 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 435 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
446 | unsigned int gt_lockdump_size; | ||
447 | unsigned int gt_stall_secs; /* Detects trouble! */ | 436 | unsigned int gt_stall_secs; /* Detects trouble! */ |
448 | unsigned int gt_complain_secs; | 437 | unsigned int gt_complain_secs; |
449 | unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ | ||
450 | unsigned int gt_statfs_quantum; | 438 | unsigned int gt_statfs_quantum; |
451 | unsigned int gt_statfs_slow; | 439 | unsigned int gt_statfs_slow; |
452 | }; | 440 | }; |
@@ -539,7 +527,6 @@ struct gfs2_sbd { | |||
539 | /* StatFS stuff */ | 527 | /* StatFS stuff */ |
540 | 528 | ||
541 | spinlock_t sd_statfs_spin; | 529 | spinlock_t sd_statfs_spin; |
542 | struct mutex sd_statfs_mutex; | ||
543 | struct gfs2_statfs_change_host sd_statfs_master; | 530 | struct gfs2_statfs_change_host sd_statfs_master; |
544 | struct gfs2_statfs_change_host sd_statfs_local; | 531 | struct gfs2_statfs_change_host sd_statfs_local; |
545 | unsigned long sd_statfs_sync_time; | 532 | unsigned long sd_statfs_sync_time; |
@@ -602,20 +589,18 @@ struct gfs2_sbd { | |||
602 | unsigned int sd_log_commited_databuf; | 589 | unsigned int sd_log_commited_databuf; |
603 | unsigned int sd_log_commited_revoke; | 590 | unsigned int sd_log_commited_revoke; |
604 | 591 | ||
605 | unsigned int sd_log_num_gl; | ||
606 | unsigned int sd_log_num_buf; | 592 | unsigned int sd_log_num_buf; |
607 | unsigned int sd_log_num_revoke; | 593 | unsigned int sd_log_num_revoke; |
608 | unsigned int sd_log_num_rg; | 594 | unsigned int sd_log_num_rg; |
609 | unsigned int sd_log_num_databuf; | 595 | unsigned int sd_log_num_databuf; |
610 | 596 | ||
611 | struct list_head sd_log_le_gl; | ||
612 | struct list_head sd_log_le_buf; | 597 | struct list_head sd_log_le_buf; |
613 | struct list_head sd_log_le_revoke; | 598 | struct list_head sd_log_le_revoke; |
614 | struct list_head sd_log_le_rg; | 599 | struct list_head sd_log_le_rg; |
615 | struct list_head sd_log_le_databuf; | 600 | struct list_head sd_log_le_databuf; |
616 | struct list_head sd_log_le_ordered; | 601 | struct list_head sd_log_le_ordered; |
617 | 602 | ||
618 | unsigned int sd_log_blks_free; | 603 | atomic_t sd_log_blks_free; |
619 | struct mutex sd_log_reserve_mutex; | 604 | struct mutex sd_log_reserve_mutex; |
620 | 605 | ||
621 | u64 sd_log_sequence; | 606 | u64 sd_log_sequence; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5f6dc32946cd..728d3169e7bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include "log.h" | 31 | #include "log.h" |
32 | #include "meta_io.h" | 32 | #include "meta_io.h" |
33 | #include "ops_address.h" | 33 | #include "ops_address.h" |
34 | #include "ops_file.h" | ||
35 | #include "ops_inode.h" | 34 | #include "ops_inode.h" |
36 | #include "quota.h" | 35 | #include "quota.h" |
37 | #include "rgrp.h" | 36 | #include "rgrp.h" |
@@ -132,15 +131,21 @@ static struct inode *gfs2_iget_skip(struct super_block *sb, | |||
132 | 131 | ||
133 | void gfs2_set_iop(struct inode *inode) | 132 | void gfs2_set_iop(struct inode *inode) |
134 | { | 133 | { |
134 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
135 | umode_t mode = inode->i_mode; | 135 | umode_t mode = inode->i_mode; |
136 | 136 | ||
137 | if (S_ISREG(mode)) { | 137 | if (S_ISREG(mode)) { |
138 | inode->i_op = &gfs2_file_iops; | 138 | inode->i_op = &gfs2_file_iops; |
139 | inode->i_fop = &gfs2_file_fops; | 139 | if (sdp->sd_args.ar_localflocks) |
140 | inode->i_mapping->a_ops = &gfs2_file_aops; | 140 | inode->i_fop = &gfs2_file_fops_nolock; |
141 | else | ||
142 | inode->i_fop = &gfs2_file_fops; | ||
141 | } else if (S_ISDIR(mode)) { | 143 | } else if (S_ISDIR(mode)) { |
142 | inode->i_op = &gfs2_dir_iops; | 144 | inode->i_op = &gfs2_dir_iops; |
143 | inode->i_fop = &gfs2_dir_fops; | 145 | if (sdp->sd_args.ar_localflocks) |
146 | inode->i_fop = &gfs2_dir_fops_nolock; | ||
147 | else | ||
148 | inode->i_fop = &gfs2_dir_fops; | ||
144 | } else if (S_ISLNK(mode)) { | 149 | } else if (S_ISLNK(mode)) { |
145 | inode->i_op = &gfs2_symlink_iops; | 150 | inode->i_op = &gfs2_symlink_iops; |
146 | } else { | 151 | } else { |
@@ -291,12 +296,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
291 | di->di_entries = be32_to_cpu(str->di_entries); | 296 | di->di_entries = be32_to_cpu(str->di_entries); |
292 | 297 | ||
293 | di->di_eattr = be64_to_cpu(str->di_eattr); | 298 | di->di_eattr = be64_to_cpu(str->di_eattr); |
294 | return 0; | 299 | if (S_ISREG(ip->i_inode.i_mode)) |
295 | } | 300 | gfs2_set_aops(&ip->i_inode); |
296 | 301 | ||
297 | static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) | 302 | return 0; |
298 | { | ||
299 | ip->i_cache[0] = bh; | ||
300 | } | 303 | } |
301 | 304 | ||
302 | /** | 305 | /** |
@@ -366,7 +369,8 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
366 | if (error) | 369 | if (error) |
367 | goto out_rg_gunlock; | 370 | goto out_rg_gunlock; |
368 | 371 | ||
369 | gfs2_trans_add_gl(ip->i_gl); | 372 | set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); |
373 | set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); | ||
370 | 374 | ||
371 | gfs2_free_di(rgd, ip); | 375 | gfs2_free_di(rgd, ip); |
372 | 376 | ||
@@ -707,9 +711,10 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) | |||
707 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 711 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
708 | int error; | 712 | int error; |
709 | 713 | ||
710 | gfs2_alloc_get(dip); | 714 | if (gfs2_alloc_get(dip) == NULL) |
715 | return -ENOMEM; | ||
711 | 716 | ||
712 | dip->i_alloc.al_requested = RES_DINODE; | 717 | dip->i_alloc->al_requested = RES_DINODE; |
713 | error = gfs2_inplace_reserve(dip); | 718 | error = gfs2_inplace_reserve(dip); |
714 | if (error) | 719 | if (error) |
715 | goto out; | 720 | goto out; |
@@ -855,7 +860,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
855 | 860 | ||
856 | error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); | 861 | error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); |
857 | if (alloc_required < 0) | 862 | if (alloc_required < 0) |
858 | goto fail; | 863 | goto fail_quota_locks; |
859 | if (alloc_required) { | 864 | if (alloc_required) { |
860 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); | 865 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); |
861 | if (error) | 866 | if (error) |
@@ -896,7 +901,7 @@ fail_end_trans: | |||
896 | gfs2_trans_end(sdp); | 901 | gfs2_trans_end(sdp); |
897 | 902 | ||
898 | fail_ipreserv: | 903 | fail_ipreserv: |
899 | if (dip->i_alloc.al_rgd) | 904 | if (dip->i_alloc->al_rgd) |
900 | gfs2_inplace_release(dip); | 905 | gfs2_inplace_release(dip); |
901 | 906 | ||
902 | fail_quota_locks: | 907 | fail_quota_locks: |
@@ -966,7 +971,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
966 | struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; | 971 | struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; |
967 | int error; | 972 | int error; |
968 | u64 generation; | 973 | u64 generation; |
969 | struct buffer_head *bh=NULL; | 974 | struct buffer_head *bh = NULL; |
970 | 975 | ||
971 | if (!name->len || name->len > GFS2_FNAMESIZE) | 976 | if (!name->len || name->len > GFS2_FNAMESIZE) |
972 | return ERR_PTR(-ENAMETOOLONG); | 977 | return ERR_PTR(-ENAMETOOLONG); |
@@ -1003,8 +1008,6 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1003 | if (IS_ERR(inode)) | 1008 | if (IS_ERR(inode)) |
1004 | goto fail_gunlock2; | 1009 | goto fail_gunlock2; |
1005 | 1010 | ||
1006 | gfs2_inode_bh(GFS2_I(inode), bh); | ||
1007 | |||
1008 | error = gfs2_inode_refresh(GFS2_I(inode)); | 1011 | error = gfs2_inode_refresh(GFS2_I(inode)); |
1009 | if (error) | 1012 | if (error) |
1010 | goto fail_gunlock2; | 1013 | goto fail_gunlock2; |
@@ -1021,6 +1024,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
1021 | if (error) | 1024 | if (error) |
1022 | goto fail_gunlock2; | 1025 | goto fail_gunlock2; |
1023 | 1026 | ||
1027 | if (bh) | ||
1028 | brelse(bh); | ||
1024 | if (!inode) | 1029 | if (!inode) |
1025 | return ERR_PTR(-ENOMEM); | 1030 | return ERR_PTR(-ENOMEM); |
1026 | return inode; | 1031 | return inode; |
@@ -1032,6 +1037,8 @@ fail_gunlock2: | |||
1032 | fail_gunlock: | 1037 | fail_gunlock: |
1033 | gfs2_glock_dq(ghs); | 1038 | gfs2_glock_dq(ghs); |
1034 | fail: | 1039 | fail: |
1040 | if (bh) | ||
1041 | brelse(bh); | ||
1035 | return ERR_PTR(error); | 1042 | return ERR_PTR(error); |
1036 | } | 1043 | } |
1037 | 1044 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 351ac87ab384..d44650662615 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -20,6 +20,18 @@ static inline int gfs2_is_jdata(const struct gfs2_inode *ip) | |||
20 | return ip->i_di.di_flags & GFS2_DIF_JDATA; | 20 | return ip->i_di.di_flags & GFS2_DIF_JDATA; |
21 | } | 21 | } |
22 | 22 | ||
23 | static inline int gfs2_is_writeback(const struct gfs2_inode *ip) | ||
24 | { | ||
25 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
26 | return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip); | ||
27 | } | ||
28 | |||
29 | static inline int gfs2_is_ordered(const struct gfs2_inode *ip) | ||
30 | { | ||
31 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
32 | return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip); | ||
33 | } | ||
34 | |||
23 | static inline int gfs2_is_dir(const struct gfs2_inode *ip) | 35 | static inline int gfs2_is_dir(const struct gfs2_inode *ip) |
24 | { | 36 | { |
25 | return S_ISDIR(ip->i_inode.i_mode); | 37 | return S_ISDIR(ip->i_inode.i_mode); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 41c5b04caaba..f2efff424224 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
@@ -67,6 +67,11 @@ static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir) | |||
67 | memset(data, 0, 256); | 67 | memset(data, 0, 256); |
68 | strncpy(data, data_arg, 255); | 68 | strncpy(data, data_arg, 255); |
69 | 69 | ||
70 | if (!strlen(data)) { | ||
71 | log_error("no mount options, (u)mount helpers not installed"); | ||
72 | return -EINVAL; | ||
73 | } | ||
74 | |||
70 | for (options = data; (x = strsep(&options, ":")); ) { | 75 | for (options = data; (x = strsep(&options, ":")); ) { |
71 | if (!*x) | 76 | if (!*x) |
72 | continue; | 77 | continue; |
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index 1f7b038530b4..2ebd374b3143 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c | |||
@@ -89,15 +89,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, | |||
89 | op->info.number = name->ln_number; | 89 | op->info.number = name->ln_number; |
90 | op->info.start = fl->fl_start; | 90 | op->info.start = fl->fl_start; |
91 | op->info.end = fl->fl_end; | 91 | op->info.end = fl->fl_end; |
92 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
93 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { | 92 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { |
93 | /* fl_owner is lockd which doesn't distinguish | ||
94 | processes on the nfs client */ | ||
95 | op->info.owner = (__u64) fl->fl_pid; | ||
94 | xop->callback = fl->fl_lmops->fl_grant; | 96 | xop->callback = fl->fl_lmops->fl_grant; |
95 | locks_init_lock(&xop->flc); | 97 | locks_init_lock(&xop->flc); |
96 | locks_copy_lock(&xop->flc, fl); | 98 | locks_copy_lock(&xop->flc, fl); |
97 | xop->fl = fl; | 99 | xop->fl = fl; |
98 | xop->file = file; | 100 | xop->file = file; |
99 | } else | 101 | } else { |
102 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
100 | xop->callback = NULL; | 103 | xop->callback = NULL; |
104 | } | ||
101 | 105 | ||
102 | send_op(op); | 106 | send_op(op); |
103 | 107 | ||
@@ -203,7 +207,10 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, | |||
203 | op->info.number = name->ln_number; | 207 | op->info.number = name->ln_number; |
204 | op->info.start = fl->fl_start; | 208 | op->info.start = fl->fl_start; |
205 | op->info.end = fl->fl_end; | 209 | op->info.end = fl->fl_end; |
206 | op->info.owner = (__u64)(long) fl->fl_owner; | 210 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
211 | op->info.owner = (__u64) fl->fl_pid; | ||
212 | else | ||
213 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
207 | 214 | ||
208 | send_op(op); | 215 | send_op(op); |
209 | wait_event(recv_wq, (op->done != 0)); | 216 | wait_event(recv_wq, (op->done != 0)); |
@@ -242,7 +249,10 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, | |||
242 | op->info.number = name->ln_number; | 249 | op->info.number = name->ln_number; |
243 | op->info.start = fl->fl_start; | 250 | op->info.start = fl->fl_start; |
244 | op->info.end = fl->fl_end; | 251 | op->info.end = fl->fl_end; |
245 | op->info.owner = (__u64)(long) fl->fl_owner; | 252 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) |
253 | op->info.owner = (__u64) fl->fl_pid; | ||
254 | else | ||
255 | op->info.owner = (__u64)(long) fl->fl_owner; | ||
246 | 256 | ||
247 | send_op(op); | 257 | send_op(op); |
248 | wait_event(recv_wq, (op->done != 0)); | 258 | wait_event(recv_wq, (op->done != 0)); |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index ae9e6a25fe2b..a87b09839761 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
@@ -189,51 +189,39 @@ static struct kobj_type gdlm_ktype = { | |||
189 | .sysfs_ops = &gdlm_attr_ops, | 189 | .sysfs_ops = &gdlm_attr_ops, |
190 | }; | 190 | }; |
191 | 191 | ||
192 | static struct kset gdlm_kset = { | 192 | static struct kset *gdlm_kset; |
193 | .ktype = &gdlm_ktype, | ||
194 | }; | ||
195 | 193 | ||
196 | int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) | 194 | int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) |
197 | { | 195 | { |
198 | int error; | 196 | int error; |
199 | 197 | ||
200 | error = kobject_set_name(&ls->kobj, "%s", "lock_module"); | 198 | ls->kobj.kset = gdlm_kset; |
201 | if (error) { | 199 | error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj, |
202 | log_error("can't set kobj name %d", error); | 200 | "lock_module"); |
203 | return error; | ||
204 | } | ||
205 | |||
206 | ls->kobj.kset = &gdlm_kset; | ||
207 | ls->kobj.ktype = &gdlm_ktype; | ||
208 | ls->kobj.parent = fskobj; | ||
209 | |||
210 | error = kobject_register(&ls->kobj); | ||
211 | if (error) | 201 | if (error) |
212 | log_error("can't register kobj %d", error); | 202 | log_error("can't register kobj %d", error); |
203 | kobject_uevent(&ls->kobj, KOBJ_ADD); | ||
213 | 204 | ||
214 | return error; | 205 | return error; |
215 | } | 206 | } |
216 | 207 | ||
217 | void gdlm_kobject_release(struct gdlm_ls *ls) | 208 | void gdlm_kobject_release(struct gdlm_ls *ls) |
218 | { | 209 | { |
219 | kobject_unregister(&ls->kobj); | 210 | kobject_put(&ls->kobj); |
220 | } | 211 | } |
221 | 212 | ||
222 | int gdlm_sysfs_init(void) | 213 | int gdlm_sysfs_init(void) |
223 | { | 214 | { |
224 | int error; | 215 | gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); |
225 | 216 | if (!gdlm_kset) { | |
226 | kobject_set_name(&gdlm_kset.kobj, "lock_dlm"); | 217 | printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); |
227 | kobj_set_kset_s(&gdlm_kset, kernel_subsys); | 218 | return -ENOMEM; |
228 | error = kset_register(&gdlm_kset); | 219 | } |
229 | if (error) | 220 | return 0; |
230 | printk("lock_dlm: cannot register kset %d\n", error); | ||
231 | |||
232 | return error; | ||
233 | } | 221 | } |
234 | 222 | ||
235 | void gdlm_sysfs_exit(void) | 223 | void gdlm_sysfs_exit(void) |
236 | { | 224 | { |
237 | kset_unregister(&gdlm_kset); | 225 | kset_unregister(gdlm_kset); |
238 | } | 226 | } |
239 | 227 | ||
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index bd938f06481d..521694fc19d6 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
@@ -273,18 +273,13 @@ static int gdlm_thread(void *data, int blist) | |||
273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; | 273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; |
274 | struct gdlm_lock *lp = NULL; | 274 | struct gdlm_lock *lp = NULL; |
275 | uint8_t complete, blocking, submit, drop; | 275 | uint8_t complete, blocking, submit, drop; |
276 | DECLARE_WAITQUEUE(wait, current); | ||
277 | 276 | ||
278 | /* Only thread1 is allowed to do blocking callbacks since gfs | 277 | /* Only thread1 is allowed to do blocking callbacks since gfs |
279 | may wait for a completion callback within a blocking cb. */ | 278 | may wait for a completion callback within a blocking cb. */ |
280 | 279 | ||
281 | while (!kthread_should_stop()) { | 280 | while (!kthread_should_stop()) { |
282 | set_current_state(TASK_INTERRUPTIBLE); | 281 | wait_event_interruptible(ls->thread_wait, |
283 | add_wait_queue(&ls->thread_wait, &wait); | 282 | !no_work(ls, blist) || kthread_should_stop()); |
284 | if (no_work(ls, blist)) | ||
285 | schedule(); | ||
286 | remove_wait_queue(&ls->thread_wait, &wait); | ||
287 | set_current_state(TASK_RUNNING); | ||
288 | 283 | ||
289 | complete = blocking = submit = drop = 0; | 284 | complete = blocking = submit = drop = 0; |
290 | 285 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 7df702473252..161ab6f2058e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/crc32.h> | 16 | #include <linux/crc32.h> |
17 | #include <linux/lm_interface.h> | 17 | #include <linux/lm_interface.h> |
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/kthread.h> | ||
20 | #include <linux/freezer.h> | ||
19 | 21 | ||
20 | #include "gfs2.h" | 22 | #include "gfs2.h" |
21 | #include "incore.h" | 23 | #include "incore.h" |
@@ -68,14 +70,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | |||
68 | * | 70 | * |
69 | */ | 71 | */ |
70 | 72 | ||
71 | void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) | 73 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd) |
72 | { | 74 | { |
73 | bd->bd_ail = NULL; | 75 | bd->bd_ail = NULL; |
74 | list_del_init(&bd->bd_ail_st_list); | 76 | list_del_init(&bd->bd_ail_st_list); |
75 | list_del_init(&bd->bd_ail_gl_list); | 77 | list_del_init(&bd->bd_ail_gl_list); |
76 | atomic_dec(&bd->bd_gl->gl_ail_count); | 78 | atomic_dec(&bd->bd_gl->gl_ail_count); |
77 | if (mapping) | ||
78 | gfs2_meta_cache_flush(GFS2_I(mapping->host)); | ||
79 | brelse(bd->bd_bh); | 79 | brelse(bd->bd_bh); |
80 | } | 80 | } |
81 | 81 | ||
@@ -92,8 +92,6 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
92 | struct buffer_head *bh; | 92 | struct buffer_head *bh; |
93 | int retry; | 93 | int retry; |
94 | 94 | ||
95 | BUG_ON(!spin_is_locked(&sdp->sd_log_lock)); | ||
96 | |||
97 | do { | 95 | do { |
98 | retry = 0; | 96 | retry = 0; |
99 | 97 | ||
@@ -210,7 +208,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) | |||
210 | gfs2_log_unlock(sdp); | 208 | gfs2_log_unlock(sdp); |
211 | } | 209 | } |
212 | 210 | ||
213 | int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | 211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) |
214 | { | 212 | { |
215 | struct gfs2_ail *ai, *s; | 213 | struct gfs2_ail *ai, *s; |
216 | int ret; | 214 | int ret; |
@@ -248,7 +246,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
248 | bd = list_entry(head->prev, struct gfs2_bufdata, | 246 | bd = list_entry(head->prev, struct gfs2_bufdata, |
249 | bd_ail_st_list); | 247 | bd_ail_st_list); |
250 | gfs2_assert(sdp, bd->bd_ail == ai); | 248 | gfs2_assert(sdp, bd->bd_ail == ai); |
251 | gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); | 249 | gfs2_remove_from_ail(bd); |
252 | } | 250 | } |
253 | } | 251 | } |
254 | 252 | ||
@@ -303,7 +301,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | |||
303 | 301 | ||
304 | mutex_lock(&sdp->sd_log_reserve_mutex); | 302 | mutex_lock(&sdp->sd_log_reserve_mutex); |
305 | gfs2_log_lock(sdp); | 303 | gfs2_log_lock(sdp); |
306 | while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { | 304 | while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) { |
307 | gfs2_log_unlock(sdp); | 305 | gfs2_log_unlock(sdp); |
308 | gfs2_ail1_empty(sdp, 0); | 306 | gfs2_ail1_empty(sdp, 0); |
309 | gfs2_log_flush(sdp, NULL); | 307 | gfs2_log_flush(sdp, NULL); |
@@ -312,7 +310,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | |||
312 | gfs2_ail1_start(sdp, 0); | 310 | gfs2_ail1_start(sdp, 0); |
313 | gfs2_log_lock(sdp); | 311 | gfs2_log_lock(sdp); |
314 | } | 312 | } |
315 | sdp->sd_log_blks_free -= blks; | 313 | atomic_sub(blks, &sdp->sd_log_blks_free); |
316 | gfs2_log_unlock(sdp); | 314 | gfs2_log_unlock(sdp); |
317 | mutex_unlock(&sdp->sd_log_reserve_mutex); | 315 | mutex_unlock(&sdp->sd_log_reserve_mutex); |
318 | 316 | ||
@@ -332,27 +330,23 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) | |||
332 | { | 330 | { |
333 | 331 | ||
334 | gfs2_log_lock(sdp); | 332 | gfs2_log_lock(sdp); |
335 | sdp->sd_log_blks_free += blks; | 333 | atomic_add(blks, &sdp->sd_log_blks_free); |
336 | gfs2_assert_withdraw(sdp, | 334 | gfs2_assert_withdraw(sdp, |
337 | sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); | 335 | atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
338 | gfs2_log_unlock(sdp); | 336 | gfs2_log_unlock(sdp); |
339 | up_read(&sdp->sd_log_flush_lock); | 337 | up_read(&sdp->sd_log_flush_lock); |
340 | } | 338 | } |
341 | 339 | ||
342 | static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) | 340 | static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) |
343 | { | 341 | { |
344 | struct inode *inode = sdp->sd_jdesc->jd_inode; | 342 | struct gfs2_journal_extent *je; |
345 | int error; | 343 | |
346 | struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; | 344 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { |
347 | 345 | if (lbn >= je->lblock && lbn < je->lblock + je->blocks) | |
348 | bh_map.b_size = 1 << inode->i_blkbits; | 346 | return je->dblock + lbn - je->lblock; |
349 | error = gfs2_block_map(inode, lbn, 0, &bh_map); | 347 | } |
350 | if (error || !bh_map.b_blocknr) | 348 | |
351 | printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, | 349 | return -1; |
352 | (unsigned long long)bh_map.b_blocknr, lbn); | ||
353 | gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); | ||
354 | |||
355 | return bh_map.b_blocknr; | ||
356 | } | 350 | } |
357 | 351 | ||
358 | /** | 352 | /** |
@@ -561,8 +555,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
561 | ail2_empty(sdp, new_tail); | 555 | ail2_empty(sdp, new_tail); |
562 | 556 | ||
563 | gfs2_log_lock(sdp); | 557 | gfs2_log_lock(sdp); |
564 | sdp->sd_log_blks_free += dist; | 558 | atomic_add(dist, &sdp->sd_log_blks_free); |
565 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); | 559 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); |
566 | gfs2_log_unlock(sdp); | 560 | gfs2_log_unlock(sdp); |
567 | 561 | ||
568 | sdp->sd_log_tail = new_tail; | 562 | sdp->sd_log_tail = new_tail; |
@@ -652,7 +646,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) | |||
652 | get_bh(bh); | 646 | get_bh(bh); |
653 | gfs2_log_unlock(sdp); | 647 | gfs2_log_unlock(sdp); |
654 | lock_buffer(bh); | 648 | lock_buffer(bh); |
655 | if (test_clear_buffer_dirty(bh)) { | 649 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { |
656 | bh->b_end_io = end_buffer_write_sync; | 650 | bh->b_end_io = end_buffer_write_sync; |
657 | submit_bh(WRITE, bh); | 651 | submit_bh(WRITE, bh); |
658 | } else { | 652 | } else { |
@@ -694,20 +688,16 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) | |||
694 | * | 688 | * |
695 | */ | 689 | */ |
696 | 690 | ||
697 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | 691 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) |
698 | { | 692 | { |
699 | struct gfs2_ail *ai; | 693 | struct gfs2_ail *ai; |
700 | 694 | ||
701 | down_write(&sdp->sd_log_flush_lock); | 695 | down_write(&sdp->sd_log_flush_lock); |
702 | 696 | ||
703 | if (gl) { | 697 | /* Log might have been flushed while we waited for the flush lock */ |
704 | gfs2_log_lock(sdp); | 698 | if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { |
705 | if (list_empty(&gl->gl_le.le_list)) { | 699 | up_write(&sdp->sd_log_flush_lock); |
706 | gfs2_log_unlock(sdp); | 700 | return; |
707 | up_write(&sdp->sd_log_flush_lock); | ||
708 | return; | ||
709 | } | ||
710 | gfs2_log_unlock(sdp); | ||
711 | } | 701 | } |
712 | 702 | ||
713 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); | 703 | ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); |
@@ -739,7 +729,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
739 | log_flush_commit(sdp); | 729 | log_flush_commit(sdp); |
740 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ | 730 | else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ |
741 | gfs2_log_lock(sdp); | 731 | gfs2_log_lock(sdp); |
742 | sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ | 732 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ |
743 | gfs2_log_unlock(sdp); | 733 | gfs2_log_unlock(sdp); |
744 | log_write_header(sdp, 0, PULL); | 734 | log_write_header(sdp, 0, PULL); |
745 | } | 735 | } |
@@ -767,7 +757,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
767 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 757 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
768 | { | 758 | { |
769 | unsigned int reserved; | 759 | unsigned int reserved; |
770 | unsigned int old; | 760 | unsigned int unused; |
771 | 761 | ||
772 | gfs2_log_lock(sdp); | 762 | gfs2_log_lock(sdp); |
773 | 763 | ||
@@ -779,14 +769,11 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
779 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
780 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); | 770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); |
781 | reserved = calc_reserved(sdp); | 771 | reserved = calc_reserved(sdp); |
782 | old = sdp->sd_log_blks_free; | 772 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; |
783 | sdp->sd_log_blks_free += tr->tr_reserved - | 773 | gfs2_assert_withdraw(sdp, unused >= 0); |
784 | (reserved - sdp->sd_log_blks_reserved); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
785 | 775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | |
786 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); | ||
787 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= | ||
788 | sdp->sd_jdesc->jd_blocks); | 776 | sdp->sd_jdesc->jd_blocks); |
789 | |||
790 | sdp->sd_log_blks_reserved = reserved; | 777 | sdp->sd_log_blks_reserved = reserved; |
791 | 778 | ||
792 | gfs2_log_unlock(sdp); | 779 | gfs2_log_unlock(sdp); |
@@ -825,7 +812,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
825 | down_write(&sdp->sd_log_flush_lock); | 812 | down_write(&sdp->sd_log_flush_lock); |
826 | 813 | ||
827 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); | 814 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); |
828 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); | ||
829 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); | 815 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); |
830 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 816 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
831 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); | 817 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); |
@@ -838,7 +824,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
838 | log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, | 824 | log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, |
839 | (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); | 825 | (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); |
840 | 826 | ||
841 | gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); | 827 | gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); |
842 | gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); | 828 | gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); |
843 | gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); | 829 | gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); |
844 | 830 | ||
@@ -866,3 +852,42 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) | |||
866 | } | 852 | } |
867 | } | 853 | } |
868 | 854 | ||
855 | |||
856 | /** | ||
857 | * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks | ||
858 | * @sdp: Pointer to GFS2 superblock | ||
859 | * | ||
860 | * Also, periodically check to make sure that we're using the most recent | ||
861 | * journal index. | ||
862 | */ | ||
863 | |||
864 | int gfs2_logd(void *data) | ||
865 | { | ||
866 | struct gfs2_sbd *sdp = data; | ||
867 | unsigned long t; | ||
868 | int need_flush; | ||
869 | |||
870 | while (!kthread_should_stop()) { | ||
871 | /* Advance the log tail */ | ||
872 | |||
873 | t = sdp->sd_log_flush_time + | ||
874 | gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; | ||
875 | |||
876 | gfs2_ail1_empty(sdp, DIO_ALL); | ||
877 | gfs2_log_lock(sdp); | ||
878 | need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); | ||
879 | gfs2_log_unlock(sdp); | ||
880 | if (need_flush || time_after_eq(jiffies, t)) { | ||
881 | gfs2_log_flush(sdp, NULL); | ||
882 | sdp->sd_log_flush_time = jiffies; | ||
883 | } | ||
884 | |||
885 | t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; | ||
886 | if (freezing(current)) | ||
887 | refrigerator(); | ||
888 | schedule_timeout_interruptible(t); | ||
889 | } | ||
890 | |||
891 | return 0; | ||
892 | } | ||
893 | |||
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index dae282400627..771152816508 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
@@ -48,8 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, | |||
48 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | 48 | unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, |
49 | unsigned int ssize); | 49 | unsigned int ssize); |
50 | 50 | ||
51 | int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); | ||
52 | |||
53 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); | 51 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); |
54 | void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); | 52 | void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); |
55 | void gfs2_log_incr_head(struct gfs2_sbd *sdp); | 53 | void gfs2_log_incr_head(struct gfs2_sbd *sdp); |
@@ -57,11 +55,19 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp); | |||
57 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); | 55 | struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); |
58 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, | 56 | struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, |
59 | struct buffer_head *real); | 57 | struct buffer_head *real); |
60 | void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); | 58 | void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); |
59 | |||
60 | static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl) | ||
61 | { | ||
62 | if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags)) | ||
63 | __gfs2_log_flush(sbd, gl); | ||
64 | } | ||
65 | |||
61 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); | 66 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); |
62 | void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); | 67 | void gfs2_remove_from_ail(struct gfs2_bufdata *bd); |
63 | 68 | ||
64 | void gfs2_log_shutdown(struct gfs2_sbd *sdp); | 69 | void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
65 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | 70 | void gfs2_meta_syncfs(struct gfs2_sbd *sdp); |
71 | int gfs2_logd(void *data); | ||
66 | 72 | ||
67 | #endif /* __LOG_DOT_H__ */ | 73 | #endif /* __LOG_DOT_H__ */ |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6c27cea761c6..fae59d69d01a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -87,6 +87,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
87 | } | 87 | } |
88 | bd->bd_ail = ai; | 88 | bd->bd_ail = ai; |
89 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); | 89 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); |
90 | clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); | ||
90 | gfs2_log_unlock(sdp); | 91 | gfs2_log_unlock(sdp); |
91 | unlock_buffer(bh); | 92 | unlock_buffer(bh); |
92 | } | 93 | } |
@@ -124,49 +125,6 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) | |||
124 | return bh; | 125 | return bh; |
125 | } | 126 | } |
126 | 127 | ||
127 | static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
128 | { | ||
129 | struct gfs2_glock *gl; | ||
130 | struct gfs2_trans *tr = current->journal_info; | ||
131 | |||
132 | tr->tr_touched = 1; | ||
133 | |||
134 | gl = container_of(le, struct gfs2_glock, gl_le); | ||
135 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) | ||
136 | return; | ||
137 | |||
138 | if (!list_empty(&le->le_list)) | ||
139 | return; | ||
140 | |||
141 | gfs2_glock_hold(gl); | ||
142 | set_bit(GLF_DIRTY, &gl->gl_flags); | ||
143 | sdp->sd_log_num_gl++; | ||
144 | list_add(&le->le_list, &sdp->sd_log_le_gl); | ||
145 | } | ||
146 | |||
147 | static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | ||
148 | { | ||
149 | gfs2_log_lock(sdp); | ||
150 | __glock_lo_add(sdp, le); | ||
151 | gfs2_log_unlock(sdp); | ||
152 | } | ||
153 | |||
154 | static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
155 | { | ||
156 | struct list_head *head = &sdp->sd_log_le_gl; | ||
157 | struct gfs2_glock *gl; | ||
158 | |||
159 | while (!list_empty(head)) { | ||
160 | gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); | ||
161 | list_del_init(&gl->gl_le.le_list); | ||
162 | sdp->sd_log_num_gl--; | ||
163 | |||
164 | gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); | ||
165 | gfs2_glock_put(gl); | ||
166 | } | ||
167 | gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); | ||
168 | } | ||
169 | |||
170 | static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | 128 | static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) |
171 | { | 129 | { |
172 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); | 130 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); |
@@ -182,7 +140,8 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
182 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 140 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); |
183 | if (!list_empty(&le->le_list)) | 141 | if (!list_empty(&le->le_list)) |
184 | goto out; | 142 | goto out; |
185 | __glock_lo_add(sdp, &bd->bd_gl->gl_le); | 143 | set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); |
144 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | ||
186 | gfs2_meta_check(sdp, bd->bd_bh); | 145 | gfs2_meta_check(sdp, bd->bd_bh); |
187 | gfs2_pin(sdp, bd->bd_bh); | 146 | gfs2_pin(sdp, bd->bd_bh); |
188 | sdp->sd_log_num_buf++; | 147 | sdp->sd_log_num_buf++; |
@@ -556,17 +515,20 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
556 | 515 | ||
557 | lock_buffer(bd->bd_bh); | 516 | lock_buffer(bd->bd_bh); |
558 | gfs2_log_lock(sdp); | 517 | gfs2_log_lock(sdp); |
559 | if (!list_empty(&bd->bd_list_tr)) | 518 | if (tr) { |
560 | goto out; | 519 | if (!list_empty(&bd->bd_list_tr)) |
561 | tr->tr_touched = 1; | 520 | goto out; |
562 | if (gfs2_is_jdata(ip)) { | 521 | tr->tr_touched = 1; |
563 | tr->tr_num_buf++; | 522 | if (gfs2_is_jdata(ip)) { |
564 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | 523 | tr->tr_num_buf++; |
524 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | ||
525 | } | ||
565 | } | 526 | } |
566 | if (!list_empty(&le->le_list)) | 527 | if (!list_empty(&le->le_list)) |
567 | goto out; | 528 | goto out; |
568 | 529 | ||
569 | __glock_lo_add(sdp, &bd->bd_gl->gl_le); | 530 | set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); |
531 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | ||
570 | if (gfs2_is_jdata(ip)) { | 532 | if (gfs2_is_jdata(ip)) { |
571 | gfs2_pin(sdp, bd->bd_bh); | 533 | gfs2_pin(sdp, bd->bd_bh); |
572 | tr->tr_num_databuf_new++; | 534 | tr->tr_num_databuf_new++; |
@@ -773,12 +735,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
773 | } | 735 | } |
774 | 736 | ||
775 | 737 | ||
776 | const struct gfs2_log_operations gfs2_glock_lops = { | ||
777 | .lo_add = glock_lo_add, | ||
778 | .lo_after_commit = glock_lo_after_commit, | ||
779 | .lo_name = "glock", | ||
780 | }; | ||
781 | |||
782 | const struct gfs2_log_operations gfs2_buf_lops = { | 738 | const struct gfs2_log_operations gfs2_buf_lops = { |
783 | .lo_add = buf_lo_add, | 739 | .lo_add = buf_lo_add, |
784 | .lo_incore_commit = buf_lo_incore_commit, | 740 | .lo_incore_commit = buf_lo_incore_commit, |
@@ -816,7 +772,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { | |||
816 | }; | 772 | }; |
817 | 773 | ||
818 | const struct gfs2_log_operations *gfs2_log_ops[] = { | 774 | const struct gfs2_log_operations *gfs2_log_ops[] = { |
819 | &gfs2_glock_lops, | ||
820 | &gfs2_databuf_lops, | 775 | &gfs2_databuf_lops, |
821 | &gfs2_buf_lops, | 776 | &gfs2_buf_lops, |
822 | &gfs2_rg_lops, | 777 | &gfs2_rg_lops, |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7ecfe0d3a491..9c7765c12d62 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -29,9 +29,8 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) | |||
29 | struct gfs2_inode *ip = foo; | 29 | struct gfs2_inode *ip = foo; |
30 | 30 | ||
31 | inode_init_once(&ip->i_inode); | 31 | inode_init_once(&ip->i_inode); |
32 | spin_lock_init(&ip->i_spin); | ||
33 | init_rwsem(&ip->i_rw_mutex); | 32 | init_rwsem(&ip->i_rw_mutex); |
34 | memset(ip->i_cache, 0, sizeof(ip->i_cache)); | 33 | ip->i_alloc = NULL; |
35 | } | 34 | } |
36 | 35 | ||
37 | static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) | 36 | static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4da423985e4f..85aea27b4a86 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -50,6 +50,7 @@ static int gfs2_aspace_writepage(struct page *page, | |||
50 | static const struct address_space_operations aspace_aops = { | 50 | static const struct address_space_operations aspace_aops = { |
51 | .writepage = gfs2_aspace_writepage, | 51 | .writepage = gfs2_aspace_writepage, |
52 | .releasepage = gfs2_releasepage, | 52 | .releasepage = gfs2_releasepage, |
53 | .sync_page = block_sync_page, | ||
53 | }; | 54 | }; |
54 | 55 | ||
55 | /** | 56 | /** |
@@ -221,13 +222,14 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | |||
221 | struct buffer_head **bhp) | 222 | struct buffer_head **bhp) |
222 | { | 223 | { |
223 | *bhp = getbuf(gl, blkno, CREATE); | 224 | *bhp = getbuf(gl, blkno, CREATE); |
224 | if (!buffer_uptodate(*bhp)) | 225 | if (!buffer_uptodate(*bhp)) { |
225 | ll_rw_block(READ_META, 1, bhp); | 226 | ll_rw_block(READ_META, 1, bhp); |
226 | if (flags & DIO_WAIT) { | 227 | if (flags & DIO_WAIT) { |
227 | int error = gfs2_meta_wait(gl->gl_sbd, *bhp); | 228 | int error = gfs2_meta_wait(gl->gl_sbd, *bhp); |
228 | if (error) { | 229 | if (error) { |
229 | brelse(*bhp); | 230 | brelse(*bhp); |
230 | return error; | 231 | return error; |
232 | } | ||
231 | } | 233 | } |
232 | } | 234 | } |
233 | 235 | ||
@@ -282,7 +284,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, | |||
282 | return; | 284 | return; |
283 | } | 285 | } |
284 | 286 | ||
285 | bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), | 287 | bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); |
286 | bd->bd_bh = bh; | 288 | bd->bd_bh = bh; |
287 | bd->bd_gl = gl; | 289 | bd->bd_gl = gl; |
288 | 290 | ||
@@ -317,7 +319,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
317 | } | 319 | } |
318 | if (bd) { | 320 | if (bd) { |
319 | if (bd->bd_ail) { | 321 | if (bd->bd_ail) { |
320 | gfs2_remove_from_ail(NULL, bd); | 322 | gfs2_remove_from_ail(bd); |
321 | bh->b_private = NULL; | 323 | bh->b_private = NULL; |
322 | bd->bd_bh = NULL; | 324 | bd->bd_bh = NULL; |
323 | bd->bd_blkno = bh->b_blocknr; | 325 | bd->bd_blkno = bh->b_blocknr; |
@@ -358,32 +360,6 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
358 | } | 360 | } |
359 | 361 | ||
360 | /** | 362 | /** |
361 | * gfs2_meta_cache_flush - get rid of any references on buffers for this inode | ||
362 | * @ip: The GFS2 inode | ||
363 | * | ||
364 | * This releases buffers that are in the most-recently-used array of | ||
365 | * blocks used for indirect block addressing for this inode. | ||
366 | */ | ||
367 | |||
368 | void gfs2_meta_cache_flush(struct gfs2_inode *ip) | ||
369 | { | ||
370 | struct buffer_head **bh_slot; | ||
371 | unsigned int x; | ||
372 | |||
373 | spin_lock(&ip->i_spin); | ||
374 | |||
375 | for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { | ||
376 | bh_slot = &ip->i_cache[x]; | ||
377 | if (*bh_slot) { | ||
378 | brelse(*bh_slot); | ||
379 | *bh_slot = NULL; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | spin_unlock(&ip->i_spin); | ||
384 | } | ||
385 | |||
386 | /** | ||
387 | * gfs2_meta_indirect_buffer - Get a metadata buffer | 363 | * gfs2_meta_indirect_buffer - Get a metadata buffer |
388 | * @ip: The GFS2 inode | 364 | * @ip: The GFS2 inode |
389 | * @height: The level of this buf in the metadata (indir addr) tree (if any) | 365 | * @height: The level of this buf in the metadata (indir addr) tree (if any) |
@@ -391,8 +367,6 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip) | |||
391 | * @new: Non-zero if we may create a new buffer | 367 | * @new: Non-zero if we may create a new buffer |
392 | * @bhp: the buffer is returned here | 368 | * @bhp: the buffer is returned here |
393 | * | 369 | * |
394 | * Try to use the gfs2_inode's MRU metadata tree cache. | ||
395 | * | ||
396 | * Returns: errno | 370 | * Returns: errno |
397 | */ | 371 | */ |
398 | 372 | ||
@@ -401,58 +375,25 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, | |||
401 | { | 375 | { |
402 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 376 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
403 | struct gfs2_glock *gl = ip->i_gl; | 377 | struct gfs2_glock *gl = ip->i_gl; |
404 | struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; | 378 | struct buffer_head *bh; |
405 | int in_cache = 0; | 379 | int ret = 0; |
406 | |||
407 | BUG_ON(!gl); | ||
408 | BUG_ON(!sdp); | ||
409 | |||
410 | spin_lock(&ip->i_spin); | ||
411 | if (*bh_slot && (*bh_slot)->b_blocknr == num) { | ||
412 | bh = *bh_slot; | ||
413 | get_bh(bh); | ||
414 | in_cache = 1; | ||
415 | } | ||
416 | spin_unlock(&ip->i_spin); | ||
417 | |||
418 | if (!bh) | ||
419 | bh = getbuf(gl, num, CREATE); | ||
420 | |||
421 | if (!bh) | ||
422 | return -ENOBUFS; | ||
423 | 380 | ||
424 | if (new) { | 381 | if (new) { |
425 | if (gfs2_assert_warn(sdp, height)) | 382 | BUG_ON(height == 0); |
426 | goto err; | 383 | bh = gfs2_meta_new(gl, num); |
427 | meta_prep_new(bh); | ||
428 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 384 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
429 | gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); | 385 | gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); |
430 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); | 386 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); |
431 | } else { | 387 | } else { |
432 | u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; | 388 | u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; |
433 | if (!buffer_uptodate(bh)) { | 389 | ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); |
434 | ll_rw_block(READ_META, 1, &bh); | 390 | if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { |
435 | if (gfs2_meta_wait(sdp, bh)) | 391 | brelse(bh); |
436 | goto err; | 392 | ret = -EIO; |
437 | } | 393 | } |
438 | if (gfs2_metatype_check(sdp, bh, mtype)) | ||
439 | goto err; | ||
440 | } | ||
441 | |||
442 | if (!in_cache) { | ||
443 | spin_lock(&ip->i_spin); | ||
444 | if (*bh_slot) | ||
445 | brelse(*bh_slot); | ||
446 | *bh_slot = bh; | ||
447 | get_bh(bh); | ||
448 | spin_unlock(&ip->i_spin); | ||
449 | } | 394 | } |
450 | |||
451 | *bhp = bh; | 395 | *bhp = bh; |
452 | return 0; | 396 | return ret; |
453 | err: | ||
454 | brelse(bh); | ||
455 | return -EIO; | ||
456 | } | 397 | } |
457 | 398 | ||
458 | /** | 399 | /** |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index b7048222ebb4..73e3b1c76fe1 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
@@ -56,7 +56,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, | |||
56 | 56 | ||
57 | void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); | 57 | void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); |
58 | 58 | ||
59 | void gfs2_meta_cache_flush(struct gfs2_inode *ip); | ||
60 | int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, | 59 | int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, |
61 | int new, struct buffer_head **bhp); | 60 | int new, struct buffer_head **bhp); |
62 | 61 | ||
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9679f8b9870d..38dbe99a30ed 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/backing-dev.h> | ||
24 | #include <linux/pagevec.h> | ||
23 | 25 | ||
24 | #include "gfs2.h" | 26 | #include "gfs2.h" |
25 | #include "incore.h" | 27 | #include "incore.h" |
@@ -32,7 +34,6 @@ | |||
32 | #include "quota.h" | 34 | #include "quota.h" |
33 | #include "trans.h" | 35 | #include "trans.h" |
34 | #include "rgrp.h" | 36 | #include "rgrp.h" |
35 | #include "ops_file.h" | ||
36 | #include "super.h" | 37 | #include "super.h" |
37 | #include "util.h" | 38 | #include "util.h" |
38 | #include "glops.h" | 39 | #include "glops.h" |
@@ -58,22 +59,6 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | |||
58 | } | 59 | } |
59 | 60 | ||
60 | /** | 61 | /** |
61 | * gfs2_get_block - Fills in a buffer head with details about a block | ||
62 | * @inode: The inode | ||
63 | * @lblock: The block number to look up | ||
64 | * @bh_result: The buffer head to return the result in | ||
65 | * @create: Non-zero if we may add block to the file | ||
66 | * | ||
67 | * Returns: errno | ||
68 | */ | ||
69 | |||
70 | int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
71 | struct buffer_head *bh_result, int create) | ||
72 | { | ||
73 | return gfs2_block_map(inode, lblock, create, bh_result); | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block | 62 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block |
78 | * @inode: The inode | 63 | * @inode: The inode |
79 | * @lblock: The block number to look up | 64 | * @lblock: The block number to look up |
@@ -88,7 +73,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
88 | { | 73 | { |
89 | int error; | 74 | int error; |
90 | 75 | ||
91 | error = gfs2_block_map(inode, lblock, 0, bh_result); | 76 | error = gfs2_block_map(inode, lblock, bh_result, 0); |
92 | if (error) | 77 | if (error) |
93 | return error; | 78 | return error; |
94 | if (!buffer_mapped(bh_result)) | 79 | if (!buffer_mapped(bh_result)) |
@@ -99,20 +84,19 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
99 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, | 84 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, |
100 | struct buffer_head *bh_result, int create) | 85 | struct buffer_head *bh_result, int create) |
101 | { | 86 | { |
102 | return gfs2_block_map(inode, lblock, 0, bh_result); | 87 | return gfs2_block_map(inode, lblock, bh_result, 0); |
103 | } | 88 | } |
104 | 89 | ||
105 | /** | 90 | /** |
106 | * gfs2_writepage - Write complete page | 91 | * gfs2_writepage_common - Common bits of writepage |
107 | * @page: Page to write | 92 | * @page: The page to be written |
93 | * @wbc: The writeback control | ||
108 | * | 94 | * |
109 | * Returns: errno | 95 | * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. |
110 | * | ||
111 | * Some of this is copied from block_write_full_page() although we still | ||
112 | * call it to do most of the work. | ||
113 | */ | 96 | */ |
114 | 97 | ||
115 | static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | 98 | static int gfs2_writepage_common(struct page *page, |
99 | struct writeback_control *wbc) | ||
116 | { | 100 | { |
117 | struct inode *inode = page->mapping->host; | 101 | struct inode *inode = page->mapping->host; |
118 | struct gfs2_inode *ip = GFS2_I(inode); | 102 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -120,41 +104,133 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | |||
120 | loff_t i_size = i_size_read(inode); | 104 | loff_t i_size = i_size_read(inode); |
121 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 105 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
122 | unsigned offset; | 106 | unsigned offset; |
123 | int error; | 107 | int ret = -EIO; |
124 | int done_trans = 0; | ||
125 | 108 | ||
126 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { | 109 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) |
127 | unlock_page(page); | 110 | goto out; |
128 | return -EIO; | 111 | ret = 0; |
129 | } | ||
130 | if (current->journal_info) | 112 | if (current->journal_info) |
131 | goto out_ignore; | 113 | goto redirty; |
132 | |||
133 | /* Is the page fully outside i_size? (truncate in progress) */ | 114 | /* Is the page fully outside i_size? (truncate in progress) */ |
134 | offset = i_size & (PAGE_CACHE_SIZE-1); | 115 | offset = i_size & (PAGE_CACHE_SIZE-1); |
135 | if (page->index > end_index || (page->index == end_index && !offset)) { | 116 | if (page->index > end_index || (page->index == end_index && !offset)) { |
136 | page->mapping->a_ops->invalidatepage(page, 0); | 117 | page->mapping->a_ops->invalidatepage(page, 0); |
137 | unlock_page(page); | 118 | goto out; |
138 | return 0; /* don't care */ | 119 | } |
120 | return 1; | ||
121 | redirty: | ||
122 | redirty_page_for_writepage(wbc, page); | ||
123 | out: | ||
124 | unlock_page(page); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * gfs2_writeback_writepage - Write page for writeback mappings | ||
130 | * @page: The page | ||
131 | * @wbc: The writeback control | ||
132 | * | ||
133 | */ | ||
134 | |||
135 | static int gfs2_writeback_writepage(struct page *page, | ||
136 | struct writeback_control *wbc) | ||
137 | { | ||
138 | int ret; | ||
139 | |||
140 | ret = gfs2_writepage_common(page, wbc); | ||
141 | if (ret <= 0) | ||
142 | return ret; | ||
143 | |||
144 | ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); | ||
145 | if (ret == -EAGAIN) | ||
146 | ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
147 | return ret; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * gfs2_ordered_writepage - Write page for ordered data files | ||
152 | * @page: The page to write | ||
153 | * @wbc: The writeback control | ||
154 | * | ||
155 | */ | ||
156 | |||
157 | static int gfs2_ordered_writepage(struct page *page, | ||
158 | struct writeback_control *wbc) | ||
159 | { | ||
160 | struct inode *inode = page->mapping->host; | ||
161 | struct gfs2_inode *ip = GFS2_I(inode); | ||
162 | int ret; | ||
163 | |||
164 | ret = gfs2_writepage_common(page, wbc); | ||
165 | if (ret <= 0) | ||
166 | return ret; | ||
167 | |||
168 | if (!page_has_buffers(page)) { | ||
169 | create_empty_buffers(page, inode->i_sb->s_blocksize, | ||
170 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
139 | } | 171 | } |
172 | gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); | ||
173 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
174 | } | ||
140 | 175 | ||
141 | if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && | 176 | /** |
142 | PageChecked(page)) { | 177 | * __gfs2_jdata_writepage - The core of jdata writepage |
178 | * @page: The page to write | ||
179 | * @wbc: The writeback control | ||
180 | * | ||
181 | * This is shared between writepage and writepages and implements the | ||
182 | * core of the writepage operation. If a transaction is required then | ||
183 | * PageChecked will have been set and the transaction will have | ||
184 | * already been started before this is called. | ||
185 | */ | ||
186 | |||
187 | static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
188 | { | ||
189 | struct inode *inode = page->mapping->host; | ||
190 | struct gfs2_inode *ip = GFS2_I(inode); | ||
191 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
192 | |||
193 | if (PageChecked(page)) { | ||
143 | ClearPageChecked(page); | 194 | ClearPageChecked(page); |
144 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
145 | if (error) | ||
146 | goto out_ignore; | ||
147 | if (!page_has_buffers(page)) { | 195 | if (!page_has_buffers(page)) { |
148 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 196 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
149 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 197 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
150 | } | 198 | } |
151 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); | 199 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); |
200 | } | ||
201 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * gfs2_jdata_writepage - Write complete page | ||
206 | * @page: Page to write | ||
207 | * | ||
208 | * Returns: errno | ||
209 | * | ||
210 | */ | ||
211 | |||
212 | static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
213 | { | ||
214 | struct inode *inode = page->mapping->host; | ||
215 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
216 | int error; | ||
217 | int done_trans = 0; | ||
218 | |||
219 | error = gfs2_writepage_common(page, wbc); | ||
220 | if (error <= 0) | ||
221 | return error; | ||
222 | |||
223 | if (PageChecked(page)) { | ||
224 | if (wbc->sync_mode != WB_SYNC_ALL) | ||
225 | goto out_ignore; | ||
226 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
227 | if (error) | ||
228 | goto out_ignore; | ||
152 | done_trans = 1; | 229 | done_trans = 1; |
153 | } | 230 | } |
154 | error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | 231 | error = __gfs2_jdata_writepage(page, wbc); |
155 | if (done_trans) | 232 | if (done_trans) |
156 | gfs2_trans_end(sdp); | 233 | gfs2_trans_end(sdp); |
157 | gfs2_meta_cache_flush(ip); | ||
158 | return error; | 234 | return error; |
159 | 235 | ||
160 | out_ignore: | 236 | out_ignore: |
@@ -164,29 +240,190 @@ out_ignore: | |||
164 | } | 240 | } |
165 | 241 | ||
166 | /** | 242 | /** |
167 | * gfs2_writepages - Write a bunch of dirty pages back to disk | 243 | * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk |
168 | * @mapping: The mapping to write | 244 | * @mapping: The mapping to write |
169 | * @wbc: Write-back control | 245 | * @wbc: Write-back control |
170 | * | 246 | * |
171 | * For journaled files and/or ordered writes this just falls back to the | 247 | * For the data=writeback case we can already ignore buffer heads |
172 | * kernel's default writepages path for now. We will probably want to change | ||
173 | * that eventually (i.e. when we look at allocate on flush). | ||
174 | * | ||
175 | * For the data=writeback case though we can already ignore buffer heads | ||
176 | * and write whole extents at once. This is a big reduction in the | 248 | * and write whole extents at once. This is a big reduction in the |
177 | * number of I/O requests we send and the bmap calls we make in this case. | 249 | * number of I/O requests we send and the bmap calls we make in this case. |
178 | */ | 250 | */ |
179 | static int gfs2_writepages(struct address_space *mapping, | 251 | static int gfs2_writeback_writepages(struct address_space *mapping, |
180 | struct writeback_control *wbc) | 252 | struct writeback_control *wbc) |
253 | { | ||
254 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages | ||
259 | * @mapping: The mapping | ||
260 | * @wbc: The writeback control | ||
261 | * @writepage: The writepage function to call for each page | ||
262 | * @pvec: The vector of pages | ||
263 | * @nr_pages: The number of pages to write | ||
264 | * | ||
265 | * Returns: non-zero if loop should terminate, zero otherwise | ||
266 | */ | ||
267 | |||
268 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | ||
269 | struct writeback_control *wbc, | ||
270 | struct pagevec *pvec, | ||
271 | int nr_pages, pgoff_t end) | ||
181 | { | 272 | { |
182 | struct inode *inode = mapping->host; | 273 | struct inode *inode = mapping->host; |
183 | struct gfs2_inode *ip = GFS2_I(inode); | ||
184 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 274 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
275 | loff_t i_size = i_size_read(inode); | ||
276 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
277 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
278 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | ||
279 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
280 | int i; | ||
281 | int ret; | ||
282 | |||
283 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | ||
284 | if (ret < 0) | ||
285 | return ret; | ||
286 | |||
287 | for(i = 0; i < nr_pages; i++) { | ||
288 | struct page *page = pvec->pages[i]; | ||
289 | |||
290 | lock_page(page); | ||
291 | |||
292 | if (unlikely(page->mapping != mapping)) { | ||
293 | unlock_page(page); | ||
294 | continue; | ||
295 | } | ||
296 | |||
297 | if (!wbc->range_cyclic && page->index > end) { | ||
298 | ret = 1; | ||
299 | unlock_page(page); | ||
300 | continue; | ||
301 | } | ||
302 | |||
303 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
304 | wait_on_page_writeback(page); | ||
305 | |||
306 | if (PageWriteback(page) || | ||
307 | !clear_page_dirty_for_io(page)) { | ||
308 | unlock_page(page); | ||
309 | continue; | ||
310 | } | ||
311 | |||
312 | /* Is the page fully outside i_size? (truncate in progress) */ | ||
313 | if (page->index > end_index || (page->index == end_index && !offset)) { | ||
314 | page->mapping->a_ops->invalidatepage(page, 0); | ||
315 | unlock_page(page); | ||
316 | continue; | ||
317 | } | ||
318 | |||
319 | ret = __gfs2_jdata_writepage(page, wbc); | ||
320 | |||
321 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
322 | ret = 1; | ||
323 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
324 | wbc->encountered_congestion = 1; | ||
325 | ret = 1; | ||
326 | } | ||
327 | |||
328 | } | ||
329 | gfs2_trans_end(sdp); | ||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * gfs2_write_cache_jdata - Like write_cache_pages but different | ||
335 | * @mapping: The mapping to write | ||
336 | * @wbc: The writeback control | ||
337 | * @writepage: The writepage function to call | ||
338 | * @data: The data to pass to writepage | ||
339 | * | ||
340 | * The reason that we use our own function here is that we need to | ||
341 | * start transactions before we grab page locks. This allows us | ||
342 | * to get the ordering right. | ||
343 | */ | ||
344 | |||
345 | static int gfs2_write_cache_jdata(struct address_space *mapping, | ||
346 | struct writeback_control *wbc) | ||
347 | { | ||
348 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
349 | int ret = 0; | ||
350 | int done = 0; | ||
351 | struct pagevec pvec; | ||
352 | int nr_pages; | ||
353 | pgoff_t index; | ||
354 | pgoff_t end; | ||
355 | int scanned = 0; | ||
356 | int range_whole = 0; | ||
357 | |||
358 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
359 | wbc->encountered_congestion = 1; | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | pagevec_init(&pvec, 0); | ||
364 | if (wbc->range_cyclic) { | ||
365 | index = mapping->writeback_index; /* Start from prev offset */ | ||
366 | end = -1; | ||
367 | } else { | ||
368 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
369 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
370 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
371 | range_whole = 1; | ||
372 | scanned = 1; | ||
373 | } | ||
185 | 374 | ||
186 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | 375 | retry: |
187 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | 376 | while (!done && (index <= end) && |
377 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
378 | PAGECACHE_TAG_DIRTY, | ||
379 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
380 | scanned = 1; | ||
381 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | ||
382 | if (ret) | ||
383 | done = 1; | ||
384 | if (ret > 0) | ||
385 | ret = 0; | ||
386 | |||
387 | pagevec_release(&pvec); | ||
388 | cond_resched(); | ||
389 | } | ||
390 | |||
391 | if (!scanned && !done) { | ||
392 | /* | ||
393 | * We hit the last page and there is more work to be done: wrap | ||
394 | * back to the start of the file | ||
395 | */ | ||
396 | scanned = 1; | ||
397 | index = 0; | ||
398 | goto retry; | ||
399 | } | ||
400 | |||
401 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
402 | mapping->writeback_index = index; | ||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | |||
407 | /** | ||
408 | * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk | ||
409 | * @mapping: The mapping to write | ||
410 | * @wbc: The writeback control | ||
411 | * | ||
412 | */ | ||
188 | 413 | ||
189 | return generic_writepages(mapping, wbc); | 414 | static int gfs2_jdata_writepages(struct address_space *mapping, |
415 | struct writeback_control *wbc) | ||
416 | { | ||
417 | struct gfs2_inode *ip = GFS2_I(mapping->host); | ||
418 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | ||
419 | int ret; | ||
420 | |||
421 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
422 | if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { | ||
423 | gfs2_log_flush(sdp, ip->i_gl); | ||
424 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
425 | } | ||
426 | return ret; | ||
190 | } | 427 | } |
191 | 428 | ||
192 | /** | 429 | /** |
@@ -231,62 +468,107 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
231 | 468 | ||
232 | 469 | ||
233 | /** | 470 | /** |
234 | * gfs2_readpage - readpage with locking | 471 | * __gfs2_readpage - readpage |
235 | * @file: The file to read a page for. N.B. This may be NULL if we are | 472 | * @file: The file to read a page for |
236 | * reading an internal file. | ||
237 | * @page: The page to read | 473 | * @page: The page to read |
238 | * | 474 | * |
239 | * Returns: errno | 475 | * This is the core of gfs2's readpage. Its used by the internal file |
476 | * reading code as in that case we already hold the glock. Also its | ||
477 | * called by gfs2_readpage() once the required lock has been granted. | ||
478 | * | ||
240 | */ | 479 | */ |
241 | 480 | ||
242 | static int gfs2_readpage(struct file *file, struct page *page) | 481 | static int __gfs2_readpage(void *file, struct page *page) |
243 | { | 482 | { |
244 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 483 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
245 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | 484 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); |
246 | struct gfs2_file *gf = NULL; | ||
247 | struct gfs2_holder gh; | ||
248 | int error; | 485 | int error; |
249 | int do_unlock = 0; | ||
250 | |||
251 | if (likely(file != &gfs2_internal_file_sentinel)) { | ||
252 | if (file) { | ||
253 | gf = file->private_data; | ||
254 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | ||
255 | /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ | ||
256 | goto skip_lock; | ||
257 | } | ||
258 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); | ||
259 | do_unlock = 1; | ||
260 | error = gfs2_glock_nq_atime(&gh); | ||
261 | if (unlikely(error)) | ||
262 | goto out_unlock; | ||
263 | } | ||
264 | 486 | ||
265 | skip_lock: | ||
266 | if (gfs2_is_stuffed(ip)) { | 487 | if (gfs2_is_stuffed(ip)) { |
267 | error = stuffed_readpage(ip, page); | 488 | error = stuffed_readpage(ip, page); |
268 | unlock_page(page); | 489 | unlock_page(page); |
269 | } else | 490 | } else { |
270 | error = mpage_readpage(page, gfs2_get_block); | 491 | error = mpage_readpage(page, gfs2_block_map); |
492 | } | ||
271 | 493 | ||
272 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 494 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
273 | error = -EIO; | 495 | return -EIO; |
496 | |||
497 | return error; | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * gfs2_readpage - read a page of a file | ||
502 | * @file: The file to read | ||
503 | * @page: The page of the file | ||
504 | * | ||
505 | * This deals with the locking required. We use a trylock in order to | ||
506 | * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE | ||
507 | * in the event that we are unable to get the lock. | ||
508 | */ | ||
509 | |||
510 | static int gfs2_readpage(struct file *file, struct page *page) | ||
511 | { | ||
512 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
513 | struct gfs2_holder gh; | ||
514 | int error; | ||
274 | 515 | ||
275 | if (do_unlock) { | 516 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); |
276 | gfs2_glock_dq_m(1, &gh); | 517 | error = gfs2_glock_nq_atime(&gh); |
277 | gfs2_holder_uninit(&gh); | 518 | if (unlikely(error)) { |
519 | unlock_page(page); | ||
520 | goto out; | ||
278 | } | 521 | } |
522 | error = __gfs2_readpage(file, page); | ||
523 | gfs2_glock_dq(&gh); | ||
279 | out: | 524 | out: |
280 | return error; | 525 | gfs2_holder_uninit(&gh); |
281 | out_unlock: | ||
282 | unlock_page(page); | ||
283 | if (error == GLR_TRYFAILED) { | 526 | if (error == GLR_TRYFAILED) { |
284 | error = AOP_TRUNCATED_PAGE; | ||
285 | yield(); | 527 | yield(); |
528 | return AOP_TRUNCATED_PAGE; | ||
286 | } | 529 | } |
287 | if (do_unlock) | 530 | return error; |
288 | gfs2_holder_uninit(&gh); | 531 | } |
289 | goto out; | 532 | |
533 | /** | ||
534 | * gfs2_internal_read - read an internal file | ||
535 | * @ip: The gfs2 inode | ||
536 | * @ra_state: The readahead state (or NULL for no readahead) | ||
537 | * @buf: The buffer to fill | ||
538 | * @pos: The file position | ||
539 | * @size: The amount to read | ||
540 | * | ||
541 | */ | ||
542 | |||
543 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
544 | char *buf, loff_t *pos, unsigned size) | ||
545 | { | ||
546 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
547 | unsigned long index = *pos / PAGE_CACHE_SIZE; | ||
548 | unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); | ||
549 | unsigned copied = 0; | ||
550 | unsigned amt; | ||
551 | struct page *page; | ||
552 | void *p; | ||
553 | |||
554 | do { | ||
555 | amt = size - copied; | ||
556 | if (offset + size > PAGE_CACHE_SIZE) | ||
557 | amt = PAGE_CACHE_SIZE - offset; | ||
558 | page = read_cache_page(mapping, index, __gfs2_readpage, NULL); | ||
559 | if (IS_ERR(page)) | ||
560 | return PTR_ERR(page); | ||
561 | p = kmap_atomic(page, KM_USER0); | ||
562 | memcpy(buf + copied, p + offset, amt); | ||
563 | kunmap_atomic(p, KM_USER0); | ||
564 | mark_page_accessed(page); | ||
565 | page_cache_release(page); | ||
566 | copied += amt; | ||
567 | index++; | ||
568 | offset = 0; | ||
569 | } while(copied < size); | ||
570 | (*pos) += size; | ||
571 | return size; | ||
290 | } | 572 | } |
291 | 573 | ||
292 | /** | 574 | /** |
@@ -300,10 +582,9 @@ out_unlock: | |||
300 | * Any I/O we ignore at this time will be done via readpage later. | 582 | * Any I/O we ignore at this time will be done via readpage later. |
301 | * 2. We don't handle stuffed files here we let readpage do the honours. | 583 | * 2. We don't handle stuffed files here we let readpage do the honours. |
302 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 584 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
303 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 585 | * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. |
304 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | ||
305 | * well as read-ahead. | ||
306 | */ | 586 | */ |
587 | |||
307 | static int gfs2_readpages(struct file *file, struct address_space *mapping, | 588 | static int gfs2_readpages(struct file *file, struct address_space *mapping, |
308 | struct list_head *pages, unsigned nr_pages) | 589 | struct list_head *pages, unsigned nr_pages) |
309 | { | 590 | { |
@@ -311,42 +592,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
311 | struct gfs2_inode *ip = GFS2_I(inode); | 592 | struct gfs2_inode *ip = GFS2_I(inode); |
312 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 593 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
313 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
314 | int ret = 0; | 595 | int ret; |
315 | int do_unlock = 0; | ||
316 | 596 | ||
317 | if (likely(file != &gfs2_internal_file_sentinel)) { | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); |
318 | if (file) { | 598 | ret = gfs2_glock_nq_atime(&gh); |
319 | struct gfs2_file *gf = file->private_data; | 599 | if (unlikely(ret)) |
320 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | 600 | goto out_uninit; |
321 | goto skip_lock; | ||
322 | } | ||
323 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, | ||
324 | LM_FLAG_TRY_1CB|GL_ATIME, &gh); | ||
325 | do_unlock = 1; | ||
326 | ret = gfs2_glock_nq_atime(&gh); | ||
327 | if (ret == GLR_TRYFAILED) | ||
328 | goto out_noerror; | ||
329 | if (unlikely(ret)) | ||
330 | goto out_unlock; | ||
331 | } | ||
332 | skip_lock: | ||
333 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
334 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 602 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); |
335 | 603 | gfs2_glock_dq(&gh); | |
336 | if (do_unlock) { | 604 | out_uninit: |
337 | gfs2_glock_dq_m(1, &gh); | 605 | gfs2_holder_uninit(&gh); |
338 | gfs2_holder_uninit(&gh); | ||
339 | } | ||
340 | out: | ||
341 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 606 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
342 | ret = -EIO; | 607 | ret = -EIO; |
343 | return ret; | 608 | return ret; |
344 | out_noerror: | ||
345 | ret = 0; | ||
346 | out_unlock: | ||
347 | if (do_unlock) | ||
348 | gfs2_holder_uninit(&gh); | ||
349 | goto out; | ||
350 | } | 609 | } |
351 | 610 | ||
352 | /** | 611 | /** |
@@ -382,20 +641,11 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
382 | if (unlikely(error)) | 641 | if (unlikely(error)) |
383 | goto out_uninit; | 642 | goto out_uninit; |
384 | 643 | ||
385 | error = -ENOMEM; | ||
386 | page = __grab_cache_page(mapping, index); | ||
387 | *pagep = page; | ||
388 | if (!page) | ||
389 | goto out_unlock; | ||
390 | |||
391 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); | 644 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); |
392 | |||
393 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); | 645 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); |
394 | if (error) | 646 | if (error) |
395 | goto out_putpage; | 647 | goto out_unlock; |
396 | |||
397 | 648 | ||
398 | ip->i_alloc.al_requested = 0; | ||
399 | if (alloc_required) { | 649 | if (alloc_required) { |
400 | al = gfs2_alloc_get(ip); | 650 | al = gfs2_alloc_get(ip); |
401 | 651 | ||
@@ -424,40 +674,47 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
424 | if (error) | 674 | if (error) |
425 | goto out_trans_fail; | 675 | goto out_trans_fail; |
426 | 676 | ||
677 | error = -ENOMEM; | ||
678 | page = __grab_cache_page(mapping, index); | ||
679 | *pagep = page; | ||
680 | if (unlikely(!page)) | ||
681 | goto out_endtrans; | ||
682 | |||
427 | if (gfs2_is_stuffed(ip)) { | 683 | if (gfs2_is_stuffed(ip)) { |
684 | error = 0; | ||
428 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | 685 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { |
429 | error = gfs2_unstuff_dinode(ip, page); | 686 | error = gfs2_unstuff_dinode(ip, page); |
430 | if (error == 0) | 687 | if (error == 0) |
431 | goto prepare_write; | 688 | goto prepare_write; |
432 | } else if (!PageUptodate(page)) | 689 | } else if (!PageUptodate(page)) { |
433 | error = stuffed_readpage(ip, page); | 690 | error = stuffed_readpage(ip, page); |
691 | } | ||
434 | goto out; | 692 | goto out; |
435 | } | 693 | } |
436 | 694 | ||
437 | prepare_write: | 695 | prepare_write: |
438 | error = block_prepare_write(page, from, to, gfs2_get_block); | 696 | error = block_prepare_write(page, from, to, gfs2_block_map); |
439 | |||
440 | out: | 697 | out: |
441 | if (error) { | 698 | if (error == 0) |
442 | gfs2_trans_end(sdp); | 699 | return 0; |
700 | |||
701 | page_cache_release(page); | ||
702 | if (pos + len > ip->i_inode.i_size) | ||
703 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
704 | out_endtrans: | ||
705 | gfs2_trans_end(sdp); | ||
443 | out_trans_fail: | 706 | out_trans_fail: |
444 | if (alloc_required) { | 707 | if (alloc_required) { |
445 | gfs2_inplace_release(ip); | 708 | gfs2_inplace_release(ip); |
446 | out_qunlock: | 709 | out_qunlock: |
447 | gfs2_quota_unlock(ip); | 710 | gfs2_quota_unlock(ip); |
448 | out_alloc_put: | 711 | out_alloc_put: |
449 | gfs2_alloc_put(ip); | 712 | gfs2_alloc_put(ip); |
450 | } | 713 | } |
451 | out_putpage: | ||
452 | page_cache_release(page); | ||
453 | if (pos + len > ip->i_inode.i_size) | ||
454 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
455 | out_unlock: | 714 | out_unlock: |
456 | gfs2_glock_dq_m(1, &ip->i_gh); | 715 | gfs2_glock_dq(&ip->i_gh); |
457 | out_uninit: | 716 | out_uninit: |
458 | gfs2_holder_uninit(&ip->i_gh); | 717 | gfs2_holder_uninit(&ip->i_gh); |
459 | } | ||
460 | |||
461 | return error; | 718 | return error; |
462 | } | 719 | } |
463 | 720 | ||
@@ -565,7 +822,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
565 | struct gfs2_inode *ip = GFS2_I(inode); | 822 | struct gfs2_inode *ip = GFS2_I(inode); |
566 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 823 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
567 | struct buffer_head *dibh; | 824 | struct buffer_head *dibh; |
568 | struct gfs2_alloc *al = &ip->i_alloc; | 825 | struct gfs2_alloc *al = ip->i_alloc; |
569 | struct gfs2_dinode *di; | 826 | struct gfs2_dinode *di; |
570 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); | 827 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); |
571 | unsigned int to = from + len; | 828 | unsigned int to = from + len; |
@@ -585,19 +842,16 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
585 | if (gfs2_is_stuffed(ip)) | 842 | if (gfs2_is_stuffed(ip)) |
586 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); | 843 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); |
587 | 844 | ||
588 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 845 | if (!gfs2_is_writeback(ip)) |
589 | gfs2_page_add_databufs(ip, page, from, to); | 846 | gfs2_page_add_databufs(ip, page, from, to); |
590 | 847 | ||
591 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 848 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
592 | 849 | ||
593 | if (likely(ret >= 0)) { | 850 | if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { |
594 | copied = ret; | 851 | di = (struct gfs2_dinode *)dibh->b_data; |
595 | if ((pos + copied) > inode->i_size) { | 852 | ip->i_di.di_size = inode->i_size; |
596 | di = (struct gfs2_dinode *)dibh->b_data; | 853 | di->di_size = cpu_to_be64(inode->i_size); |
597 | ip->i_di.di_size = inode->i_size; | 854 | mark_inode_dirty(inode); |
598 | di->di_size = cpu_to_be64(inode->i_size); | ||
599 | mark_inode_dirty(inode); | ||
600 | } | ||
601 | } | 855 | } |
602 | 856 | ||
603 | if (inode == sdp->sd_rindex) | 857 | if (inode == sdp->sd_rindex) |
@@ -606,7 +860,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
606 | brelse(dibh); | 860 | brelse(dibh); |
607 | gfs2_trans_end(sdp); | 861 | gfs2_trans_end(sdp); |
608 | failed: | 862 | failed: |
609 | if (al->al_requested) { | 863 | if (al) { |
610 | gfs2_inplace_release(ip); | 864 | gfs2_inplace_release(ip); |
611 | gfs2_quota_unlock(ip); | 865 | gfs2_quota_unlock(ip); |
612 | gfs2_alloc_put(ip); | 866 | gfs2_alloc_put(ip); |
@@ -625,11 +879,7 @@ failed: | |||
625 | 879 | ||
626 | static int gfs2_set_page_dirty(struct page *page) | 880 | static int gfs2_set_page_dirty(struct page *page) |
627 | { | 881 | { |
628 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 882 | SetPageChecked(page); |
629 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | ||
630 | |||
631 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | ||
632 | SetPageChecked(page); | ||
633 | return __set_page_dirty_buffers(page); | 883 | return __set_page_dirty_buffers(page); |
634 | } | 884 | } |
635 | 885 | ||
@@ -653,7 +903,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) | |||
653 | return 0; | 903 | return 0; |
654 | 904 | ||
655 | if (!gfs2_is_stuffed(ip)) | 905 | if (!gfs2_is_stuffed(ip)) |
656 | dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); | 906 | dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); |
657 | 907 | ||
658 | gfs2_glock_dq_uninit(&i_gh); | 908 | gfs2_glock_dq_uninit(&i_gh); |
659 | 909 | ||
@@ -719,13 +969,9 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
719 | { | 969 | { |
720 | /* | 970 | /* |
721 | * Should we return an error here? I can't see that O_DIRECT for | 971 | * Should we return an error here? I can't see that O_DIRECT for |
722 | * a journaled file makes any sense. For now we'll silently fall | 972 | * a stuffed file makes any sense. For now we'll silently fall |
723 | * back to buffered I/O, likewise we do the same for stuffed | 973 | * back to buffered I/O |
724 | * files since they are (a) small and (b) unaligned. | ||
725 | */ | 974 | */ |
726 | if (gfs2_is_jdata(ip)) | ||
727 | return 0; | ||
728 | |||
729 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
730 | return 0; | 976 | return 0; |
731 | 977 | ||
@@ -836,9 +1082,23 @@ cannot_release: | |||
836 | return 0; | 1082 | return 0; |
837 | } | 1083 | } |
838 | 1084 | ||
839 | const struct address_space_operations gfs2_file_aops = { | 1085 | static const struct address_space_operations gfs2_writeback_aops = { |
840 | .writepage = gfs2_writepage, | 1086 | .writepage = gfs2_writeback_writepage, |
841 | .writepages = gfs2_writepages, | 1087 | .writepages = gfs2_writeback_writepages, |
1088 | .readpage = gfs2_readpage, | ||
1089 | .readpages = gfs2_readpages, | ||
1090 | .sync_page = block_sync_page, | ||
1091 | .write_begin = gfs2_write_begin, | ||
1092 | .write_end = gfs2_write_end, | ||
1093 | .bmap = gfs2_bmap, | ||
1094 | .invalidatepage = gfs2_invalidatepage, | ||
1095 | .releasepage = gfs2_releasepage, | ||
1096 | .direct_IO = gfs2_direct_IO, | ||
1097 | .migratepage = buffer_migrate_page, | ||
1098 | }; | ||
1099 | |||
1100 | static const struct address_space_operations gfs2_ordered_aops = { | ||
1101 | .writepage = gfs2_ordered_writepage, | ||
842 | .readpage = gfs2_readpage, | 1102 | .readpage = gfs2_readpage, |
843 | .readpages = gfs2_readpages, | 1103 | .readpages = gfs2_readpages, |
844 | .sync_page = block_sync_page, | 1104 | .sync_page = block_sync_page, |
@@ -849,5 +1109,34 @@ const struct address_space_operations gfs2_file_aops = { | |||
849 | .invalidatepage = gfs2_invalidatepage, | 1109 | .invalidatepage = gfs2_invalidatepage, |
850 | .releasepage = gfs2_releasepage, | 1110 | .releasepage = gfs2_releasepage, |
851 | .direct_IO = gfs2_direct_IO, | 1111 | .direct_IO = gfs2_direct_IO, |
1112 | .migratepage = buffer_migrate_page, | ||
852 | }; | 1113 | }; |
853 | 1114 | ||
1115 | static const struct address_space_operations gfs2_jdata_aops = { | ||
1116 | .writepage = gfs2_jdata_writepage, | ||
1117 | .writepages = gfs2_jdata_writepages, | ||
1118 | .readpage = gfs2_readpage, | ||
1119 | .readpages = gfs2_readpages, | ||
1120 | .sync_page = block_sync_page, | ||
1121 | .write_begin = gfs2_write_begin, | ||
1122 | .write_end = gfs2_write_end, | ||
1123 | .set_page_dirty = gfs2_set_page_dirty, | ||
1124 | .bmap = gfs2_bmap, | ||
1125 | .invalidatepage = gfs2_invalidatepage, | ||
1126 | .releasepage = gfs2_releasepage, | ||
1127 | }; | ||
1128 | |||
1129 | void gfs2_set_aops(struct inode *inode) | ||
1130 | { | ||
1131 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1132 | |||
1133 | if (gfs2_is_writeback(ip)) | ||
1134 | inode->i_mapping->a_ops = &gfs2_writeback_aops; | ||
1135 | else if (gfs2_is_ordered(ip)) | ||
1136 | inode->i_mapping->a_ops = &gfs2_ordered_aops; | ||
1137 | else if (gfs2_is_jdata(ip)) | ||
1138 | inode->i_mapping->a_ops = &gfs2_jdata_aops; | ||
1139 | else | ||
1140 | BUG(); | ||
1141 | } | ||
1142 | |||
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index fa1b5b3d28b9..5da21285bba4 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h | |||
@@ -14,9 +14,10 @@ | |||
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | 16 | ||
17 | extern const struct address_space_operations gfs2_file_aops; | ||
18 | extern int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
19 | struct buffer_head *bh_result, int create); | ||
20 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | 17 | extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); |
18 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
19 | struct file_ra_state *ra_state, | ||
20 | char *buf, loff_t *pos, unsigned size); | ||
21 | extern void gfs2_set_aops(struct inode *inode); | ||
21 | 22 | ||
22 | #endif /* __OPS_ADDRESS_DOT_H__ */ | 23 | #endif /* __OPS_ADDRESS_DOT_H__ */ |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index bb11fd6752d3..f4842f2548cd 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -33,57 +33,12 @@ | |||
33 | #include "lm.h" | 33 | #include "lm.h" |
34 | #include "log.h" | 34 | #include "log.h" |
35 | #include "meta_io.h" | 35 | #include "meta_io.h" |
36 | #include "ops_file.h" | ||
37 | #include "ops_vm.h" | ||
38 | #include "quota.h" | 36 | #include "quota.h" |
39 | #include "rgrp.h" | 37 | #include "rgrp.h" |
40 | #include "trans.h" | 38 | #include "trans.h" |
41 | #include "util.h" | 39 | #include "util.h" |
42 | #include "eaops.h" | 40 | #include "eaops.h" |
43 | 41 | #include "ops_address.h" | |
44 | /* | ||
45 | * Most fields left uninitialised to catch anybody who tries to | ||
46 | * use them. f_flags set to prevent file_accessed() from touching | ||
47 | * any other part of this. Its use is purely as a flag so that we | ||
48 | * know (in readpage()) whether or not do to locking. | ||
49 | */ | ||
50 | struct file gfs2_internal_file_sentinel = { | ||
51 | .f_flags = O_NOATIME|O_RDONLY, | ||
52 | }; | ||
53 | |||
54 | static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, | ||
55 | unsigned long offset, unsigned long size) | ||
56 | { | ||
57 | char *kaddr; | ||
58 | unsigned long count = desc->count; | ||
59 | |||
60 | if (size > count) | ||
61 | size = count; | ||
62 | |||
63 | kaddr = kmap(page); | ||
64 | memcpy(desc->arg.data, kaddr + offset, size); | ||
65 | kunmap(page); | ||
66 | |||
67 | desc->count = count - size; | ||
68 | desc->written += size; | ||
69 | desc->arg.buf += size; | ||
70 | return size; | ||
71 | } | ||
72 | |||
73 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
74 | char *buf, loff_t *pos, unsigned size) | ||
75 | { | ||
76 | struct inode *inode = &ip->i_inode; | ||
77 | read_descriptor_t desc; | ||
78 | desc.written = 0; | ||
79 | desc.arg.data = buf; | ||
80 | desc.count = size; | ||
81 | desc.error = 0; | ||
82 | do_generic_mapping_read(inode->i_mapping, ra_state, | ||
83 | &gfs2_internal_file_sentinel, pos, &desc, | ||
84 | gfs2_read_actor); | ||
85 | return desc.written ? desc.written : desc.error; | ||
86 | } | ||
87 | 42 | ||
88 | /** | 43 | /** |
89 | * gfs2_llseek - seek to a location in a file | 44 | * gfs2_llseek - seek to a location in a file |
@@ -214,7 +169,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
214 | if (put_user(fsflags, ptr)) | 169 | if (put_user(fsflags, ptr)) |
215 | error = -EFAULT; | 170 | error = -EFAULT; |
216 | 171 | ||
217 | gfs2_glock_dq_m(1, &gh); | 172 | gfs2_glock_dq(&gh); |
218 | gfs2_holder_uninit(&gh); | 173 | gfs2_holder_uninit(&gh); |
219 | return error; | 174 | return error; |
220 | } | 175 | } |
@@ -291,7 +246,16 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
291 | if (error) | 246 | if (error) |
292 | goto out; | 247 | goto out; |
293 | } | 248 | } |
294 | 249 | if ((flags ^ new_flags) & GFS2_DIF_JDATA) { | |
250 | if (flags & GFS2_DIF_JDATA) | ||
251 | gfs2_log_flush(sdp, ip->i_gl); | ||
252 | error = filemap_fdatawrite(inode->i_mapping); | ||
253 | if (error) | ||
254 | goto out; | ||
255 | error = filemap_fdatawait(inode->i_mapping); | ||
256 | if (error) | ||
257 | goto out; | ||
258 | } | ||
295 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 259 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); |
296 | if (error) | 260 | if (error) |
297 | goto out; | 261 | goto out; |
@@ -303,6 +267,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
303 | gfs2_dinode_out(ip, bh->b_data); | 267 | gfs2_dinode_out(ip, bh->b_data); |
304 | brelse(bh); | 268 | brelse(bh); |
305 | gfs2_set_inode_flags(inode); | 269 | gfs2_set_inode_flags(inode); |
270 | gfs2_set_aops(inode); | ||
306 | out_trans_end: | 271 | out_trans_end: |
307 | gfs2_trans_end(sdp); | 272 | gfs2_trans_end(sdp); |
308 | out: | 273 | out: |
@@ -338,6 +303,128 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
338 | return -ENOTTY; | 303 | return -ENOTTY; |
339 | } | 304 | } |
340 | 305 | ||
306 | /** | ||
307 | * gfs2_allocate_page_backing - Use bmap to allocate blocks | ||
308 | * @page: The (locked) page to allocate backing for | ||
309 | * | ||
310 | * We try to allocate all the blocks required for the page in | ||
311 | * one go. This might fail for various reasons, so we keep | ||
312 | * trying until all the blocks to back this page are allocated. | ||
313 | * If some of the blocks are already allocated, thats ok too. | ||
314 | */ | ||
315 | |||
316 | static int gfs2_allocate_page_backing(struct page *page) | ||
317 | { | ||
318 | struct inode *inode = page->mapping->host; | ||
319 | struct buffer_head bh; | ||
320 | unsigned long size = PAGE_CACHE_SIZE; | ||
321 | u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
322 | |||
323 | do { | ||
324 | bh.b_state = 0; | ||
325 | bh.b_size = size; | ||
326 | gfs2_block_map(inode, lblock, &bh, 1); | ||
327 | if (!buffer_mapped(&bh)) | ||
328 | return -EIO; | ||
329 | size -= bh.b_size; | ||
330 | lblock += (bh.b_size >> inode->i_blkbits); | ||
331 | } while(size > 0); | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable | ||
337 | * @vma: The virtual memory area | ||
338 | * @page: The page which is about to become writable | ||
339 | * | ||
340 | * When the page becomes writable, we need to ensure that we have | ||
341 | * blocks allocated on disk to back that page. | ||
342 | */ | ||
343 | |||
344 | static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
345 | { | ||
346 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | ||
347 | struct gfs2_inode *ip = GFS2_I(inode); | ||
348 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
349 | unsigned long last_index; | ||
350 | u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits); | ||
351 | unsigned int data_blocks, ind_blocks, rblocks; | ||
352 | int alloc_required = 0; | ||
353 | struct gfs2_holder gh; | ||
354 | struct gfs2_alloc *al; | ||
355 | int ret; | ||
356 | |||
357 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | ||
358 | ret = gfs2_glock_nq_atime(&gh); | ||
359 | if (ret) | ||
360 | goto out; | ||
361 | |||
362 | set_bit(GIF_SW_PAGED, &ip->i_flags); | ||
363 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | ||
364 | ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); | ||
365 | if (ret || !alloc_required) | ||
366 | goto out_unlock; | ||
367 | ret = -ENOMEM; | ||
368 | al = gfs2_alloc_get(ip); | ||
369 | if (al == NULL) | ||
370 | goto out_unlock; | ||
371 | |||
372 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
373 | if (ret) | ||
374 | goto out_alloc_put; | ||
375 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
376 | if (ret) | ||
377 | goto out_quota_unlock; | ||
378 | al->al_requested = data_blocks + ind_blocks; | ||
379 | ret = gfs2_inplace_reserve(ip); | ||
380 | if (ret) | ||
381 | goto out_quota_unlock; | ||
382 | |||
383 | rblocks = RES_DINODE + ind_blocks; | ||
384 | if (gfs2_is_jdata(ip)) | ||
385 | rblocks += data_blocks ? data_blocks : 1; | ||
386 | if (ind_blocks || data_blocks) | ||
387 | rblocks += RES_STATFS + RES_QUOTA; | ||
388 | ret = gfs2_trans_begin(sdp, rblocks, 0); | ||
389 | if (ret) | ||
390 | goto out_trans_fail; | ||
391 | |||
392 | lock_page(page); | ||
393 | ret = -EINVAL; | ||
394 | last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; | ||
395 | if (page->index > last_index) | ||
396 | goto out_unlock_page; | ||
397 | ret = 0; | ||
398 | if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) | ||
399 | goto out_unlock_page; | ||
400 | if (gfs2_is_stuffed(ip)) { | ||
401 | ret = gfs2_unstuff_dinode(ip, page); | ||
402 | if (ret) | ||
403 | goto out_unlock_page; | ||
404 | } | ||
405 | ret = gfs2_allocate_page_backing(page); | ||
406 | |||
407 | out_unlock_page: | ||
408 | unlock_page(page); | ||
409 | gfs2_trans_end(sdp); | ||
410 | out_trans_fail: | ||
411 | gfs2_inplace_release(ip); | ||
412 | out_quota_unlock: | ||
413 | gfs2_quota_unlock(ip); | ||
414 | out_alloc_put: | ||
415 | gfs2_alloc_put(ip); | ||
416 | out_unlock: | ||
417 | gfs2_glock_dq(&gh); | ||
418 | out: | ||
419 | gfs2_holder_uninit(&gh); | ||
420 | return ret; | ||
421 | } | ||
422 | |||
423 | static struct vm_operations_struct gfs2_vm_ops = { | ||
424 | .fault = filemap_fault, | ||
425 | .page_mkwrite = gfs2_page_mkwrite, | ||
426 | }; | ||
427 | |||
341 | 428 | ||
342 | /** | 429 | /** |
343 | * gfs2_mmap - | 430 | * gfs2_mmap - |
@@ -360,14 +447,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
360 | return error; | 447 | return error; |
361 | } | 448 | } |
362 | 449 | ||
363 | /* This is VM_MAYWRITE instead of VM_WRITE because a call | 450 | vma->vm_ops = &gfs2_vm_ops; |
364 | to mprotect() can turn on VM_WRITE later. */ | ||
365 | |||
366 | if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == | ||
367 | (VM_MAYSHARE | VM_MAYWRITE)) | ||
368 | vma->vm_ops = &gfs2_vm_ops_sharewrite; | ||
369 | else | ||
370 | vma->vm_ops = &gfs2_vm_ops_private; | ||
371 | 451 | ||
372 | gfs2_glock_dq_uninit(&i_gh); | 452 | gfs2_glock_dq_uninit(&i_gh); |
373 | 453 | ||
@@ -538,15 +618,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) | |||
538 | if (__mandatory_lock(&ip->i_inode)) | 618 | if (__mandatory_lock(&ip->i_inode)) |
539 | return -ENOLCK; | 619 | return -ENOLCK; |
540 | 620 | ||
541 | if (sdp->sd_args.ar_localflocks) { | ||
542 | if (IS_GETLK(cmd)) { | ||
543 | posix_test_lock(file, fl); | ||
544 | return 0; | ||
545 | } else { | ||
546 | return posix_lock_file_wait(file, fl); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | if (cmd == F_CANCELLK) { | 621 | if (cmd == F_CANCELLK) { |
551 | /* Hack: */ | 622 | /* Hack: */ |
552 | cmd = F_SETLK; | 623 | cmd = F_SETLK; |
@@ -632,16 +703,12 @@ static void do_unflock(struct file *file, struct file_lock *fl) | |||
632 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | 703 | static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) |
633 | { | 704 | { |
634 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | 705 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); |
635 | struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); | ||
636 | 706 | ||
637 | if (!(fl->fl_flags & FL_FLOCK)) | 707 | if (!(fl->fl_flags & FL_FLOCK)) |
638 | return -ENOLCK; | 708 | return -ENOLCK; |
639 | if (__mandatory_lock(&ip->i_inode)) | 709 | if (__mandatory_lock(&ip->i_inode)) |
640 | return -ENOLCK; | 710 | return -ENOLCK; |
641 | 711 | ||
642 | if (sdp->sd_args.ar_localflocks) | ||
643 | return flock_lock_file_wait(file, fl); | ||
644 | |||
645 | if (fl->fl_type == F_UNLCK) { | 712 | if (fl->fl_type == F_UNLCK) { |
646 | do_unflock(file, fl); | 713 | do_unflock(file, fl); |
647 | return 0; | 714 | return 0; |
@@ -678,3 +745,27 @@ const struct file_operations gfs2_dir_fops = { | |||
678 | .flock = gfs2_flock, | 745 | .flock = gfs2_flock, |
679 | }; | 746 | }; |
680 | 747 | ||
748 | const struct file_operations gfs2_file_fops_nolock = { | ||
749 | .llseek = gfs2_llseek, | ||
750 | .read = do_sync_read, | ||
751 | .aio_read = generic_file_aio_read, | ||
752 | .write = do_sync_write, | ||
753 | .aio_write = generic_file_aio_write, | ||
754 | .unlocked_ioctl = gfs2_ioctl, | ||
755 | .mmap = gfs2_mmap, | ||
756 | .open = gfs2_open, | ||
757 | .release = gfs2_close, | ||
758 | .fsync = gfs2_fsync, | ||
759 | .splice_read = generic_file_splice_read, | ||
760 | .splice_write = generic_file_splice_write, | ||
761 | .setlease = gfs2_setlease, | ||
762 | }; | ||
763 | |||
764 | const struct file_operations gfs2_dir_fops_nolock = { | ||
765 | .readdir = gfs2_readdir, | ||
766 | .unlocked_ioctl = gfs2_ioctl, | ||
767 | .open = gfs2_open, | ||
768 | .release = gfs2_close, | ||
769 | .fsync = gfs2_fsync, | ||
770 | }; | ||
771 | |||
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h deleted file mode 100644 index 7e5d8ec9c846..000000000000 --- a/fs/gfs2/ops_file.h +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __OPS_FILE_DOT_H__ | ||
11 | #define __OPS_FILE_DOT_H__ | ||
12 | |||
13 | #include <linux/fs.h> | ||
14 | struct gfs2_inode; | ||
15 | |||
16 | extern struct file gfs2_internal_file_sentinel; | ||
17 | extern int gfs2_internal_read(struct gfs2_inode *ip, | ||
18 | struct file_ra_state *ra_state, | ||
19 | char *buf, loff_t *pos, unsigned size); | ||
20 | extern void gfs2_set_inode_flags(struct inode *inode); | ||
21 | extern const struct file_operations gfs2_file_fops; | ||
22 | extern const struct file_operations gfs2_dir_fops; | ||
23 | |||
24 | #endif /* __OPS_FILE_DOT_H__ */ | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 17de58e83d92..43d511bba52d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "gfs2.h" | 22 | #include "gfs2.h" |
23 | #include "incore.h" | 23 | #include "incore.h" |
24 | #include "bmap.h" | ||
24 | #include "daemon.h" | 25 | #include "daemon.h" |
25 | #include "glock.h" | 26 | #include "glock.h" |
26 | #include "glops.h" | 27 | #include "glops.h" |
@@ -59,7 +60,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
59 | 60 | ||
60 | mutex_init(&sdp->sd_inum_mutex); | 61 | mutex_init(&sdp->sd_inum_mutex); |
61 | spin_lock_init(&sdp->sd_statfs_spin); | 62 | spin_lock_init(&sdp->sd_statfs_spin); |
62 | mutex_init(&sdp->sd_statfs_mutex); | ||
63 | 63 | ||
64 | spin_lock_init(&sdp->sd_rindex_spin); | 64 | spin_lock_init(&sdp->sd_rindex_spin); |
65 | mutex_init(&sdp->sd_rindex_mutex); | 65 | mutex_init(&sdp->sd_rindex_mutex); |
@@ -77,7 +77,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
77 | 77 | ||
78 | spin_lock_init(&sdp->sd_log_lock); | 78 | spin_lock_init(&sdp->sd_log_lock); |
79 | 79 | ||
80 | INIT_LIST_HEAD(&sdp->sd_log_le_gl); | ||
81 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); | 80 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); |
82 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); | 81 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); |
83 | INIT_LIST_HEAD(&sdp->sd_log_le_rg); | 82 | INIT_LIST_HEAD(&sdp->sd_log_le_rg); |
@@ -303,6 +302,67 @@ out: | |||
303 | return error; | 302 | return error; |
304 | } | 303 | } |
305 | 304 | ||
305 | /** | ||
306 | * map_journal_extents - create a reusable "extent" mapping from all logical | ||
307 | * blocks to all physical blocks for the given journal. This will save | ||
308 | * us time when writing journal blocks. Most journals will have only one | ||
309 | * extent that maps all their logical blocks. That's because gfs2.mkfs | ||
310 | * arranges the journal blocks sequentially to maximize performance. | ||
311 | * So the extent would map the first block for the entire file length. | ||
312 | * However, gfs2_jadd can happen while file activity is happening, so | ||
313 | * those journals may not be sequential. Less likely is the case where | ||
314 | * the users created their own journals by mounting the metafs and | ||
315 | * laying it out. But it's still possible. These journals might have | ||
316 | * several extents. | ||
317 | * | ||
318 | * TODO: This should be done in bigger chunks rather than one block at a time, | ||
319 | * but since it's only done at mount time, I'm not worried about the | ||
320 | * time it takes. | ||
321 | */ | ||
322 | static int map_journal_extents(struct gfs2_sbd *sdp) | ||
323 | { | ||
324 | struct gfs2_jdesc *jd = sdp->sd_jdesc; | ||
325 | unsigned int lb; | ||
326 | u64 db, prev_db; /* logical block, disk block, prev disk block */ | ||
327 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
328 | struct gfs2_journal_extent *jext = NULL; | ||
329 | struct buffer_head bh; | ||
330 | int rc = 0; | ||
331 | |||
332 | prev_db = 0; | ||
333 | |||
334 | for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) { | ||
335 | bh.b_state = 0; | ||
336 | bh.b_blocknr = 0; | ||
337 | bh.b_size = 1 << ip->i_inode.i_blkbits; | ||
338 | rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); | ||
339 | db = bh.b_blocknr; | ||
340 | if (rc || !db) { | ||
341 | printk(KERN_INFO "GFS2 journal mapping error %d: lb=" | ||
342 | "%u db=%llu\n", rc, lb, (unsigned long long)db); | ||
343 | break; | ||
344 | } | ||
345 | if (!prev_db || db != prev_db + 1) { | ||
346 | jext = kzalloc(sizeof(struct gfs2_journal_extent), | ||
347 | GFP_KERNEL); | ||
348 | if (!jext) { | ||
349 | printk(KERN_INFO "GFS2 error: out of memory " | ||
350 | "mapping journal extents.\n"); | ||
351 | rc = -ENOMEM; | ||
352 | break; | ||
353 | } | ||
354 | jext->dblock = db; | ||
355 | jext->lblock = lb; | ||
356 | jext->blocks = 1; | ||
357 | list_add_tail(&jext->extent_list, &jd->extent_list); | ||
358 | } else { | ||
359 | jext->blocks++; | ||
360 | } | ||
361 | prev_db = db; | ||
362 | } | ||
363 | return rc; | ||
364 | } | ||
365 | |||
306 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 366 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
307 | { | 367 | { |
308 | struct gfs2_holder ji_gh; | 368 | struct gfs2_holder ji_gh; |
@@ -340,7 +400,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
340 | 400 | ||
341 | if (sdp->sd_args.ar_spectator) { | 401 | if (sdp->sd_args.ar_spectator) { |
342 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); | 402 | sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); |
343 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | 403 | atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); |
344 | } else { | 404 | } else { |
345 | if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { | 405 | if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { |
346 | fs_err(sdp, "can't mount journal #%u\n", | 406 | fs_err(sdp, "can't mount journal #%u\n", |
@@ -377,7 +437,10 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
377 | sdp->sd_jdesc->jd_jid, error); | 437 | sdp->sd_jdesc->jd_jid, error); |
378 | goto fail_jinode_gh; | 438 | goto fail_jinode_gh; |
379 | } | 439 | } |
380 | sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; | 440 | atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); |
441 | |||
442 | /* Map the extents for this journal's blocks */ | ||
443 | map_journal_extents(sdp); | ||
381 | } | 444 | } |
382 | 445 | ||
383 | if (sdp->sd_lockstruct.ls_first) { | 446 | if (sdp->sd_lockstruct.ls_first) { |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 291f0c7eaa3b..9f71372c1757 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -61,7 +61,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, | |||
61 | inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); | 61 | inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); |
62 | if (!IS_ERR(inode)) { | 62 | if (!IS_ERR(inode)) { |
63 | gfs2_trans_end(sdp); | 63 | gfs2_trans_end(sdp); |
64 | if (dip->i_alloc.al_rgd) | 64 | if (dip->i_alloc->al_rgd) |
65 | gfs2_inplace_release(dip); | 65 | gfs2_inplace_release(dip); |
66 | gfs2_quota_unlock(dip); | 66 | gfs2_quota_unlock(dip); |
67 | gfs2_alloc_put(dip); | 67 | gfs2_alloc_put(dip); |
@@ -113,8 +113,18 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
113 | if (inode && IS_ERR(inode)) | 113 | if (inode && IS_ERR(inode)) |
114 | return ERR_PTR(PTR_ERR(inode)); | 114 | return ERR_PTR(PTR_ERR(inode)); |
115 | 115 | ||
116 | if (inode) | 116 | if (inode) { |
117 | struct gfs2_glock *gl = GFS2_I(inode)->i_gl; | ||
118 | struct gfs2_holder gh; | ||
119 | int error; | ||
120 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | ||
121 | if (error) { | ||
122 | iput(inode); | ||
123 | return ERR_PTR(error); | ||
124 | } | ||
125 | gfs2_glock_dq_uninit(&gh); | ||
117 | return d_splice_alias(inode, dentry); | 126 | return d_splice_alias(inode, dentry); |
127 | } | ||
118 | d_add(dentry, inode); | 128 | d_add(dentry, inode); |
119 | 129 | ||
120 | return NULL; | 130 | return NULL; |
@@ -366,7 +376,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
366 | } | 376 | } |
367 | 377 | ||
368 | gfs2_trans_end(sdp); | 378 | gfs2_trans_end(sdp); |
369 | if (dip->i_alloc.al_rgd) | 379 | if (dip->i_alloc->al_rgd) |
370 | gfs2_inplace_release(dip); | 380 | gfs2_inplace_release(dip); |
371 | gfs2_quota_unlock(dip); | 381 | gfs2_quota_unlock(dip); |
372 | gfs2_alloc_put(dip); | 382 | gfs2_alloc_put(dip); |
@@ -442,7 +452,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
442 | gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ | 452 | gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ |
443 | 453 | ||
444 | gfs2_trans_end(sdp); | 454 | gfs2_trans_end(sdp); |
445 | if (dip->i_alloc.al_rgd) | 455 | if (dip->i_alloc->al_rgd) |
446 | gfs2_inplace_release(dip); | 456 | gfs2_inplace_release(dip); |
447 | gfs2_quota_unlock(dip); | 457 | gfs2_quota_unlock(dip); |
448 | gfs2_alloc_put(dip); | 458 | gfs2_alloc_put(dip); |
@@ -548,7 +558,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
548 | } | 558 | } |
549 | 559 | ||
550 | gfs2_trans_end(sdp); | 560 | gfs2_trans_end(sdp); |
551 | if (dip->i_alloc.al_rgd) | 561 | if (dip->i_alloc->al_rgd) |
552 | gfs2_inplace_release(dip); | 562 | gfs2_inplace_release(dip); |
553 | gfs2_quota_unlock(dip); | 563 | gfs2_quota_unlock(dip); |
554 | gfs2_alloc_put(dip); | 564 | gfs2_alloc_put(dip); |
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index 34f0caac1a03..fd8cee231e1d 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h | |||
@@ -16,5 +16,11 @@ extern const struct inode_operations gfs2_file_iops; | |||
16 | extern const struct inode_operations gfs2_dir_iops; | 16 | extern const struct inode_operations gfs2_dir_iops; |
17 | extern const struct inode_operations gfs2_symlink_iops; | 17 | extern const struct inode_operations gfs2_symlink_iops; |
18 | extern const struct inode_operations gfs2_dev_iops; | 18 | extern const struct inode_operations gfs2_dev_iops; |
19 | extern const struct file_operations gfs2_file_fops; | ||
20 | extern const struct file_operations gfs2_dir_fops; | ||
21 | extern const struct file_operations gfs2_file_fops_nolock; | ||
22 | extern const struct file_operations gfs2_dir_fops_nolock; | ||
23 | |||
24 | extern void gfs2_set_inode_flags(struct inode *inode); | ||
19 | 25 | ||
20 | #endif /* __OPS_INODE_DOT_H__ */ | 26 | #endif /* __OPS_INODE_DOT_H__ */ |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 950f31460e8b..5e524217944a 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
@@ -487,7 +487,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) | |||
487 | if (ip) { | 487 | if (ip) { |
488 | ip->i_flags = 0; | 488 | ip->i_flags = 0; |
489 | ip->i_gl = NULL; | 489 | ip->i_gl = NULL; |
490 | ip->i_last_pfault = jiffies; | ||
491 | } | 490 | } |
492 | return &ip->i_inode; | 491 | return &ip->i_inode; |
493 | } | 492 | } |
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c deleted file mode 100644 index 927d739d4685..000000000000 --- a/fs/gfs2/ops_vm.c +++ /dev/null | |||
@@ -1,169 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #include <linux/slab.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/completion.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/gfs2_ondisk.h> | ||
17 | #include <linux/lm_interface.h> | ||
18 | |||
19 | #include "gfs2.h" | ||
20 | #include "incore.h" | ||
21 | #include "bmap.h" | ||
22 | #include "glock.h" | ||
23 | #include "inode.h" | ||
24 | #include "ops_vm.h" | ||
25 | #include "quota.h" | ||
26 | #include "rgrp.h" | ||
27 | #include "trans.h" | ||
28 | #include "util.h" | ||
29 | |||
30 | static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
31 | { | ||
32 | struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); | ||
33 | |||
34 | set_bit(GIF_PAGED, &ip->i_flags); | ||
35 | return filemap_fault(vma, vmf); | ||
36 | } | ||
37 | |||
38 | static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) | ||
39 | { | ||
40 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
41 | unsigned long index = page->index; | ||
42 | u64 lblock = index << (PAGE_CACHE_SHIFT - | ||
43 | sdp->sd_sb.sb_bsize_shift); | ||
44 | unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; | ||
45 | struct gfs2_alloc *al; | ||
46 | unsigned int data_blocks, ind_blocks; | ||
47 | unsigned int x; | ||
48 | int error; | ||
49 | |||
50 | al = gfs2_alloc_get(ip); | ||
51 | |||
52 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
53 | if (error) | ||
54 | goto out; | ||
55 | |||
56 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
57 | if (error) | ||
58 | goto out_gunlock_q; | ||
59 | |||
60 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | ||
61 | |||
62 | al->al_requested = data_blocks + ind_blocks; | ||
63 | |||
64 | error = gfs2_inplace_reserve(ip); | ||
65 | if (error) | ||
66 | goto out_gunlock_q; | ||
67 | |||
68 | error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + | ||
69 | ind_blocks + RES_DINODE + | ||
70 | RES_STATFS + RES_QUOTA, 0); | ||
71 | if (error) | ||
72 | goto out_ipres; | ||
73 | |||
74 | if (gfs2_is_stuffed(ip)) { | ||
75 | error = gfs2_unstuff_dinode(ip, NULL); | ||
76 | if (error) | ||
77 | goto out_trans; | ||
78 | } | ||
79 | |||
80 | for (x = 0; x < blocks; ) { | ||
81 | u64 dblock; | ||
82 | unsigned int extlen; | ||
83 | int new = 1; | ||
84 | |||
85 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); | ||
86 | if (error) | ||
87 | goto out_trans; | ||
88 | |||
89 | lblock += extlen; | ||
90 | x += extlen; | ||
91 | } | ||
92 | |||
93 | gfs2_assert_warn(sdp, al->al_alloced); | ||
94 | |||
95 | out_trans: | ||
96 | gfs2_trans_end(sdp); | ||
97 | out_ipres: | ||
98 | gfs2_inplace_release(ip); | ||
99 | out_gunlock_q: | ||
100 | gfs2_quota_unlock(ip); | ||
101 | out: | ||
102 | gfs2_alloc_put(ip); | ||
103 | return error; | ||
104 | } | ||
105 | |||
106 | static int gfs2_sharewrite_fault(struct vm_area_struct *vma, | ||
107 | struct vm_fault *vmf) | ||
108 | { | ||
109 | struct file *file = vma->vm_file; | ||
110 | struct gfs2_file *gf = file->private_data; | ||
111 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
112 | struct gfs2_holder i_gh; | ||
113 | int alloc_required; | ||
114 | int error; | ||
115 | int ret = 0; | ||
116 | |||
117 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | ||
118 | if (error) | ||
119 | goto out; | ||
120 | |||
121 | set_bit(GIF_PAGED, &ip->i_flags); | ||
122 | set_bit(GIF_SW_PAGED, &ip->i_flags); | ||
123 | |||
124 | error = gfs2_write_alloc_required(ip, | ||
125 | (u64)vmf->pgoff << PAGE_CACHE_SHIFT, | ||
126 | PAGE_CACHE_SIZE, &alloc_required); | ||
127 | if (error) { | ||
128 | ret = VM_FAULT_OOM; /* XXX: are these right? */ | ||
129 | goto out_unlock; | ||
130 | } | ||
131 | |||
132 | set_bit(GFF_EXLOCK, &gf->f_flags); | ||
133 | ret = filemap_fault(vma, vmf); | ||
134 | clear_bit(GFF_EXLOCK, &gf->f_flags); | ||
135 | if (ret & VM_FAULT_ERROR) | ||
136 | goto out_unlock; | ||
137 | |||
138 | if (alloc_required) { | ||
139 | /* XXX: do we need to drop page lock around alloc_page_backing?*/ | ||
140 | error = alloc_page_backing(ip, vmf->page); | ||
141 | if (error) { | ||
142 | /* | ||
143 | * VM_FAULT_LOCKED should always be the case for | ||
144 | * filemap_fault, but it may not be in a future | ||
145 | * implementation. | ||
146 | */ | ||
147 | if (ret & VM_FAULT_LOCKED) | ||
148 | unlock_page(vmf->page); | ||
149 | page_cache_release(vmf->page); | ||
150 | ret = VM_FAULT_OOM; | ||
151 | goto out_unlock; | ||
152 | } | ||
153 | set_page_dirty(vmf->page); | ||
154 | } | ||
155 | |||
156 | out_unlock: | ||
157 | gfs2_glock_dq_uninit(&i_gh); | ||
158 | out: | ||
159 | return ret; | ||
160 | } | ||
161 | |||
162 | struct vm_operations_struct gfs2_vm_ops_private = { | ||
163 | .fault = gfs2_private_fault, | ||
164 | }; | ||
165 | |||
166 | struct vm_operations_struct gfs2_vm_ops_sharewrite = { | ||
167 | .fault = gfs2_sharewrite_fault, | ||
168 | }; | ||
169 | |||
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h deleted file mode 100644 index 4ae8f43ed5e3..000000000000 --- a/fs/gfs2/ops_vm.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
4 | * | ||
5 | * This copyrighted material is made available to anyone wishing to use, | ||
6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
7 | * of the GNU General Public License version 2. | ||
8 | */ | ||
9 | |||
10 | #ifndef __OPS_VM_DOT_H__ | ||
11 | #define __OPS_VM_DOT_H__ | ||
12 | |||
13 | #include <linux/mm.h> | ||
14 | |||
15 | extern struct vm_operations_struct gfs2_vm_ops_private; | ||
16 | extern struct vm_operations_struct gfs2_vm_ops_sharewrite; | ||
17 | |||
18 | #endif /* __OPS_VM_DOT_H__ */ | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index addb51e0f135..a08dabd6ce90 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -59,7 +59,6 @@ | |||
59 | #include "super.h" | 59 | #include "super.h" |
60 | #include "trans.h" | 60 | #include "trans.h" |
61 | #include "inode.h" | 61 | #include "inode.h" |
62 | #include "ops_file.h" | ||
63 | #include "ops_address.h" | 62 | #include "ops_address.h" |
64 | #include "util.h" | 63 | #include "util.h" |
65 | 64 | ||
@@ -274,10 +273,10 @@ static int bh_get(struct gfs2_quota_data *qd) | |||
274 | } | 273 | } |
275 | 274 | ||
276 | block = qd->qd_slot / sdp->sd_qc_per_block; | 275 | block = qd->qd_slot / sdp->sd_qc_per_block; |
277 | offset = qd->qd_slot % sdp->sd_qc_per_block;; | 276 | offset = qd->qd_slot % sdp->sd_qc_per_block; |
278 | 277 | ||
279 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; | 278 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; |
280 | error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map); | 279 | error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); |
281 | if (error) | 280 | if (error) |
282 | goto fail; | 281 | goto fail; |
283 | error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); | 282 | error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); |
@@ -454,7 +453,7 @@ static void qdsb_put(struct gfs2_quota_data *qd) | |||
454 | int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) | 453 | int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) |
455 | { | 454 | { |
456 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 455 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
457 | struct gfs2_alloc *al = &ip->i_alloc; | 456 | struct gfs2_alloc *al = ip->i_alloc; |
458 | struct gfs2_quota_data **qd = al->al_qd; | 457 | struct gfs2_quota_data **qd = al->al_qd; |
459 | int error; | 458 | int error; |
460 | 459 | ||
@@ -502,7 +501,7 @@ out: | |||
502 | void gfs2_quota_unhold(struct gfs2_inode *ip) | 501 | void gfs2_quota_unhold(struct gfs2_inode *ip) |
503 | { | 502 | { |
504 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 503 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
505 | struct gfs2_alloc *al = &ip->i_alloc; | 504 | struct gfs2_alloc *al = ip->i_alloc; |
506 | unsigned int x; | 505 | unsigned int x; |
507 | 506 | ||
508 | gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); | 507 | gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); |
@@ -646,7 +645,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
646 | } | 645 | } |
647 | 646 | ||
648 | if (!buffer_mapped(bh)) { | 647 | if (!buffer_mapped(bh)) { |
649 | gfs2_get_block(inode, iblock, bh, 1); | 648 | gfs2_block_map(inode, iblock, bh, 1); |
650 | if (!buffer_mapped(bh)) | 649 | if (!buffer_mapped(bh)) |
651 | goto unlock; | 650 | goto unlock; |
652 | } | 651 | } |
@@ -793,11 +792,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, | |||
793 | struct gfs2_holder i_gh; | 792 | struct gfs2_holder i_gh; |
794 | struct gfs2_quota_host q; | 793 | struct gfs2_quota_host q; |
795 | char buf[sizeof(struct gfs2_quota)]; | 794 | char buf[sizeof(struct gfs2_quota)]; |
796 | struct file_ra_state ra_state; | ||
797 | int error; | 795 | int error; |
798 | struct gfs2_quota_lvb *qlvb; | 796 | struct gfs2_quota_lvb *qlvb; |
799 | 797 | ||
800 | file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping); | ||
801 | restart: | 798 | restart: |
802 | error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); | 799 | error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); |
803 | if (error) | 800 | if (error) |
@@ -820,8 +817,8 @@ restart: | |||
820 | 817 | ||
821 | memset(buf, 0, sizeof(struct gfs2_quota)); | 818 | memset(buf, 0, sizeof(struct gfs2_quota)); |
822 | pos = qd2offset(qd); | 819 | pos = qd2offset(qd); |
823 | error = gfs2_internal_read(ip, &ra_state, buf, | 820 | error = gfs2_internal_read(ip, NULL, buf, &pos, |
824 | &pos, sizeof(struct gfs2_quota)); | 821 | sizeof(struct gfs2_quota)); |
825 | if (error < 0) | 822 | if (error < 0) |
826 | goto fail_gunlock; | 823 | goto fail_gunlock; |
827 | 824 | ||
@@ -856,7 +853,7 @@ fail: | |||
856 | int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | 853 | int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) |
857 | { | 854 | { |
858 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 855 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
859 | struct gfs2_alloc *al = &ip->i_alloc; | 856 | struct gfs2_alloc *al = ip->i_alloc; |
860 | unsigned int x; | 857 | unsigned int x; |
861 | int error = 0; | 858 | int error = 0; |
862 | 859 | ||
@@ -924,7 +921,7 @@ static int need_sync(struct gfs2_quota_data *qd) | |||
924 | 921 | ||
925 | void gfs2_quota_unlock(struct gfs2_inode *ip) | 922 | void gfs2_quota_unlock(struct gfs2_inode *ip) |
926 | { | 923 | { |
927 | struct gfs2_alloc *al = &ip->i_alloc; | 924 | struct gfs2_alloc *al = ip->i_alloc; |
928 | struct gfs2_quota_data *qda[4]; | 925 | struct gfs2_quota_data *qda[4]; |
929 | unsigned int count = 0; | 926 | unsigned int count = 0; |
930 | unsigned int x; | 927 | unsigned int x; |
@@ -972,7 +969,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) | |||
972 | int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | 969 | int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) |
973 | { | 970 | { |
974 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 971 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
975 | struct gfs2_alloc *al = &ip->i_alloc; | 972 | struct gfs2_alloc *al = ip->i_alloc; |
976 | struct gfs2_quota_data *qd; | 973 | struct gfs2_quota_data *qd; |
977 | s64 value; | 974 | s64 value; |
978 | unsigned int x; | 975 | unsigned int x; |
@@ -1016,10 +1013,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
1016 | void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | 1013 | void gfs2_quota_change(struct gfs2_inode *ip, s64 change, |
1017 | u32 uid, u32 gid) | 1014 | u32 uid, u32 gid) |
1018 | { | 1015 | { |
1019 | struct gfs2_alloc *al = &ip->i_alloc; | 1016 | struct gfs2_alloc *al = ip->i_alloc; |
1020 | struct gfs2_quota_data *qd; | 1017 | struct gfs2_quota_data *qd; |
1021 | unsigned int x; | 1018 | unsigned int x; |
1022 | unsigned int found = 0; | ||
1023 | 1019 | ||
1024 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) | 1020 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) |
1025 | return; | 1021 | return; |
@@ -1032,7 +1028,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, | |||
1032 | if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || | 1028 | if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || |
1033 | (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { | 1029 | (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { |
1034 | do_qc(qd, change); | 1030 | do_qc(qd, change); |
1035 | found++; | ||
1036 | } | 1031 | } |
1037 | } | 1032 | } |
1038 | } | 1033 | } |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index beb6c7ac0086..b249e294a95b 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -391,7 +391,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
391 | lblock = head->lh_blkno; | 391 | lblock = head->lh_blkno; |
392 | gfs2_replay_incr_blk(sdp, &lblock); | 392 | gfs2_replay_incr_blk(sdp, &lblock); |
393 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; | 393 | bh_map.b_size = 1 << ip->i_inode.i_blkbits; |
394 | error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map); | 394 | error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); |
395 | if (error) | 395 | if (error) |
396 | return error; | 396 | return error; |
397 | if (!bh_map.b_blocknr) { | 397 | if (!bh_map.b_blocknr) { |
@@ -504,13 +504,21 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) | |||
504 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) | 504 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) |
505 | ro = 1; | 505 | ro = 1; |
506 | } else { | 506 | } else { |
507 | if (sdp->sd_vfs->s_flags & MS_RDONLY) | 507 | if (sdp->sd_vfs->s_flags & MS_RDONLY) { |
508 | ro = 1; | 508 | /* check if device itself is read-only */ |
509 | ro = bdev_read_only(sdp->sd_vfs->s_bdev); | ||
510 | if (!ro) { | ||
511 | fs_info(sdp, "recovery required on " | ||
512 | "read-only filesystem.\n"); | ||
513 | fs_info(sdp, "write access will be " | ||
514 | "enabled during recovery.\n"); | ||
515 | } | ||
516 | } | ||
509 | } | 517 | } |
510 | 518 | ||
511 | if (ro) { | 519 | if (ro) { |
512 | fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n", | 520 | fs_warn(sdp, "jid=%u: Can't replay: read-only block " |
513 | jd->jd_jid); | 521 | "device\n", jd->jd_jid); |
514 | error = -EROFS; | 522 | error = -EROFS; |
515 | goto fail_gunlock_tr; | 523 | goto fail_gunlock_tr; |
516 | } | 524 | } |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 708c287e1d0e..3552110b2e5f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -25,10 +25,10 @@ | |||
25 | #include "rgrp.h" | 25 | #include "rgrp.h" |
26 | #include "super.h" | 26 | #include "super.h" |
27 | #include "trans.h" | 27 | #include "trans.h" |
28 | #include "ops_file.h" | ||
29 | #include "util.h" | 28 | #include "util.h" |
30 | #include "log.h" | 29 | #include "log.h" |
31 | #include "inode.h" | 30 | #include "inode.h" |
31 | #include "ops_address.h" | ||
32 | 32 | ||
33 | #define BFITNOENT ((u32)~0) | 33 | #define BFITNOENT ((u32)~0) |
34 | #define NO_BLOCK ((u64)~0) | 34 | #define NO_BLOCK ((u64)~0) |
@@ -126,41 +126,43 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
126 | * Return: the block number (bitmap buffer scope) that was found | 126 | * Return: the block number (bitmap buffer scope) that was found |
127 | */ | 127 | */ |
128 | 128 | ||
129 | static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 129 | static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, |
130 | unsigned int buflen, u32 goal, | 130 | unsigned char old_state) |
131 | unsigned char old_state) | ||
132 | { | 131 | { |
133 | unsigned char *byte, *end, alloc; | 132 | unsigned char *byte; |
134 | u32 blk = goal; | 133 | u32 blk = goal; |
135 | unsigned int bit; | 134 | unsigned int bit, bitlong; |
135 | unsigned long *plong, plong55; | ||
136 | 136 | ||
137 | byte = buffer + (goal / GFS2_NBBY); | 137 | byte = buffer + (goal / GFS2_NBBY); |
138 | plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); | ||
138 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; | 139 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; |
139 | end = buffer + buflen; | 140 | bitlong = bit; |
140 | alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; | 141 | #if BITS_PER_LONG == 32 |
141 | 142 | plong55 = 0x55555555; | |
142 | while (byte < end) { | 143 | #else |
143 | /* If we're looking for a free block we can eliminate all | 144 | plong55 = 0x5555555555555555; |
144 | bitmap settings with 0x55, which represents four data | 145 | #endif |
145 | blocks in a row. If we're looking for a data block, we can | 146 | while (byte < buffer + buflen) { |
146 | eliminate 0x00 which corresponds to four free blocks. */ | 147 | |
147 | if ((*byte & 0x55) == alloc) { | 148 | if (bitlong == 0 && old_state == 0 && *plong == plong55) { |
148 | blk += (8 - bit) >> 1; | 149 | plong++; |
149 | 150 | byte += sizeof(unsigned long); | |
150 | bit = 0; | 151 | blk += sizeof(unsigned long) * GFS2_NBBY; |
151 | byte++; | ||
152 | |||
153 | continue; | 152 | continue; |
154 | } | 153 | } |
155 | |||
156 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) | 154 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) |
157 | return blk; | 155 | return blk; |
158 | |||
159 | bit += GFS2_BIT_SIZE; | 156 | bit += GFS2_BIT_SIZE; |
160 | if (bit >= 8) { | 157 | if (bit >= 8) { |
161 | bit = 0; | 158 | bit = 0; |
162 | byte++; | 159 | byte++; |
163 | } | 160 | } |
161 | bitlong += GFS2_BIT_SIZE; | ||
162 | if (bitlong >= sizeof(unsigned long) * 8) { | ||
163 | bitlong = 0; | ||
164 | plong++; | ||
165 | } | ||
164 | 166 | ||
165 | blk++; | 167 | blk++; |
166 | } | 168 | } |
@@ -817,11 +819,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) | |||
817 | 819 | ||
818 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) | 820 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) |
819 | { | 821 | { |
820 | struct gfs2_alloc *al = &ip->i_alloc; | 822 | BUG_ON(ip->i_alloc != NULL); |
821 | 823 | ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); | |
822 | /* FIXME: Should assert that the correct locks are held here... */ | 824 | return ip->i_alloc; |
823 | memset(al, 0, sizeof(*al)); | ||
824 | return al; | ||
825 | } | 825 | } |
826 | 826 | ||
827 | /** | 827 | /** |
@@ -1059,26 +1059,34 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1059 | struct inode *inode = NULL; | 1059 | struct inode *inode = NULL; |
1060 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1060 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1061 | struct gfs2_rgrpd *rgd, *begin = NULL; | 1061 | struct gfs2_rgrpd *rgd, *begin = NULL; |
1062 | struct gfs2_alloc *al = &ip->i_alloc; | 1062 | struct gfs2_alloc *al = ip->i_alloc; |
1063 | int flags = LM_FLAG_TRY; | 1063 | int flags = LM_FLAG_TRY; |
1064 | int skipped = 0; | 1064 | int skipped = 0; |
1065 | int loops = 0; | 1065 | int loops = 0; |
1066 | int error; | 1066 | int error, rg_locked; |
1067 | 1067 | ||
1068 | /* Try recently successful rgrps */ | 1068 | /* Try recently successful rgrps */ |
1069 | 1069 | ||
1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); | 1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); |
1071 | 1071 | ||
1072 | while (rgd) { | 1072 | while (rgd) { |
1073 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, | 1073 | rg_locked = 0; |
1074 | LM_FLAG_TRY, &al->al_rgd_gh); | 1074 | |
1075 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | ||
1076 | rg_locked = 1; | ||
1077 | error = 0; | ||
1078 | } else { | ||
1079 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, | ||
1080 | LM_FLAG_TRY, &al->al_rgd_gh); | ||
1081 | } | ||
1075 | switch (error) { | 1082 | switch (error) { |
1076 | case 0: | 1083 | case 0: |
1077 | if (try_rgrp_fit(rgd, al)) | 1084 | if (try_rgrp_fit(rgd, al)) |
1078 | goto out; | 1085 | goto out; |
1079 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1086 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1080 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1087 | inode = try_rgrp_unlink(rgd, last_unlinked); |
1081 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1088 | if (!rg_locked) |
1089 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1082 | if (inode) | 1090 | if (inode) |
1083 | return inode; | 1091 | return inode; |
1084 | rgd = recent_rgrp_next(rgd, 1); | 1092 | rgd = recent_rgrp_next(rgd, 1); |
@@ -1098,15 +1106,23 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
1098 | begin = rgd = forward_rgrp_get(sdp); | 1106 | begin = rgd = forward_rgrp_get(sdp); |
1099 | 1107 | ||
1100 | for (;;) { | 1108 | for (;;) { |
1101 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, | 1109 | rg_locked = 0; |
1102 | &al->al_rgd_gh); | 1110 | |
1111 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | ||
1112 | rg_locked = 1; | ||
1113 | error = 0; | ||
1114 | } else { | ||
1115 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, | ||
1116 | &al->al_rgd_gh); | ||
1117 | } | ||
1103 | switch (error) { | 1118 | switch (error) { |
1104 | case 0: | 1119 | case 0: |
1105 | if (try_rgrp_fit(rgd, al)) | 1120 | if (try_rgrp_fit(rgd, al)) |
1106 | goto out; | 1121 | goto out; |
1107 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1122 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1108 | inode = try_rgrp_unlink(rgd, last_unlinked); | 1123 | inode = try_rgrp_unlink(rgd, last_unlinked); |
1109 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1124 | if (!rg_locked) |
1125 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1110 | if (inode) | 1126 | if (inode) |
1111 | return inode; | 1127 | return inode; |
1112 | break; | 1128 | break; |
@@ -1158,7 +1174,7 @@ out: | |||
1158 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1174 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) |
1159 | { | 1175 | { |
1160 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1176 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1161 | struct gfs2_alloc *al = &ip->i_alloc; | 1177 | struct gfs2_alloc *al = ip->i_alloc; |
1162 | struct inode *inode; | 1178 | struct inode *inode; |
1163 | int error = 0; | 1179 | int error = 0; |
1164 | u64 last_unlinked = NO_BLOCK; | 1180 | u64 last_unlinked = NO_BLOCK; |
@@ -1204,7 +1220,7 @@ try_again: | |||
1204 | void gfs2_inplace_release(struct gfs2_inode *ip) | 1220 | void gfs2_inplace_release(struct gfs2_inode *ip) |
1205 | { | 1221 | { |
1206 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1222 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1207 | struct gfs2_alloc *al = &ip->i_alloc; | 1223 | struct gfs2_alloc *al = ip->i_alloc; |
1208 | 1224 | ||
1209 | if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) | 1225 | if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) |
1210 | fs_warn(sdp, "al_alloced = %u, al_requested = %u " | 1226 | fs_warn(sdp, "al_alloced = %u, al_requested = %u " |
@@ -1213,7 +1229,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1213 | al->al_line); | 1229 | al->al_line); |
1214 | 1230 | ||
1215 | al->al_rgd = NULL; | 1231 | al->al_rgd = NULL; |
1216 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1232 | if (al->al_rgd_gh.gh_gl) |
1233 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | ||
1217 | if (ip != GFS2_I(sdp->sd_rindex)) | 1234 | if (ip != GFS2_I(sdp->sd_rindex)) |
1218 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1235 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1219 | } | 1236 | } |
@@ -1301,11 +1318,10 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
1301 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1318 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
1302 | bitmaps, so we must search the originals for that. */ | 1319 | bitmaps, so we must search the originals for that. */ |
1303 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1320 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
1304 | blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, | 1321 | blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, |
1305 | bi->bi_len, goal, old_state); | 1322 | bi->bi_len, goal, old_state); |
1306 | else | 1323 | else |
1307 | blk = gfs2_bitfit(rgd, | 1324 | blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, |
1308 | bi->bi_bh->b_data + bi->bi_offset, | ||
1309 | bi->bi_len, goal, old_state); | 1325 | bi->bi_len, goal, old_state); |
1310 | if (blk != BFITNOENT) | 1326 | if (blk != BFITNOENT) |
1311 | break; | 1327 | break; |
@@ -1394,7 +1410,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1394 | u64 gfs2_alloc_data(struct gfs2_inode *ip) | 1410 | u64 gfs2_alloc_data(struct gfs2_inode *ip) |
1395 | { | 1411 | { |
1396 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1412 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1397 | struct gfs2_alloc *al = &ip->i_alloc; | 1413 | struct gfs2_alloc *al = ip->i_alloc; |
1398 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1414 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1399 | u32 goal, blk; | 1415 | u32 goal, blk; |
1400 | u64 block; | 1416 | u64 block; |
@@ -1439,7 +1455,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) | |||
1439 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) | 1455 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) |
1440 | { | 1456 | { |
1441 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1457 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1442 | struct gfs2_alloc *al = &ip->i_alloc; | 1458 | struct gfs2_alloc *al = ip->i_alloc; |
1443 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1459 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1444 | u32 goal, blk; | 1460 | u32 goal, blk; |
1445 | u64 block; | 1461 | u64 block; |
@@ -1485,7 +1501,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) | |||
1485 | u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | 1501 | u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) |
1486 | { | 1502 | { |
1487 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 1503 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
1488 | struct gfs2_alloc *al = &dip->i_alloc; | 1504 | struct gfs2_alloc *al = dip->i_alloc; |
1489 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1505 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1490 | u32 blk; | 1506 | u32 blk; |
1491 | u64 block; | 1507 | u64 block; |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4c6adfc6f2e..149bb161f4b6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -32,7 +32,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); | |||
32 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 32 | struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) | 33 | static inline void gfs2_alloc_put(struct gfs2_inode *ip) |
34 | { | 34 | { |
35 | return; /* So we can see where ip->i_alloc is used */ | 35 | BUG_ON(ip->i_alloc == NULL); |
36 | kfree(ip->i_alloc); | ||
37 | ip->i_alloc = NULL; | ||
36 | } | 38 | } |
37 | 39 | ||
38 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, | 40 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index dd3e737f528e..ef0562c3bc71 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -51,13 +51,9 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
51 | { | 51 | { |
52 | spin_lock_init(&gt->gt_spin); | 52 | spin_lock_init(&gt->gt_spin); |
53 | 53 | ||
54 | gt->gt_ilimit = 100; | ||
55 | gt->gt_ilimit_tries = 3; | ||
56 | gt->gt_ilimit_min = 1; | ||
57 | gt->gt_demote_secs = 300; | 54 | gt->gt_demote_secs = 300; |
58 | gt->gt_incore_log_blocks = 1024; | 55 | gt->gt_incore_log_blocks = 1024; |
59 | gt->gt_log_flush_secs = 60; | 56 | gt->gt_log_flush_secs = 60; |
60 | gt->gt_jindex_refresh_secs = 60; | ||
61 | gt->gt_recoverd_secs = 60; | 57 | gt->gt_recoverd_secs = 60; |
62 | gt->gt_logd_secs = 1; | 58 | gt->gt_logd_secs = 1; |
63 | gt->gt_quotad_secs = 5; | 59 | gt->gt_quotad_secs = 5; |
@@ -71,10 +67,8 @@ void gfs2_tune_init(struct gfs2_tune *gt) | |||
71 | gt->gt_new_files_jdata = 0; | 67 | gt->gt_new_files_jdata = 0; |
72 | gt->gt_new_files_directio = 0; | 68 | gt->gt_new_files_directio = 0; |
73 | gt->gt_max_readahead = 1 << 18; | 69 | gt->gt_max_readahead = 1 << 18; |
74 | gt->gt_lockdump_size = 131072; | ||
75 | gt->gt_stall_secs = 600; | 70 | gt->gt_stall_secs = 600; |
76 | gt->gt_complain_secs = 10; | 71 | gt->gt_complain_secs = 10; |
77 | gt->gt_reclaim_limit = 5000; | ||
78 | gt->gt_statfs_quantum = 30; | 72 | gt->gt_statfs_quantum = 30; |
79 | gt->gt_statfs_slow = 0; | 73 | gt->gt_statfs_slow = 0; |
80 | } | 74 | } |
@@ -393,6 +387,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
393 | if (!jd) | 387 | if (!jd) |
394 | break; | 388 | break; |
395 | 389 | ||
390 | INIT_LIST_HEAD(&jd->extent_list); | ||
396 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); | 391 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); |
397 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 392 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
398 | if (!jd->jd_inode) | 393 | if (!jd->jd_inode) |
@@ -422,8 +417,9 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
422 | 417 | ||
423 | void gfs2_jindex_free(struct gfs2_sbd *sdp) | 418 | void gfs2_jindex_free(struct gfs2_sbd *sdp) |
424 | { | 419 | { |
425 | struct list_head list; | 420 | struct list_head list, *head; |
426 | struct gfs2_jdesc *jd; | 421 | struct gfs2_jdesc *jd; |
422 | struct gfs2_journal_extent *jext; | ||
427 | 423 | ||
428 | spin_lock(&sdp->sd_jindex_spin); | 424 | spin_lock(&sdp->sd_jindex_spin); |
429 | list_add(&list, &sdp->sd_jindex_list); | 425 | list_add(&list, &sdp->sd_jindex_list); |
@@ -433,6 +429,14 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) | |||
433 | 429 | ||
434 | while (!list_empty(&list)) { | 430 | while (!list_empty(&list)) { |
435 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); | 431 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); |
432 | head = &jd->extent_list; | ||
433 | while (!list_empty(head)) { | ||
434 | jext = list_entry(head->next, | ||
435 | struct gfs2_journal_extent, | ||
436 | extent_list); | ||
437 | list_del(&jext->extent_list); | ||
438 | kfree(jext); | ||
439 | } | ||
436 | list_del(&jd->jd_list); | 440 | list_del(&jd->jd_list); |
437 | iput(jd->jd_inode); | 441 | iput(jd->jd_inode); |
438 | kfree(jd); | 442 | kfree(jd); |
@@ -543,7 +547,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) | |||
543 | if (error) | 547 | if (error) |
544 | return error; | 548 | return error; |
545 | 549 | ||
546 | gfs2_meta_cache_flush(ip); | ||
547 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); | 550 | j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); |
548 | 551 | ||
549 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); | 552 | error = gfs2_find_jhead(sdp->sd_jdesc, &head); |
@@ -686,9 +689,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, | |||
686 | if (error) | 689 | if (error) |
687 | return; | 690 | return; |
688 | 691 | ||
689 | mutex_lock(&sdp->sd_statfs_mutex); | ||
690 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | 692 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); |
691 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
692 | 693 | ||
693 | spin_lock(&sdp->sd_statfs_spin); | 694 | spin_lock(&sdp->sd_statfs_spin); |
694 | l_sc->sc_total += total; | 695 | l_sc->sc_total += total; |
@@ -736,9 +737,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) | |||
736 | if (error) | 737 | if (error) |
737 | goto out_bh2; | 738 | goto out_bh2; |
738 | 739 | ||
739 | mutex_lock(&sdp->sd_statfs_mutex); | ||
740 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); | 740 | gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); |
741 | mutex_unlock(&sdp->sd_statfs_mutex); | ||
742 | 741 | ||
743 | spin_lock(&sdp->sd_statfs_spin); | 742 | spin_lock(&sdp->sd_statfs_spin); |
744 | m_sc->sc_total += l_sc->sc_total; | 743 | m_sc->sc_total += l_sc->sc_total; |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 06e0b7768d97..eaa3b7b2f99e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -32,7 +32,8 @@ spinlock_t gfs2_sys_margs_lock; | |||
32 | 32 | ||
33 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) | 33 | static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) |
34 | { | 34 | { |
35 | return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id); | 35 | return snprintf(buf, PAGE_SIZE, "%u:%u\n", |
36 | MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); | ||
36 | } | 37 | } |
37 | 38 | ||
38 | static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) | 39 | static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) |
@@ -221,9 +222,7 @@ static struct kobj_type gfs2_ktype = { | |||
221 | .sysfs_ops = &gfs2_attr_ops, | 222 | .sysfs_ops = &gfs2_attr_ops, |
222 | }; | 223 | }; |
223 | 224 | ||
224 | static struct kset gfs2_kset = { | 225 | static struct kset *gfs2_kset; |
225 | .ktype = &gfs2_ktype, | ||
226 | }; | ||
227 | 226 | ||
228 | /* | 227 | /* |
229 | * display struct lm_lockstruct fields | 228 | * display struct lm_lockstruct fields |
@@ -427,13 +426,11 @@ TUNE_ATTR_2(name, name##_store) | |||
427 | TUNE_ATTR(demote_secs, 0); | 426 | TUNE_ATTR(demote_secs, 0); |
428 | TUNE_ATTR(incore_log_blocks, 0); | 427 | TUNE_ATTR(incore_log_blocks, 0); |
429 | TUNE_ATTR(log_flush_secs, 0); | 428 | TUNE_ATTR(log_flush_secs, 0); |
430 | TUNE_ATTR(jindex_refresh_secs, 0); | ||
431 | TUNE_ATTR(quota_warn_period, 0); | 429 | TUNE_ATTR(quota_warn_period, 0); |
432 | TUNE_ATTR(quota_quantum, 0); | 430 | TUNE_ATTR(quota_quantum, 0); |
433 | TUNE_ATTR(atime_quantum, 0); | 431 | TUNE_ATTR(atime_quantum, 0); |
434 | TUNE_ATTR(max_readahead, 0); | 432 | TUNE_ATTR(max_readahead, 0); |
435 | TUNE_ATTR(complain_secs, 0); | 433 | TUNE_ATTR(complain_secs, 0); |
436 | TUNE_ATTR(reclaim_limit, 0); | ||
437 | TUNE_ATTR(statfs_slow, 0); | 434 | TUNE_ATTR(statfs_slow, 0); |
438 | TUNE_ATTR(new_files_jdata, 0); | 435 | TUNE_ATTR(new_files_jdata, 0); |
439 | TUNE_ATTR(new_files_directio, 0); | 436 | TUNE_ATTR(new_files_directio, 0); |
@@ -450,13 +447,11 @@ static struct attribute *tune_attrs[] = { | |||
450 | &tune_attr_demote_secs.attr, | 447 | &tune_attr_demote_secs.attr, |
451 | &tune_attr_incore_log_blocks.attr, | 448 | &tune_attr_incore_log_blocks.attr, |
452 | &tune_attr_log_flush_secs.attr, | 449 | &tune_attr_log_flush_secs.attr, |
453 | &tune_attr_jindex_refresh_secs.attr, | ||
454 | &tune_attr_quota_warn_period.attr, | 450 | &tune_attr_quota_warn_period.attr, |
455 | &tune_attr_quota_quantum.attr, | 451 | &tune_attr_quota_quantum.attr, |
456 | &tune_attr_atime_quantum.attr, | 452 | &tune_attr_atime_quantum.attr, |
457 | &tune_attr_max_readahead.attr, | 453 | &tune_attr_max_readahead.attr, |
458 | &tune_attr_complain_secs.attr, | 454 | &tune_attr_complain_secs.attr, |
459 | &tune_attr_reclaim_limit.attr, | ||
460 | &tune_attr_statfs_slow.attr, | 455 | &tune_attr_statfs_slow.attr, |
461 | &tune_attr_quota_simul_sync.attr, | 456 | &tune_attr_quota_simul_sync.attr, |
462 | &tune_attr_quota_cache_secs.attr, | 457 | &tune_attr_quota_cache_secs.attr, |
@@ -495,14 +490,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
495 | { | 490 | { |
496 | int error; | 491 | int error; |
497 | 492 | ||
498 | sdp->sd_kobj.kset = &gfs2_kset; | 493 | sdp->sd_kobj.kset = gfs2_kset; |
499 | sdp->sd_kobj.ktype = &gfs2_ktype; | 494 | error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, |
500 | 495 | "%s", sdp->sd_table_name); | |
501 | error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name); | ||
502 | if (error) | ||
503 | goto fail; | ||
504 | |||
505 | error = kobject_register(&sdp->sd_kobj); | ||
506 | if (error) | 496 | if (error) |
507 | goto fail; | 497 | goto fail; |
508 | 498 | ||
@@ -522,6 +512,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) | |||
522 | if (error) | 512 | if (error) |
523 | goto fail_args; | 513 | goto fail_args; |
524 | 514 | ||
515 | kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); | ||
525 | return 0; | 516 | return 0; |
526 | 517 | ||
527 | fail_args: | 518 | fail_args: |
@@ -531,7 +522,7 @@ fail_counters: | |||
531 | fail_lockstruct: | 522 | fail_lockstruct: |
532 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | 523 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); |
533 | fail_reg: | 524 | fail_reg: |
534 | kobject_unregister(&sdp->sd_kobj); | 525 | kobject_put(&sdp->sd_kobj); |
535 | fail: | 526 | fail: |
536 | fs_err(sdp, "error %d adding sysfs files", error); | 527 | fs_err(sdp, "error %d adding sysfs files", error); |
537 | return error; | 528 | return error; |
@@ -543,21 +534,22 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) | |||
543 | sysfs_remove_group(&sdp->sd_kobj, &args_group); | 534 | sysfs_remove_group(&sdp->sd_kobj, &args_group); |
544 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); | 535 | sysfs_remove_group(&sdp->sd_kobj, &counters_group); |
545 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); | 536 | sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); |
546 | kobject_unregister(&sdp->sd_kobj); | 537 | kobject_put(&sdp->sd_kobj); |
547 | } | 538 | } |
548 | 539 | ||
549 | int gfs2_sys_init(void) | 540 | int gfs2_sys_init(void) |
550 | { | 541 | { |
551 | gfs2_sys_margs = NULL; | 542 | gfs2_sys_margs = NULL; |
552 | spin_lock_init(&gfs2_sys_margs_lock); | 543 | spin_lock_init(&gfs2_sys_margs_lock); |
553 | kobject_set_name(&gfs2_kset.kobj, "gfs2"); | 544 | gfs2_kset = kset_create_and_add("gfs2", NULL, fs_kobj); |
554 | kobj_set_kset_s(&gfs2_kset, fs_subsys); | 545 | if (!gfs2_kset) |
555 | return kset_register(&gfs2_kset); | 546 | return -ENOMEM; |
547 | return 0; | ||
556 | } | 548 | } |
557 | 549 | ||
558 | void gfs2_sys_uninit(void) | 550 | void gfs2_sys_uninit(void) |
559 | { | 551 | { |
560 | kfree(gfs2_sys_margs); | 552 | kfree(gfs2_sys_margs); |
561 | kset_unregister(&gfs2_kset); | 553 | kset_unregister(gfs2_kset); |
562 | } | 554 | } |
563 | 555 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 717983e2c2ae..73e5d92a657c 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -114,11 +114,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) | |||
114 | gfs2_log_flush(sdp, NULL); | 114 | gfs2_log_flush(sdp, NULL); |
115 | } | 115 | } |
116 | 116 | ||
117 | void gfs2_trans_add_gl(struct gfs2_glock *gl) | ||
118 | { | ||
119 | lops_add(gl->gl_sbd, &gl->gl_le); | ||
120 | } | ||
121 | |||
122 | /** | 117 | /** |
123 | * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction | 118 | * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction |
124 | * @gl: the glock the buffer belongs to | 119 | * @gl: the glock the buffer belongs to |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 043d5f4b9c4c..e826f0dab80a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -30,7 +30,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
30 | 30 | ||
31 | void gfs2_trans_end(struct gfs2_sbd *sdp); | 31 | void gfs2_trans_end(struct gfs2_sbd *sdp); |
32 | 32 | ||
33 | void gfs2_trans_add_gl(struct gfs2_glock *gl); | ||
34 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | 33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); |
35 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | 34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); |
36 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); | 35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); |
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index f13f1494d4fe..f8452a0eab56 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c | |||
@@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
52 | rec = (e + b) / 2; | 52 | rec = (e + b) / 2; |
53 | len = hfs_brec_lenoff(bnode, rec, &off); | 53 | len = hfs_brec_lenoff(bnode, rec, &off); |
54 | keylen = hfs_brec_keylen(bnode, rec); | 54 | keylen = hfs_brec_keylen(bnode, rec); |
55 | if (keylen == HFS_BAD_KEYLEN) { | ||
56 | res = -EINVAL; | ||
57 | goto done; | ||
58 | } | ||
55 | hfs_bnode_read(bnode, fd->key, off, keylen); | 59 | hfs_bnode_read(bnode, fd->key, off, keylen); |
56 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); | 60 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); |
57 | if (!cmpval) { | 61 | if (!cmpval) { |
@@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
67 | if (rec != e && e >= 0) { | 71 | if (rec != e && e >= 0) { |
68 | len = hfs_brec_lenoff(bnode, e, &off); | 72 | len = hfs_brec_lenoff(bnode, e, &off); |
69 | keylen = hfs_brec_keylen(bnode, e); | 73 | keylen = hfs_brec_keylen(bnode, e); |
74 | if (keylen == HFS_BAD_KEYLEN) { | ||
75 | res = -EINVAL; | ||
76 | goto done; | ||
77 | } | ||
70 | hfs_bnode_read(bnode, fd->key, off, keylen); | 78 | hfs_bnode_read(bnode, fd->key, off, keylen); |
71 | } | 79 | } |
72 | done: | 80 | done: |
@@ -198,6 +206,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) | |||
198 | 206 | ||
199 | len = hfs_brec_lenoff(bnode, fd->record, &off); | 207 | len = hfs_brec_lenoff(bnode, fd->record, &off); |
200 | keylen = hfs_brec_keylen(bnode, fd->record); | 208 | keylen = hfs_brec_keylen(bnode, fd->record); |
209 | if (keylen == HFS_BAD_KEYLEN) { | ||
210 | res = -EINVAL; | ||
211 | goto out; | ||
212 | } | ||
201 | fd->keyoffset = off; | 213 | fd->keyoffset = off; |
202 | fd->keylength = keylen; | 214 | fd->keylength = keylen; |
203 | fd->entryoffset = off + keylen; | 215 | fd->entryoffset = off + keylen; |
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 5c87cf4801fc..8626ee375ea8 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c | |||
@@ -44,10 +44,21 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) | |||
44 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); | 44 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); |
45 | if (!recoff) | 45 | if (!recoff) |
46 | return 0; | 46 | return 0; |
47 | if (node->tree->attributes & HFS_TREE_BIGKEYS) | 47 | if (node->tree->attributes & HFS_TREE_BIGKEYS) { |
48 | retval = hfs_bnode_read_u16(node, recoff) + 2; | 48 | retval = hfs_bnode_read_u16(node, recoff) + 2; |
49 | else | 49 | if (retval > node->tree->max_key_len + 2) { |
50 | printk(KERN_ERR "hfs: keylen %d too large\n", | ||
51 | retval); | ||
52 | retval = HFS_BAD_KEYLEN; | ||
53 | } | ||
54 | } else { | ||
50 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; | 55 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; |
56 | if (retval > node->tree->max_key_len + 1) { | ||
57 | printk(KERN_ERR "hfs: keylen %d too large\n", | ||
58 | retval); | ||
59 | retval = HFS_BAD_KEYLEN; | ||
60 | } | ||
61 | } | ||
51 | } | 62 | } |
52 | return retval; | 63 | return retval; |
53 | } | 64 | } |
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 8a3a650abc87..110dd3515dc8 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c | |||
@@ -61,7 +61,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
61 | mapping = tree->inode->i_mapping; | 61 | mapping = tree->inode->i_mapping; |
62 | page = read_mapping_page(mapping, 0, NULL); | 62 | page = read_mapping_page(mapping, 0, NULL); |
63 | if (IS_ERR(page)) | 63 | if (IS_ERR(page)) |
64 | goto free_tree; | 64 | goto free_inode; |
65 | 65 | ||
66 | /* Load the header */ | 66 | /* Load the header */ |
67 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 67 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
@@ -81,6 +81,17 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
81 | goto fail_page; | 81 | goto fail_page; |
82 | if (!tree->node_count) | 82 | if (!tree->node_count) |
83 | goto fail_page; | 83 | goto fail_page; |
84 | if ((id == HFS_EXT_CNID) && (tree->max_key_len != HFS_MAX_EXT_KEYLEN)) { | ||
85 | printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", | ||
86 | tree->max_key_len); | ||
87 | goto fail_page; | ||
88 | } | ||
89 | if ((id == HFS_CAT_CNID) && (tree->max_key_len != HFS_MAX_CAT_KEYLEN)) { | ||
90 | printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", | ||
91 | tree->max_key_len); | ||
92 | goto fail_page; | ||
93 | } | ||
94 | |||
84 | tree->node_size_shift = ffs(size) - 1; | 95 | tree->node_size_shift = ffs(size) - 1; |
85 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 96 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
86 | 97 | ||
@@ -88,11 +99,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
88 | page_cache_release(page); | 99 | page_cache_release(page); |
89 | return tree; | 100 | return tree; |
90 | 101 | ||
91 | fail_page: | 102 | fail_page: |
92 | tree->inode->i_mapping->a_ops = &hfs_aops; | ||
93 | page_cache_release(page); | 103 | page_cache_release(page); |
94 | free_tree: | 104 | free_inode: |
105 | tree->inode->i_mapping->a_ops = &hfs_aops; | ||
95 | iput(tree->inode); | 106 | iput(tree->inode); |
107 | free_tree: | ||
96 | kfree(tree); | 108 | kfree(tree); |
97 | return NULL; | 109 | return NULL; |
98 | } | 110 | } |
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h index 1445e3a56ed4..c6aae61adfe6 100644 --- a/fs/hfs/hfs.h +++ b/fs/hfs/hfs.h | |||
@@ -28,6 +28,8 @@ | |||
28 | #define HFS_MAX_NAMELEN 128 | 28 | #define HFS_MAX_NAMELEN 128 |
29 | #define HFS_MAX_VALENCE 32767U | 29 | #define HFS_MAX_VALENCE 32767U |
30 | 30 | ||
31 | #define HFS_BAD_KEYLEN 0xFF | ||
32 | |||
31 | /* Meanings of the drAtrb field of the MDB, | 33 | /* Meanings of the drAtrb field of the MDB, |
32 | * Reference: _Inside Macintosh: Files_ p. 2-61 | 34 | * Reference: _Inside Macintosh: Files_ p. 2-61 |
33 | */ | 35 | */ |
@@ -167,6 +169,9 @@ typedef union hfs_btree_key { | |||
167 | struct hfs_ext_key ext; | 169 | struct hfs_ext_key ext; |
168 | } hfs_btree_key; | 170 | } hfs_btree_key; |
169 | 171 | ||
172 | #define HFS_MAX_CAT_KEYLEN (sizeof(struct hfs_cat_key) - sizeof(u8)) | ||
173 | #define HFS_MAX_EXT_KEYLEN (sizeof(struct hfs_ext_key) - sizeof(u8)) | ||
174 | |||
170 | typedef union hfs_btree_key btree_key; | 175 | typedef union hfs_btree_key btree_key; |
171 | 176 | ||
172 | struct hfs_extent { | 177 | struct hfs_extent { |
diff --git a/fs/inode.c b/fs/inode.c index ed35383d0b6c..276ffd6b6fdd 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1276,6 +1276,11 @@ void file_update_time(struct file *file) | |||
1276 | sync_it = 1; | 1276 | sync_it = 1; |
1277 | } | 1277 | } |
1278 | 1278 | ||
1279 | if (IS_I_VERSION(inode)) { | ||
1280 | inode_inc_iversion(inode); | ||
1281 | sync_it = 1; | ||
1282 | } | ||
1283 | |||
1279 | if (sync_it) | 1284 | if (sync_it) |
1280 | mark_inode_dirty_sync(inode); | 1285 | mark_inode_dirty_sync(inode); |
1281 | } | 1286 | } |
diff --git a/fs/ioprio.c b/fs/ioprio.c index e4e01bc7f338..c4a1c3c65aac 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -41,18 +41,28 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) | |||
41 | return err; | 41 | return err; |
42 | 42 | ||
43 | task_lock(task); | 43 | task_lock(task); |
44 | do { | ||
45 | ioc = task->io_context; | ||
46 | /* see wmb() in current_io_context() */ | ||
47 | smp_read_barrier_depends(); | ||
48 | if (ioc) | ||
49 | break; | ||
44 | 50 | ||
45 | task->ioprio = ioprio; | 51 | ioc = alloc_io_context(GFP_ATOMIC, -1); |
46 | 52 | if (!ioc) { | |
47 | ioc = task->io_context; | 53 | err = -ENOMEM; |
48 | /* see wmb() in current_io_context() */ | 54 | break; |
49 | smp_read_barrier_depends(); | 55 | } |
56 | task->io_context = ioc; | ||
57 | } while (1); | ||
50 | 58 | ||
51 | if (ioc) | 59 | if (!err) { |
60 | ioc->ioprio = ioprio; | ||
52 | ioc->ioprio_changed = 1; | 61 | ioc->ioprio_changed = 1; |
62 | } | ||
53 | 63 | ||
54 | task_unlock(task); | 64 | task_unlock(task); |
55 | return 0; | 65 | return err; |
56 | } | 66 | } |
57 | 67 | ||
58 | asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | 68 | asmlinkage long sys_ioprio_set(int which, int who, int ioprio) |
@@ -75,8 +85,6 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
75 | 85 | ||
76 | break; | 86 | break; |
77 | case IOPRIO_CLASS_IDLE: | 87 | case IOPRIO_CLASS_IDLE: |
78 | if (!capable(CAP_SYS_ADMIN)) | ||
79 | return -EPERM; | ||
80 | break; | 88 | break; |
81 | case IOPRIO_CLASS_NONE: | 89 | case IOPRIO_CLASS_NONE: |
82 | if (data) | 90 | if (data) |
@@ -148,7 +156,9 @@ static int get_task_ioprio(struct task_struct *p) | |||
148 | ret = security_task_getioprio(p); | 156 | ret = security_task_getioprio(p); |
149 | if (ret) | 157 | if (ret) |
150 | goto out; | 158 | goto out; |
151 | ret = p->ioprio; | 159 | ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); |
160 | if (p->io_context) | ||
161 | ret = p->io_context->ioprio; | ||
152 | out: | 162 | out: |
153 | return ret; | 163 | return ret; |
154 | } | 164 | } |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0f69c416eebc..a5432bbbfb88 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -347,7 +347,8 @@ restart: | |||
347 | break; | 347 | break; |
348 | } | 348 | } |
349 | retry = __process_buffer(journal, jh, bhs,&batch_count); | 349 | retry = __process_buffer(journal, jh, bhs,&batch_count); |
350 | if (!retry && lock_need_resched(&journal->j_list_lock)){ | 350 | if (!retry && (need_resched() || |
351 | spin_needbreak(&journal->j_list_lock))) { | ||
351 | spin_unlock(&journal->j_list_lock); | 352 | spin_unlock(&journal->j_list_lock); |
352 | retry = 1; | 353 | retry = 1; |
353 | break; | 354 | break; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b99a8e..31853eb65b4c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -265,7 +265,7 @@ write_out_data: | |||
265 | put_bh(bh); | 265 | put_bh(bh); |
266 | } | 266 | } |
267 | 267 | ||
268 | if (lock_need_resched(&journal->j_list_lock)) { | 268 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { |
269 | spin_unlock(&journal->j_list_lock); | 269 | spin_unlock(&journal->j_list_lock); |
270 | goto write_out_data; | 270 | goto write_out_data; |
271 | } | 271 | } |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 08ff6c7028cc..038ed7436199 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -288,10 +288,12 @@ handle_t *journal_start(journal_t *journal, int nblocks) | |||
288 | jbd_free_handle(handle); | 288 | jbd_free_handle(handle); |
289 | current->journal_info = NULL; | 289 | current->journal_info = NULL; |
290 | handle = ERR_PTR(err); | 290 | handle = ERR_PTR(err); |
291 | goto out; | ||
291 | } | 292 | } |
292 | 293 | ||
293 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); | 294 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); |
294 | 295 | ||
296 | out: | ||
295 | return handle; | 297 | return handle; |
296 | } | 298 | } |
297 | 299 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 3fccde7ba008..6914598022ce 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
233 | */ | 233 | */ |
234 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | 234 | static int __process_buffer(journal_t *journal, struct journal_head *jh, |
235 | struct buffer_head **bhs, int *batch_count) | 235 | struct buffer_head **bhs, int *batch_count, |
236 | transaction_t *transaction) | ||
236 | { | 237 | { |
237 | struct buffer_head *bh = jh2bh(jh); | 238 | struct buffer_head *bh = jh2bh(jh); |
238 | int ret = 0; | 239 | int ret = 0; |
@@ -250,6 +251,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
250 | transaction_t *t = jh->b_transaction; | 251 | transaction_t *t = jh->b_transaction; |
251 | tid_t tid = t->t_tid; | 252 | tid_t tid = t->t_tid; |
252 | 253 | ||
254 | transaction->t_chp_stats.cs_forced_to_close++; | ||
253 | spin_unlock(&journal->j_list_lock); | 255 | spin_unlock(&journal->j_list_lock); |
254 | jbd_unlock_bh_state(bh); | 256 | jbd_unlock_bh_state(bh); |
255 | jbd2_log_start_commit(journal, tid); | 257 | jbd2_log_start_commit(journal, tid); |
@@ -279,6 +281,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
279 | bhs[*batch_count] = bh; | 281 | bhs[*batch_count] = bh; |
280 | __buffer_relink_io(jh); | 282 | __buffer_relink_io(jh); |
281 | jbd_unlock_bh_state(bh); | 283 | jbd_unlock_bh_state(bh); |
284 | transaction->t_chp_stats.cs_written++; | ||
282 | (*batch_count)++; | 285 | (*batch_count)++; |
283 | if (*batch_count == NR_BATCH) { | 286 | if (*batch_count == NR_BATCH) { |
284 | spin_unlock(&journal->j_list_lock); | 287 | spin_unlock(&journal->j_list_lock); |
@@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
322 | if (!journal->j_checkpoint_transactions) | 325 | if (!journal->j_checkpoint_transactions) |
323 | goto out; | 326 | goto out; |
324 | transaction = journal->j_checkpoint_transactions; | 327 | transaction = journal->j_checkpoint_transactions; |
328 | if (transaction->t_chp_stats.cs_chp_time == 0) | ||
329 | transaction->t_chp_stats.cs_chp_time = jiffies; | ||
325 | this_tid = transaction->t_tid; | 330 | this_tid = transaction->t_tid; |
326 | restart: | 331 | restart: |
327 | /* | 332 | /* |
@@ -346,8 +351,10 @@ restart: | |||
346 | retry = 1; | 351 | retry = 1; |
347 | break; | 352 | break; |
348 | } | 353 | } |
349 | retry = __process_buffer(journal, jh, bhs,&batch_count); | 354 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
350 | if (!retry && lock_need_resched(&journal->j_list_lock)){ | 355 | transaction); |
356 | if (!retry && (need_resched() || | ||
357 | spin_needbreak(&journal->j_list_lock))) { | ||
351 | spin_unlock(&journal->j_list_lock); | 358 | spin_unlock(&journal->j_list_lock); |
352 | retry = 1; | 359 | retry = 1; |
353 | break; | 360 | break; |
@@ -602,15 +609,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
602 | 609 | ||
603 | /* | 610 | /* |
604 | * There is one special case to worry about: if we have just pulled the | 611 | * There is one special case to worry about: if we have just pulled the |
605 | * buffer off a committing transaction's forget list, then even if the | 612 | * buffer off a running or committing transaction's checkpoing list, |
606 | * checkpoint list is empty, the transaction obviously cannot be | 613 | * then even if the checkpoint list is empty, the transaction obviously |
607 | * dropped! | 614 | * cannot be dropped! |
608 | * | 615 | * |
609 | * The locking here around j_committing_transaction is a bit sleazy. | 616 | * The locking here around t_state is a bit sleazy. |
610 | * See the comment at the end of jbd2_journal_commit_transaction(). | 617 | * See the comment at the end of jbd2_journal_commit_transaction(). |
611 | */ | 618 | */ |
612 | if (transaction == journal->j_committing_transaction) { | 619 | if (transaction->t_state != T_FINISHED) { |
613 | JBUFFER_TRACE(jh, "belongs to committing transaction"); | 620 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); |
614 | goto out; | 621 | goto out; |
615 | } | 622 | } |
616 | 623 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6986f334c643..4f302d279279 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/jiffies.h> | ||
24 | #include <linux/crc32.h> | ||
23 | 25 | ||
24 | /* | 26 | /* |
25 | * Default IO end handler for temporary BJ_IO buffer_heads. | 27 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh) | |||
92 | return 1; | 94 | return 1; |
93 | } | 95 | } |
94 | 96 | ||
95 | /* Done it all: now write the commit record. We should have | 97 | /* |
98 | * Done it all: now submit the commit record. We should have | ||
96 | * cleaned up our previous buffers by now, so if we are in abort | 99 | * cleaned up our previous buffers by now, so if we are in abort |
97 | * mode we can now just skip the rest of the journal write | 100 | * mode we can now just skip the rest of the journal write |
98 | * entirely. | 101 | * entirely. |
99 | * | 102 | * |
100 | * Returns 1 if the journal needs to be aborted or 0 on success | 103 | * Returns 1 if the journal needs to be aborted or 0 on success |
101 | */ | 104 | */ |
102 | static int journal_write_commit_record(journal_t *journal, | 105 | static int journal_submit_commit_record(journal_t *journal, |
103 | transaction_t *commit_transaction) | 106 | transaction_t *commit_transaction, |
107 | struct buffer_head **cbh, | ||
108 | __u32 crc32_sum) | ||
104 | { | 109 | { |
105 | struct journal_head *descriptor; | 110 | struct journal_head *descriptor; |
111 | struct commit_header *tmp; | ||
106 | struct buffer_head *bh; | 112 | struct buffer_head *bh; |
107 | int i, ret; | 113 | int ret; |
108 | int barrier_done = 0; | 114 | int barrier_done = 0; |
109 | 115 | ||
110 | if (is_journal_aborted(journal)) | 116 | if (is_journal_aborted(journal)) |
@@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal, | |||
116 | 122 | ||
117 | bh = jh2bh(descriptor); | 123 | bh = jh2bh(descriptor); |
118 | 124 | ||
119 | /* AKPM: buglet - add `i' to tmp! */ | 125 | tmp = (struct commit_header *)bh->b_data; |
120 | for (i = 0; i < bh->b_size; i += 512) { | 126 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
121 | journal_header_t *tmp = (journal_header_t*)bh->b_data; | 127 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
122 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 128 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
123 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 129 | |
124 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 130 | if (JBD2_HAS_COMPAT_FEATURE(journal, |
131 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
132 | tmp->h_chksum_type = JBD2_CRC32_CHKSUM; | ||
133 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | ||
134 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | ||
125 | } | 135 | } |
126 | 136 | ||
127 | JBUFFER_TRACE(descriptor, "write commit block"); | 137 | JBUFFER_TRACE(descriptor, "submit commit block"); |
138 | lock_buffer(bh); | ||
139 | |||
128 | set_buffer_dirty(bh); | 140 | set_buffer_dirty(bh); |
129 | if (journal->j_flags & JBD2_BARRIER) { | 141 | set_buffer_uptodate(bh); |
142 | bh->b_end_io = journal_end_buffer_io_sync; | ||
143 | |||
144 | if (journal->j_flags & JBD2_BARRIER && | ||
145 | !JBD2_HAS_COMPAT_FEATURE(journal, | ||
146 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | ||
130 | set_buffer_ordered(bh); | 147 | set_buffer_ordered(bh); |
131 | barrier_done = 1; | 148 | barrier_done = 1; |
132 | } | 149 | } |
133 | ret = sync_dirty_buffer(bh); | 150 | ret = submit_bh(WRITE, bh); |
151 | |||
134 | /* is it possible for another commit to fail at roughly | 152 | /* is it possible for another commit to fail at roughly |
135 | * the same time as this one? If so, we don't want to | 153 | * the same time as this one? If so, we don't want to |
136 | * trust the barrier flag in the super, but instead want | 154 | * trust the barrier flag in the super, but instead want |
@@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal, | |||
151 | clear_buffer_ordered(bh); | 169 | clear_buffer_ordered(bh); |
152 | set_buffer_uptodate(bh); | 170 | set_buffer_uptodate(bh); |
153 | set_buffer_dirty(bh); | 171 | set_buffer_dirty(bh); |
154 | ret = sync_dirty_buffer(bh); | 172 | ret = submit_bh(WRITE, bh); |
155 | } | 173 | } |
156 | put_bh(bh); /* One for getblk() */ | 174 | *cbh = bh; |
157 | jbd2_journal_put_journal_head(descriptor); | 175 | return ret; |
176 | } | ||
177 | |||
178 | /* | ||
179 | * This function along with journal_submit_commit_record | ||
180 | * allows to write the commit record asynchronously. | ||
181 | */ | ||
182 | static int journal_wait_on_commit_record(struct buffer_head *bh) | ||
183 | { | ||
184 | int ret = 0; | ||
185 | |||
186 | clear_buffer_dirty(bh); | ||
187 | wait_on_buffer(bh); | ||
188 | |||
189 | if (unlikely(!buffer_uptodate(bh))) | ||
190 | ret = -EIO; | ||
191 | put_bh(bh); /* One for getblk() */ | ||
192 | jbd2_journal_put_journal_head(bh2jh(bh)); | ||
158 | 193 | ||
159 | return (ret == -EIO); | 194 | return ret; |
160 | } | 195 | } |
161 | 196 | ||
197 | /* | ||
198 | * Wait for all submitted IO to complete. | ||
199 | */ | ||
200 | static int journal_wait_on_locked_list(journal_t *journal, | ||
201 | transaction_t *commit_transaction) | ||
202 | { | ||
203 | int ret = 0; | ||
204 | struct journal_head *jh; | ||
205 | |||
206 | while (commit_transaction->t_locked_list) { | ||
207 | struct buffer_head *bh; | ||
208 | |||
209 | jh = commit_transaction->t_locked_list->b_tprev; | ||
210 | bh = jh2bh(jh); | ||
211 | get_bh(bh); | ||
212 | if (buffer_locked(bh)) { | ||
213 | spin_unlock(&journal->j_list_lock); | ||
214 | wait_on_buffer(bh); | ||
215 | if (unlikely(!buffer_uptodate(bh))) | ||
216 | ret = -EIO; | ||
217 | spin_lock(&journal->j_list_lock); | ||
218 | } | ||
219 | if (!inverted_lock(journal, bh)) { | ||
220 | put_bh(bh); | ||
221 | spin_lock(&journal->j_list_lock); | ||
222 | continue; | ||
223 | } | ||
224 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
225 | __jbd2_journal_unfile_buffer(jh); | ||
226 | jbd_unlock_bh_state(bh); | ||
227 | jbd2_journal_remove_journal_head(bh); | ||
228 | put_bh(bh); | ||
229 | } else { | ||
230 | jbd_unlock_bh_state(bh); | ||
231 | } | ||
232 | put_bh(bh); | ||
233 | cond_resched_lock(&journal->j_list_lock); | ||
234 | } | ||
235 | return ret; | ||
236 | } | ||
237 | |||
162 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 238 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) |
163 | { | 239 | { |
164 | int i; | 240 | int i; |
@@ -265,7 +341,7 @@ write_out_data: | |||
265 | put_bh(bh); | 341 | put_bh(bh); |
266 | } | 342 | } |
267 | 343 | ||
268 | if (lock_need_resched(&journal->j_list_lock)) { | 344 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { |
269 | spin_unlock(&journal->j_list_lock); | 345 | spin_unlock(&journal->j_list_lock); |
270 | goto write_out_data; | 346 | goto write_out_data; |
271 | } | 347 | } |
@@ -274,7 +350,21 @@ write_out_data: | |||
274 | journal_do_submit_data(wbuf, bufs); | 350 | journal_do_submit_data(wbuf, bufs); |
275 | } | 351 | } |
276 | 352 | ||
277 | static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | 353 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
354 | { | ||
355 | struct page *page = bh->b_page; | ||
356 | char *addr; | ||
357 | __u32 checksum; | ||
358 | |||
359 | addr = kmap_atomic(page, KM_USER0); | ||
360 | checksum = crc32_be(crc32_sum, | ||
361 | (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); | ||
362 | kunmap_atomic(addr, KM_USER0); | ||
363 | |||
364 | return checksum; | ||
365 | } | ||
366 | |||
367 | static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | ||
278 | unsigned long long block) | 368 | unsigned long long block) |
279 | { | 369 | { |
280 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); | 370 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); |
@@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | |||
290 | */ | 380 | */ |
291 | void jbd2_journal_commit_transaction(journal_t *journal) | 381 | void jbd2_journal_commit_transaction(journal_t *journal) |
292 | { | 382 | { |
383 | struct transaction_stats_s stats; | ||
293 | transaction_t *commit_transaction; | 384 | transaction_t *commit_transaction; |
294 | struct journal_head *jh, *new_jh, *descriptor; | 385 | struct journal_head *jh, *new_jh, *descriptor; |
295 | struct buffer_head **wbuf = journal->j_wbuf; | 386 | struct buffer_head **wbuf = journal->j_wbuf; |
@@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
305 | int tag_flag; | 396 | int tag_flag; |
306 | int i; | 397 | int i; |
307 | int tag_bytes = journal_tag_bytes(journal); | 398 | int tag_bytes = journal_tag_bytes(journal); |
399 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | ||
400 | __u32 crc32_sum = ~0; | ||
308 | 401 | ||
309 | /* | 402 | /* |
310 | * First job: lock down the current transaction and wait for | 403 | * First job: lock down the current transaction and wait for |
@@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
337 | spin_lock(&journal->j_state_lock); | 430 | spin_lock(&journal->j_state_lock); |
338 | commit_transaction->t_state = T_LOCKED; | 431 | commit_transaction->t_state = T_LOCKED; |
339 | 432 | ||
433 | stats.u.run.rs_wait = commit_transaction->t_max_wait; | ||
434 | stats.u.run.rs_locked = jiffies; | ||
435 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, | ||
436 | stats.u.run.rs_locked); | ||
437 | |||
340 | spin_lock(&commit_transaction->t_handle_lock); | 438 | spin_lock(&commit_transaction->t_handle_lock); |
341 | while (commit_transaction->t_updates) { | 439 | while (commit_transaction->t_updates) { |
342 | DEFINE_WAIT(wait); | 440 | DEFINE_WAIT(wait); |
@@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
407 | */ | 505 | */ |
408 | jbd2_journal_switch_revoke_table(journal); | 506 | jbd2_journal_switch_revoke_table(journal); |
409 | 507 | ||
508 | stats.u.run.rs_flushing = jiffies; | ||
509 | stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, | ||
510 | stats.u.run.rs_flushing); | ||
511 | |||
410 | commit_transaction->t_state = T_FLUSH; | 512 | commit_transaction->t_state = T_FLUSH; |
411 | journal->j_committing_transaction = commit_transaction; | 513 | journal->j_committing_transaction = commit_transaction; |
412 | journal->j_running_transaction = NULL; | 514 | journal->j_running_transaction = NULL; |
@@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
440 | journal_submit_data_buffers(journal, commit_transaction); | 542 | journal_submit_data_buffers(journal, commit_transaction); |
441 | 543 | ||
442 | /* | 544 | /* |
443 | * Wait for all previously submitted IO to complete. | 545 | * Wait for all previously submitted IO to complete if commit |
546 | * record is to be written synchronously. | ||
444 | */ | 547 | */ |
445 | spin_lock(&journal->j_list_lock); | 548 | spin_lock(&journal->j_list_lock); |
446 | while (commit_transaction->t_locked_list) { | 549 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
447 | struct buffer_head *bh; | 550 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
551 | err = journal_wait_on_locked_list(journal, | ||
552 | commit_transaction); | ||
448 | 553 | ||
449 | jh = commit_transaction->t_locked_list->b_tprev; | ||
450 | bh = jh2bh(jh); | ||
451 | get_bh(bh); | ||
452 | if (buffer_locked(bh)) { | ||
453 | spin_unlock(&journal->j_list_lock); | ||
454 | wait_on_buffer(bh); | ||
455 | if (unlikely(!buffer_uptodate(bh))) | ||
456 | err = -EIO; | ||
457 | spin_lock(&journal->j_list_lock); | ||
458 | } | ||
459 | if (!inverted_lock(journal, bh)) { | ||
460 | put_bh(bh); | ||
461 | spin_lock(&journal->j_list_lock); | ||
462 | continue; | ||
463 | } | ||
464 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
465 | __jbd2_journal_unfile_buffer(jh); | ||
466 | jbd_unlock_bh_state(bh); | ||
467 | jbd2_journal_remove_journal_head(bh); | ||
468 | put_bh(bh); | ||
469 | } else { | ||
470 | jbd_unlock_bh_state(bh); | ||
471 | } | ||
472 | put_bh(bh); | ||
473 | cond_resched_lock(&journal->j_list_lock); | ||
474 | } | ||
475 | spin_unlock(&journal->j_list_lock); | 554 | spin_unlock(&journal->j_list_lock); |
476 | 555 | ||
477 | if (err) | 556 | if (err) |
@@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
498 | */ | 577 | */ |
499 | commit_transaction->t_state = T_COMMIT; | 578 | commit_transaction->t_state = T_COMMIT; |
500 | 579 | ||
580 | stats.u.run.rs_logging = jiffies; | ||
581 | stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, | ||
582 | stats.u.run.rs_logging); | ||
583 | stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; | ||
584 | stats.u.run.rs_blocks_logged = 0; | ||
585 | |||
501 | descriptor = NULL; | 586 | descriptor = NULL; |
502 | bufs = 0; | 587 | bufs = 0; |
503 | while (commit_transaction->t_buffers) { | 588 | while (commit_transaction->t_buffers) { |
@@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
639 | start_journal_io: | 724 | start_journal_io: |
640 | for (i = 0; i < bufs; i++) { | 725 | for (i = 0; i < bufs; i++) { |
641 | struct buffer_head *bh = wbuf[i]; | 726 | struct buffer_head *bh = wbuf[i]; |
727 | /* | ||
728 | * Compute checksum. | ||
729 | */ | ||
730 | if (JBD2_HAS_COMPAT_FEATURE(journal, | ||
731 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
732 | crc32_sum = | ||
733 | jbd2_checksum_data(crc32_sum, bh); | ||
734 | } | ||
735 | |||
642 | lock_buffer(bh); | 736 | lock_buffer(bh); |
643 | clear_buffer_dirty(bh); | 737 | clear_buffer_dirty(bh); |
644 | set_buffer_uptodate(bh); | 738 | set_buffer_uptodate(bh); |
@@ -646,6 +740,7 @@ start_journal_io: | |||
646 | submit_bh(WRITE, bh); | 740 | submit_bh(WRITE, bh); |
647 | } | 741 | } |
648 | cond_resched(); | 742 | cond_resched(); |
743 | stats.u.run.rs_blocks_logged += bufs; | ||
649 | 744 | ||
650 | /* Force a new descriptor to be generated next | 745 | /* Force a new descriptor to be generated next |
651 | time round the loop. */ | 746 | time round the loop. */ |
@@ -654,6 +749,23 @@ start_journal_io: | |||
654 | } | 749 | } |
655 | } | 750 | } |
656 | 751 | ||
752 | /* Done it all: now write the commit record asynchronously. */ | ||
753 | |||
754 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
755 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | ||
756 | err = journal_submit_commit_record(journal, commit_transaction, | ||
757 | &cbh, crc32_sum); | ||
758 | if (err) | ||
759 | __jbd2_journal_abort_hard(journal); | ||
760 | |||
761 | spin_lock(&journal->j_list_lock); | ||
762 | err = journal_wait_on_locked_list(journal, | ||
763 | commit_transaction); | ||
764 | spin_unlock(&journal->j_list_lock); | ||
765 | if (err) | ||
766 | __jbd2_journal_abort_hard(journal); | ||
767 | } | ||
768 | |||
657 | /* Lo and behold: we have just managed to send a transaction to | 769 | /* Lo and behold: we have just managed to send a transaction to |
658 | the log. Before we can commit it, wait for the IO so far to | 770 | the log. Before we can commit it, wait for the IO so far to |
659 | complete. Control buffers being written are on the | 771 | complete. Control buffers being written are on the |
@@ -753,8 +865,14 @@ wait_for_iobuf: | |||
753 | 865 | ||
754 | jbd_debug(3, "JBD: commit phase 6\n"); | 866 | jbd_debug(3, "JBD: commit phase 6\n"); |
755 | 867 | ||
756 | if (journal_write_commit_record(journal, commit_transaction)) | 868 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
757 | err = -EIO; | 869 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
870 | err = journal_submit_commit_record(journal, commit_transaction, | ||
871 | &cbh, crc32_sum); | ||
872 | if (err) | ||
873 | __jbd2_journal_abort_hard(journal); | ||
874 | } | ||
875 | err = journal_wait_on_commit_record(cbh); | ||
758 | 876 | ||
759 | if (err) | 877 | if (err) |
760 | jbd2_journal_abort(journal, err); | 878 | jbd2_journal_abort(journal, err); |
@@ -816,6 +934,7 @@ restart_loop: | |||
816 | cp_transaction = jh->b_cp_transaction; | 934 | cp_transaction = jh->b_cp_transaction; |
817 | if (cp_transaction) { | 935 | if (cp_transaction) { |
818 | JBUFFER_TRACE(jh, "remove from old cp transaction"); | 936 | JBUFFER_TRACE(jh, "remove from old cp transaction"); |
937 | cp_transaction->t_chp_stats.cs_dropped++; | ||
819 | __jbd2_journal_remove_checkpoint(jh); | 938 | __jbd2_journal_remove_checkpoint(jh); |
820 | } | 939 | } |
821 | 940 | ||
@@ -867,10 +986,10 @@ restart_loop: | |||
867 | } | 986 | } |
868 | spin_unlock(&journal->j_list_lock); | 987 | spin_unlock(&journal->j_list_lock); |
869 | /* | 988 | /* |
870 | * This is a bit sleazy. We borrow j_list_lock to protect | 989 | * This is a bit sleazy. We use j_list_lock to protect transition |
871 | * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. | 990 | * of a transaction into T_FINISHED state and calling |
872 | * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but | 991 | * __jbd2_journal_drop_transaction(). Otherwise we could race with |
873 | * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint | 992 | * other checkpointing code processing the transaction... |
874 | */ | 993 | */ |
875 | spin_lock(&journal->j_state_lock); | 994 | spin_lock(&journal->j_state_lock); |
876 | spin_lock(&journal->j_list_lock); | 995 | spin_lock(&journal->j_list_lock); |
@@ -890,6 +1009,36 @@ restart_loop: | |||
890 | 1009 | ||
891 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 1010 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
892 | 1011 | ||
1012 | commit_transaction->t_start = jiffies; | ||
1013 | stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging, | ||
1014 | commit_transaction->t_start); | ||
1015 | |||
1016 | /* | ||
1017 | * File the transaction for history | ||
1018 | */ | ||
1019 | stats.ts_type = JBD2_STATS_RUN; | ||
1020 | stats.ts_tid = commit_transaction->t_tid; | ||
1021 | stats.u.run.rs_handle_count = commit_transaction->t_handle_count; | ||
1022 | spin_lock(&journal->j_history_lock); | ||
1023 | memcpy(journal->j_history + journal->j_history_cur, &stats, | ||
1024 | sizeof(stats)); | ||
1025 | if (++journal->j_history_cur == journal->j_history_max) | ||
1026 | journal->j_history_cur = 0; | ||
1027 | |||
1028 | /* | ||
1029 | * Calculate overall stats | ||
1030 | */ | ||
1031 | journal->j_stats.ts_tid++; | ||
1032 | journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait; | ||
1033 | journal->j_stats.u.run.rs_running += stats.u.run.rs_running; | ||
1034 | journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked; | ||
1035 | journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing; | ||
1036 | journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging; | ||
1037 | journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count; | ||
1038 | journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks; | ||
1039 | journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged; | ||
1040 | spin_unlock(&journal->j_history_lock); | ||
1041 | |||
893 | commit_transaction->t_state = T_FINISHED; | 1042 | commit_transaction->t_state = T_FINISHED; |
894 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 1043 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
895 | journal->j_commit_sequence = commit_transaction->t_tid; | 1044 | journal->j_commit_sequence = commit_transaction->t_tid; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 6ddc5531587c..96ba846992e9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/poison.h> | 36 | #include <linux/poison.h> |
37 | #include <linux/proc_fs.h> | 37 | #include <linux/proc_fs.h> |
38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/seq_file.h> | ||
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | #include <asm/page.h> | 42 | #include <asm/page.h> |
@@ -640,6 +641,312 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
640 | return jbd2_journal_add_journal_head(bh); | 641 | return jbd2_journal_add_journal_head(bh); |
641 | } | 642 | } |
642 | 643 | ||
644 | struct jbd2_stats_proc_session { | ||
645 | journal_t *journal; | ||
646 | struct transaction_stats_s *stats; | ||
647 | int start; | ||
648 | int max; | ||
649 | }; | ||
650 | |||
651 | static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s, | ||
652 | struct transaction_stats_s *ts, | ||
653 | int first) | ||
654 | { | ||
655 | if (ts == s->stats + s->max) | ||
656 | ts = s->stats; | ||
657 | if (!first && ts == s->stats + s->start) | ||
658 | return NULL; | ||
659 | while (ts->ts_type == 0) { | ||
660 | ts++; | ||
661 | if (ts == s->stats + s->max) | ||
662 | ts = s->stats; | ||
663 | if (ts == s->stats + s->start) | ||
664 | return NULL; | ||
665 | } | ||
666 | return ts; | ||
667 | |||
668 | } | ||
669 | |||
670 | static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos) | ||
671 | { | ||
672 | struct jbd2_stats_proc_session *s = seq->private; | ||
673 | struct transaction_stats_s *ts; | ||
674 | int l = *pos; | ||
675 | |||
676 | if (l == 0) | ||
677 | return SEQ_START_TOKEN; | ||
678 | ts = jbd2_history_skip_empty(s, s->stats + s->start, 1); | ||
679 | if (!ts) | ||
680 | return NULL; | ||
681 | l--; | ||
682 | while (l) { | ||
683 | ts = jbd2_history_skip_empty(s, ++ts, 0); | ||
684 | if (!ts) | ||
685 | break; | ||
686 | l--; | ||
687 | } | ||
688 | return ts; | ||
689 | } | ||
690 | |||
691 | static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) | ||
692 | { | ||
693 | struct jbd2_stats_proc_session *s = seq->private; | ||
694 | struct transaction_stats_s *ts = v; | ||
695 | |||
696 | ++*pos; | ||
697 | if (v == SEQ_START_TOKEN) | ||
698 | return jbd2_history_skip_empty(s, s->stats + s->start, 1); | ||
699 | else | ||
700 | return jbd2_history_skip_empty(s, ++ts, 0); | ||
701 | } | ||
702 | |||
703 | static int jbd2_seq_history_show(struct seq_file *seq, void *v) | ||
704 | { | ||
705 | struct transaction_stats_s *ts = v; | ||
706 | if (v == SEQ_START_TOKEN) { | ||
707 | seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " | ||
708 | "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", | ||
709 | "wait", "run", "lock", "flush", "log", "hndls", | ||
710 | "block", "inlog", "ctime", "write", "drop", | ||
711 | "close"); | ||
712 | return 0; | ||
713 | } | ||
714 | if (ts->ts_type == JBD2_STATS_RUN) | ||
715 | seq_printf(seq, "%-4s %-5lu %-5u %-5u %-5u %-5u %-5u " | ||
716 | "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, | ||
717 | jiffies_to_msecs(ts->u.run.rs_wait), | ||
718 | jiffies_to_msecs(ts->u.run.rs_running), | ||
719 | jiffies_to_msecs(ts->u.run.rs_locked), | ||
720 | jiffies_to_msecs(ts->u.run.rs_flushing), | ||
721 | jiffies_to_msecs(ts->u.run.rs_logging), | ||
722 | ts->u.run.rs_handle_count, | ||
723 | ts->u.run.rs_blocks, | ||
724 | ts->u.run.rs_blocks_logged); | ||
725 | else if (ts->ts_type == JBD2_STATS_CHECKPOINT) | ||
726 | seq_printf(seq, "%-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu\n", | ||
727 | "C", ts->ts_tid, " ", | ||
728 | jiffies_to_msecs(ts->u.chp.cs_chp_time), | ||
729 | ts->u.chp.cs_written, ts->u.chp.cs_dropped, | ||
730 | ts->u.chp.cs_forced_to_close); | ||
731 | else | ||
732 | J_ASSERT(0); | ||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | static void jbd2_seq_history_stop(struct seq_file *seq, void *v) | ||
737 | { | ||
738 | } | ||
739 | |||
740 | static struct seq_operations jbd2_seq_history_ops = { | ||
741 | .start = jbd2_seq_history_start, | ||
742 | .next = jbd2_seq_history_next, | ||
743 | .stop = jbd2_seq_history_stop, | ||
744 | .show = jbd2_seq_history_show, | ||
745 | }; | ||
746 | |||
747 | static int jbd2_seq_history_open(struct inode *inode, struct file *file) | ||
748 | { | ||
749 | journal_t *journal = PDE(inode)->data; | ||
750 | struct jbd2_stats_proc_session *s; | ||
751 | int rc, size; | ||
752 | |||
753 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
754 | if (s == NULL) | ||
755 | return -ENOMEM; | ||
756 | size = sizeof(struct transaction_stats_s) * journal->j_history_max; | ||
757 | s->stats = kmalloc(size, GFP_KERNEL); | ||
758 | if (s->stats == NULL) { | ||
759 | kfree(s); | ||
760 | return -ENOMEM; | ||
761 | } | ||
762 | spin_lock(&journal->j_history_lock); | ||
763 | memcpy(s->stats, journal->j_history, size); | ||
764 | s->max = journal->j_history_max; | ||
765 | s->start = journal->j_history_cur % s->max; | ||
766 | spin_unlock(&journal->j_history_lock); | ||
767 | |||
768 | rc = seq_open(file, &jbd2_seq_history_ops); | ||
769 | if (rc == 0) { | ||
770 | struct seq_file *m = file->private_data; | ||
771 | m->private = s; | ||
772 | } else { | ||
773 | kfree(s->stats); | ||
774 | kfree(s); | ||
775 | } | ||
776 | return rc; | ||
777 | |||
778 | } | ||
779 | |||
780 | static int jbd2_seq_history_release(struct inode *inode, struct file *file) | ||
781 | { | ||
782 | struct seq_file *seq = file->private_data; | ||
783 | struct jbd2_stats_proc_session *s = seq->private; | ||
784 | |||
785 | kfree(s->stats); | ||
786 | kfree(s); | ||
787 | return seq_release(inode, file); | ||
788 | } | ||
789 | |||
790 | static struct file_operations jbd2_seq_history_fops = { | ||
791 | .owner = THIS_MODULE, | ||
792 | .open = jbd2_seq_history_open, | ||
793 | .read = seq_read, | ||
794 | .llseek = seq_lseek, | ||
795 | .release = jbd2_seq_history_release, | ||
796 | }; | ||
797 | |||
798 | static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) | ||
799 | { | ||
800 | return *pos ? NULL : SEQ_START_TOKEN; | ||
801 | } | ||
802 | |||
803 | static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) | ||
804 | { | ||
805 | return NULL; | ||
806 | } | ||
807 | |||
808 | static int jbd2_seq_info_show(struct seq_file *seq, void *v) | ||
809 | { | ||
810 | struct jbd2_stats_proc_session *s = seq->private; | ||
811 | |||
812 | if (v != SEQ_START_TOKEN) | ||
813 | return 0; | ||
814 | seq_printf(seq, "%lu transaction, each upto %u blocks\n", | ||
815 | s->stats->ts_tid, | ||
816 | s->journal->j_max_transaction_buffers); | ||
817 | if (s->stats->ts_tid == 0) | ||
818 | return 0; | ||
819 | seq_printf(seq, "average: \n %ums waiting for transaction\n", | ||
820 | jiffies_to_msecs(s->stats->u.run.rs_wait / s->stats->ts_tid)); | ||
821 | seq_printf(seq, " %ums running transaction\n", | ||
822 | jiffies_to_msecs(s->stats->u.run.rs_running / s->stats->ts_tid)); | ||
823 | seq_printf(seq, " %ums transaction was being locked\n", | ||
824 | jiffies_to_msecs(s->stats->u.run.rs_locked / s->stats->ts_tid)); | ||
825 | seq_printf(seq, " %ums flushing data (in ordered mode)\n", | ||
826 | jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); | ||
827 | seq_printf(seq, " %ums logging transaction\n", | ||
828 | jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); | ||
829 | seq_printf(seq, " %lu handles per transaction\n", | ||
830 | s->stats->u.run.rs_handle_count / s->stats->ts_tid); | ||
831 | seq_printf(seq, " %lu blocks per transaction\n", | ||
832 | s->stats->u.run.rs_blocks / s->stats->ts_tid); | ||
833 | seq_printf(seq, " %lu logged blocks per transaction\n", | ||
834 | s->stats->u.run.rs_blocks_logged / s->stats->ts_tid); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
/*
 * ->stop for the "info" proc file.  ->start takes no locks and allocates
 * nothing, so there is nothing to undo here.
 */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
}
841 | |||
842 | static struct seq_operations jbd2_seq_info_ops = { | ||
843 | .start = jbd2_seq_info_start, | ||
844 | .next = jbd2_seq_info_next, | ||
845 | .stop = jbd2_seq_info_stop, | ||
846 | .show = jbd2_seq_info_show, | ||
847 | }; | ||
848 | |||
849 | static int jbd2_seq_info_open(struct inode *inode, struct file *file) | ||
850 | { | ||
851 | journal_t *journal = PDE(inode)->data; | ||
852 | struct jbd2_stats_proc_session *s; | ||
853 | int rc, size; | ||
854 | |||
855 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
856 | if (s == NULL) | ||
857 | return -ENOMEM; | ||
858 | size = sizeof(struct transaction_stats_s); | ||
859 | s->stats = kmalloc(size, GFP_KERNEL); | ||
860 | if (s->stats == NULL) { | ||
861 | kfree(s); | ||
862 | return -ENOMEM; | ||
863 | } | ||
864 | spin_lock(&journal->j_history_lock); | ||
865 | memcpy(s->stats, &journal->j_stats, size); | ||
866 | s->journal = journal; | ||
867 | spin_unlock(&journal->j_history_lock); | ||
868 | |||
869 | rc = seq_open(file, &jbd2_seq_info_ops); | ||
870 | if (rc == 0) { | ||
871 | struct seq_file *m = file->private_data; | ||
872 | m->private = s; | ||
873 | } else { | ||
874 | kfree(s->stats); | ||
875 | kfree(s); | ||
876 | } | ||
877 | return rc; | ||
878 | |||
879 | } | ||
880 | |||
881 | static int jbd2_seq_info_release(struct inode *inode, struct file *file) | ||
882 | { | ||
883 | struct seq_file *seq = file->private_data; | ||
884 | struct jbd2_stats_proc_session *s = seq->private; | ||
885 | kfree(s->stats); | ||
886 | kfree(s); | ||
887 | return seq_release(inode, file); | ||
888 | } | ||
889 | |||
890 | static struct file_operations jbd2_seq_info_fops = { | ||
891 | .owner = THIS_MODULE, | ||
892 | .open = jbd2_seq_info_open, | ||
893 | .read = seq_read, | ||
894 | .llseek = seq_lseek, | ||
895 | .release = jbd2_seq_info_release, | ||
896 | }; | ||
897 | |||
898 | static struct proc_dir_entry *proc_jbd2_stats; | ||
899 | |||
900 | static void jbd2_stats_proc_init(journal_t *journal) | ||
901 | { | ||
902 | char name[BDEVNAME_SIZE]; | ||
903 | |||
904 | snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); | ||
905 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
906 | if (journal->j_proc_entry) { | ||
907 | struct proc_dir_entry *p; | ||
908 | p = create_proc_entry("history", S_IRUGO, | ||
909 | journal->j_proc_entry); | ||
910 | if (p) { | ||
911 | p->proc_fops = &jbd2_seq_history_fops; | ||
912 | p->data = journal; | ||
913 | p = create_proc_entry("info", S_IRUGO, | ||
914 | journal->j_proc_entry); | ||
915 | if (p) { | ||
916 | p->proc_fops = &jbd2_seq_info_fops; | ||
917 | p->data = journal; | ||
918 | } | ||
919 | } | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static void jbd2_stats_proc_exit(journal_t *journal) | ||
924 | { | ||
925 | char name[BDEVNAME_SIZE]; | ||
926 | |||
927 | snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); | ||
928 | remove_proc_entry("info", journal->j_proc_entry); | ||
929 | remove_proc_entry("history", journal->j_proc_entry); | ||
930 | remove_proc_entry(name, proc_jbd2_stats); | ||
931 | } | ||
932 | |||
933 | static void journal_init_stats(journal_t *journal) | ||
934 | { | ||
935 | int size; | ||
936 | |||
937 | if (!proc_jbd2_stats) | ||
938 | return; | ||
939 | |||
940 | journal->j_history_max = 100; | ||
941 | size = sizeof(struct transaction_stats_s) * journal->j_history_max; | ||
942 | journal->j_history = kzalloc(size, GFP_KERNEL); | ||
943 | if (!journal->j_history) { | ||
944 | journal->j_history_max = 0; | ||
945 | return; | ||
946 | } | ||
947 | spin_lock_init(&journal->j_history_lock); | ||
948 | } | ||
949 | |||
643 | /* | 950 | /* |
644 | * Management for journal control blocks: functions to create and | 951 | * Management for journal control blocks: functions to create and |
645 | * destroy journal_t structures, and to initialise and read existing | 952 | * destroy journal_t structures, and to initialise and read existing |
@@ -681,6 +988,9 @@ static journal_t * journal_init_common (void) | |||
681 | kfree(journal); | 988 | kfree(journal); |
682 | goto fail; | 989 | goto fail; |
683 | } | 990 | } |
991 | |||
992 | journal_init_stats(journal); | ||
993 | |||
684 | return journal; | 994 | return journal; |
685 | fail: | 995 | fail: |
686 | return NULL; | 996 | return NULL; |
@@ -735,6 +1045,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
735 | journal->j_fs_dev = fs_dev; | 1045 | journal->j_fs_dev = fs_dev; |
736 | journal->j_blk_offset = start; | 1046 | journal->j_blk_offset = start; |
737 | journal->j_maxlen = len; | 1047 | journal->j_maxlen = len; |
1048 | jbd2_stats_proc_init(journal); | ||
738 | 1049 | ||
739 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1050 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
740 | J_ASSERT(bh != NULL); | 1051 | J_ASSERT(bh != NULL); |
@@ -773,6 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
773 | 1084 | ||
774 | journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; | 1085 | journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; |
775 | journal->j_blocksize = inode->i_sb->s_blocksize; | 1086 | journal->j_blocksize = inode->i_sb->s_blocksize; |
1087 | jbd2_stats_proc_init(journal); | ||
776 | 1088 | ||
777 | /* journal descriptor can store up to n blocks -bzzz */ | 1089 | /* journal descriptor can store up to n blocks -bzzz */ |
778 | n = journal->j_blocksize / sizeof(journal_block_tag_t); | 1090 | n = journal->j_blocksize / sizeof(journal_block_tag_t); |
@@ -1153,6 +1465,8 @@ void jbd2_journal_destroy(journal_t *journal) | |||
1153 | brelse(journal->j_sb_buffer); | 1465 | brelse(journal->j_sb_buffer); |
1154 | } | 1466 | } |
1155 | 1467 | ||
1468 | if (journal->j_proc_entry) | ||
1469 | jbd2_stats_proc_exit(journal); | ||
1156 | if (journal->j_inode) | 1470 | if (journal->j_inode) |
1157 | iput(journal->j_inode); | 1471 | iput(journal->j_inode); |
1158 | if (journal->j_revoke) | 1472 | if (journal->j_revoke) |
@@ -1264,6 +1578,32 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1264 | return 1; | 1578 | return 1; |
1265 | } | 1579 | } |
1266 | 1580 | ||
1581 | /* | ||
1582 | * jbd2_journal_clear_features () - Clear a given journal feature in the | ||
1583 | * superblock | ||
1584 | * @journal: Journal to act on. | ||
1585 | * @compat: bitmask of compatible features | ||
1586 | * @ro: bitmask of features that force read-only mount | ||
1587 | * @incompat: bitmask of incompatible features | ||
1588 | * | ||
1589 | * Clear a given journal feature as present on the | ||
1590 | * superblock. | ||
1591 | */ | ||
1592 | void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, | ||
1593 | unsigned long ro, unsigned long incompat) | ||
1594 | { | ||
1595 | journal_superblock_t *sb; | ||
1596 | |||
1597 | jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", | ||
1598 | compat, ro, incompat); | ||
1599 | |||
1600 | sb = journal->j_superblock; | ||
1601 | |||
1602 | sb->s_feature_compat &= ~cpu_to_be32(compat); | ||
1603 | sb->s_feature_ro_compat &= ~cpu_to_be32(ro); | ||
1604 | sb->s_feature_incompat &= ~cpu_to_be32(incompat); | ||
1605 | } | ||
1606 | EXPORT_SYMBOL(jbd2_journal_clear_features); | ||
1267 | 1607 | ||
1268 | /** | 1608 | /** |
1269 | * int jbd2_journal_update_format () - Update on-disk journal structure. | 1609 | * int jbd2_journal_update_format () - Update on-disk journal structure. |
@@ -1633,7 +1973,7 @@ static int journal_init_jbd2_journal_head_cache(void) | |||
1633 | jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", | 1973 | jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", |
1634 | sizeof(struct journal_head), | 1974 | sizeof(struct journal_head), |
1635 | 0, /* offset */ | 1975 | 0, /* offset */ |
1636 | 0, /* flags */ | 1976 | SLAB_TEMPORARY, /* flags */ |
1637 | NULL); /* ctor */ | 1977 | NULL); /* ctor */ |
1638 | retval = 0; | 1978 | retval = 0; |
1639 | if (jbd2_journal_head_cache == 0) { | 1979 | if (jbd2_journal_head_cache == 0) { |
@@ -1900,6 +2240,28 @@ static void __exit jbd2_remove_debugfs_entry(void) | |||
1900 | 2240 | ||
1901 | #endif | 2241 | #endif |
1902 | 2242 | ||
#ifdef CONFIG_PROC_FS

/* Path, relative to the procfs root, of the jbd2 statistics directory. */
#define JBD2_STATS_PROC_NAME "fs/jbd2"

/*
 * Create the statistics root directory.  On failure proc_jbd2_stats is
 * left NULL.
 */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/* Remove the statistics root directory, if it was ever created. */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else /* !CONFIG_PROC_FS */

/* Without procfs there is nothing to create or remove. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif /* CONFIG_PROC_FS */
2264 | |||
1903 | struct kmem_cache *jbd2_handle_cache; | 2265 | struct kmem_cache *jbd2_handle_cache; |
1904 | 2266 | ||
1905 | static int __init journal_init_handle_cache(void) | 2267 | static int __init journal_init_handle_cache(void) |
@@ -1907,7 +2269,7 @@ static int __init journal_init_handle_cache(void) | |||
1907 | jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", | 2269 | jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", |
1908 | sizeof(handle_t), | 2270 | sizeof(handle_t), |
1909 | 0, /* offset */ | 2271 | 0, /* offset */ |
1910 | 0, /* flags */ | 2272 | SLAB_TEMPORARY, /* flags */ |
1911 | NULL); /* ctor */ | 2273 | NULL); /* ctor */ |
1912 | if (jbd2_handle_cache == NULL) { | 2274 | if (jbd2_handle_cache == NULL) { |
1913 | printk(KERN_EMERG "JBD: failed to create handle cache\n"); | 2275 | printk(KERN_EMERG "JBD: failed to create handle cache\n"); |
@@ -1955,6 +2317,7 @@ static int __init journal_init(void) | |||
1955 | if (ret != 0) | 2317 | if (ret != 0) |
1956 | jbd2_journal_destroy_caches(); | 2318 | jbd2_journal_destroy_caches(); |
1957 | jbd2_create_debugfs_entry(); | 2319 | jbd2_create_debugfs_entry(); |
2320 | jbd2_create_jbd_stats_proc_entry(); | ||
1958 | return ret; | 2321 | return ret; |
1959 | } | 2322 | } |
1960 | 2323 | ||
@@ -1966,6 +2329,7 @@ static void __exit journal_exit(void) | |||
1966 | printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); | 2329 | printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); |
1967 | #endif | 2330 | #endif |
1968 | jbd2_remove_debugfs_entry(); | 2331 | jbd2_remove_debugfs_entry(); |
2332 | jbd2_remove_jbd_stats_proc_entry(); | ||
1969 | jbd2_journal_destroy_caches(); | 2333 | jbd2_journal_destroy_caches(); |
1970 | } | 2334 | } |
1971 | 2335 | ||
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index d0ce627539ef..921680663fa2 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/crc32.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag | |||
316 | return block; | 317 | return block; |
317 | } | 318 | } |
318 | 319 | ||
320 | /* | ||
321 | * calc_chksums calculates the checksums for the blocks described in the | ||
322 | * descriptor block. | ||
323 | */ | ||
324 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, | ||
325 | unsigned long *next_log_block, __u32 *crc32_sum) | ||
326 | { | ||
327 | int i, num_blks, err; | ||
328 | unsigned long io_block; | ||
329 | struct buffer_head *obh; | ||
330 | |||
331 | num_blks = count_tags(journal, bh); | ||
332 | /* Calculate checksum of the descriptor block. */ | ||
333 | *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); | ||
334 | |||
335 | for (i = 0; i < num_blks; i++) { | ||
336 | io_block = (*next_log_block)++; | ||
337 | wrap(journal, *next_log_block); | ||
338 | err = jread(&obh, journal, io_block); | ||
339 | if (err) { | ||
340 | printk(KERN_ERR "JBD: IO error %d recovering block " | ||
341 | "%lu in log\n", err, io_block); | ||
342 | return 1; | ||
343 | } else { | ||
344 | *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, | ||
345 | obh->b_size); | ||
346 | } | ||
347 | } | ||
348 | return 0; | ||
349 | } | ||
350 | |||
319 | static int do_one_pass(journal_t *journal, | 351 | static int do_one_pass(journal_t *journal, |
320 | struct recovery_info *info, enum passtype pass) | 352 | struct recovery_info *info, enum passtype pass) |
321 | { | 353 | { |
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal, | |||
328 | unsigned int sequence; | 360 | unsigned int sequence; |
329 | int blocktype; | 361 | int blocktype; |
330 | int tag_bytes = journal_tag_bytes(journal); | 362 | int tag_bytes = journal_tag_bytes(journal); |
363 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | ||
331 | 364 | ||
332 | /* Precompute the maximum metadata descriptors in a descriptor block */ | 365 | /* Precompute the maximum metadata descriptors in a descriptor block */ |
333 | int MAX_BLOCKS_PER_DESC; | 366 | int MAX_BLOCKS_PER_DESC; |
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal, | |||
419 | switch(blocktype) { | 452 | switch(blocktype) { |
420 | case JBD2_DESCRIPTOR_BLOCK: | 453 | case JBD2_DESCRIPTOR_BLOCK: |
421 | /* If it is a valid descriptor block, replay it | 454 | /* If it is a valid descriptor block, replay it |
422 | * in pass REPLAY; otherwise, just skip over the | 455 | * in pass REPLAY; if journal_checksums enabled, then |
423 | * blocks it describes. */ | 456 | * calculate checksums in PASS_SCAN, otherwise, |
457 | * just skip over the blocks it describes. */ | ||
424 | if (pass != PASS_REPLAY) { | 458 | if (pass != PASS_REPLAY) { |
459 | if (pass == PASS_SCAN && | ||
460 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
461 | JBD2_FEATURE_COMPAT_CHECKSUM) && | ||
462 | !info->end_transaction) { | ||
463 | if (calc_chksums(journal, bh, | ||
464 | &next_log_block, | ||
465 | &crc32_sum)) { | ||
466 | put_bh(bh); | ||
467 | break; | ||
468 | } | ||
469 | put_bh(bh); | ||
470 | continue; | ||
471 | } | ||
425 | next_log_block += count_tags(journal, bh); | 472 | next_log_block += count_tags(journal, bh); |
426 | wrap(journal, next_log_block); | 473 | wrap(journal, next_log_block); |
427 | brelse(bh); | 474 | put_bh(bh); |
428 | continue; | 475 | continue; |
429 | } | 476 | } |
430 | 477 | ||
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal, | |||
516 | continue; | 563 | continue; |
517 | 564 | ||
518 | case JBD2_COMMIT_BLOCK: | 565 | case JBD2_COMMIT_BLOCK: |
519 | /* Found an expected commit block: not much to | 566 | /* How to differentiate between interrupted commit |
520 | * do other than move on to the next sequence | 567 | * and journal corruption ? |
568 | * | ||
569 | * {nth transaction} | ||
570 | * Checksum Verification Failed | ||
571 | * | | ||
572 | * ____________________ | ||
573 | * | | | ||
574 | * async_commit sync_commit | ||
575 | * | | | ||
576 | * | GO TO NEXT "Journal Corruption" | ||
577 | * | TRANSACTION | ||
578 | * | | ||
579 | * {(n+1)th transaction} | ||
580 | * | | ||
581 | * _______|______________ | ||
582 | * | | | ||
583 | * Commit block found Commit block not found | ||
584 | * | | | ||
585 | * "Journal Corruption" | | ||
586 | * _____________|_________ | ||
587 | * | | | ||
588 | * nth trans corrupt OR nth trans | ||
589 | * and (n+1)th interrupted interrupted | ||
590 | * before commit block | ||
591 | * could reach the disk. | ||
592 | * (Cannot find the difference in above | ||
593 | * mentioned conditions. Hence assume | ||
594 | * "Interrupted Commit".) | ||
595 | */ | ||
596 | |||
597 | /* Found an expected commit block: if checksums | ||
598 | * are present verify them in PASS_SCAN; else not | ||
599 | * much to do other than move on to the next sequence | ||
521 | * number. */ | 600 | * number. */ |
601 | if (pass == PASS_SCAN && | ||
602 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
603 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
604 | int chksum_err, chksum_seen; | ||
605 | struct commit_header *cbh = | ||
606 | (struct commit_header *)bh->b_data; | ||
607 | unsigned found_chksum = | ||
608 | be32_to_cpu(cbh->h_chksum[0]); | ||
609 | |||
610 | chksum_err = chksum_seen = 0; | ||
611 | |||
612 | if (info->end_transaction) { | ||
613 | printk(KERN_ERR "JBD: Transaction %u " | ||
614 | "found to be corrupt.\n", | ||
615 | next_commit_ID - 1); | ||
616 | brelse(bh); | ||
617 | break; | ||
618 | } | ||
619 | |||
620 | if (crc32_sum == found_chksum && | ||
621 | cbh->h_chksum_type == JBD2_CRC32_CHKSUM && | ||
622 | cbh->h_chksum_size == | ||
623 | JBD2_CRC32_CHKSUM_SIZE) | ||
624 | chksum_seen = 1; | ||
625 | else if (!(cbh->h_chksum_type == 0 && | ||
626 | cbh->h_chksum_size == 0 && | ||
627 | found_chksum == 0 && | ||
628 | !chksum_seen)) | ||
629 | /* | ||
630 | * If fs is mounted using an old kernel and then | ||
631 | * kernel with journal_chksum is used then we | ||
632 | * get a situation where the journal flag has | ||
633 | * checksum flag set but checksums are not | ||
634 | * present i.e chksum = 0, in the individual | ||
635 | * commit blocks. | ||
636 | * Hence to avoid checksum failures, in this | ||
637 | * situation, this extra check is added. | ||
638 | */ | ||
639 | chksum_err = 1; | ||
640 | |||
641 | if (chksum_err) { | ||
642 | info->end_transaction = next_commit_ID; | ||
643 | |||
644 | if (!JBD2_HAS_COMPAT_FEATURE(journal, | ||
645 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ | ||
646 | printk(KERN_ERR | ||
647 | "JBD: Transaction %u " | ||
648 | "found to be corrupt.\n", | ||
649 | next_commit_ID); | ||
650 | brelse(bh); | ||
651 | break; | ||
652 | } | ||
653 | } | ||
654 | crc32_sum = ~0; | ||
655 | } | ||
522 | brelse(bh); | 656 | brelse(bh); |
523 | next_commit_ID++; | 657 | next_commit_ID++; |
524 | continue; | 658 | continue; |
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal, | |||
554 | * transaction marks the end of the valid log. | 688 | * transaction marks the end of the valid log. |
555 | */ | 689 | */ |
556 | 690 | ||
557 | if (pass == PASS_SCAN) | 691 | if (pass == PASS_SCAN) { |
558 | info->end_transaction = next_commit_ID; | 692 | if (!info->end_transaction) |
559 | else { | 693 | info->end_transaction = next_commit_ID; |
694 | } else { | ||
560 | /* It's really bad news if different passes end up at | 695 | /* It's really bad news if different passes end up at |
561 | * different places (but possible due to IO errors). */ | 696 | * different places (but possible due to IO errors). */ |
562 | if (info->end_transaction != next_commit_ID) { | 697 | if (info->end_transaction != next_commit_ID) { |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 3595fd432d5b..df36f42e19e1 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -171,13 +171,15 @@ int __init jbd2_journal_init_revoke_caches(void) | |||
171 | { | 171 | { |
172 | jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", | 172 | jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", |
173 | sizeof(struct jbd2_revoke_record_s), | 173 | sizeof(struct jbd2_revoke_record_s), |
174 | 0, SLAB_HWCACHE_ALIGN, NULL); | 174 | 0, |
175 | SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, | ||
176 | NULL); | ||
175 | if (jbd2_revoke_record_cache == 0) | 177 | if (jbd2_revoke_record_cache == 0) |
176 | return -ENOMEM; | 178 | return -ENOMEM; |
177 | 179 | ||
178 | jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", | 180 | jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", |
179 | sizeof(struct jbd2_revoke_table_s), | 181 | sizeof(struct jbd2_revoke_table_s), |
180 | 0, 0, NULL); | 182 | 0, SLAB_TEMPORARY, NULL); |
181 | if (jbd2_revoke_table_cache == 0) { | 183 | if (jbd2_revoke_table_cache == 0) { |
182 | kmem_cache_destroy(jbd2_revoke_record_cache); | 184 | kmem_cache_destroy(jbd2_revoke_record_cache); |
183 | jbd2_revoke_record_cache = NULL; | 185 | jbd2_revoke_record_cache = NULL; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b1fcf2b3dca3..b9b0b6f899b9 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -54,11 +54,13 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
54 | spin_lock_init(&transaction->t_handle_lock); | 54 | spin_lock_init(&transaction->t_handle_lock); |
55 | 55 | ||
56 | /* Set up the commit timer for the new transaction. */ | 56 | /* Set up the commit timer for the new transaction. */ |
57 | journal->j_commit_timer.expires = transaction->t_expires; | 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
58 | add_timer(&journal->j_commit_timer); | 58 | add_timer(&journal->j_commit_timer); |
59 | 59 | ||
60 | J_ASSERT(journal->j_running_transaction == NULL); | 60 | J_ASSERT(journal->j_running_transaction == NULL); |
61 | journal->j_running_transaction = transaction; | 61 | journal->j_running_transaction = transaction; |
62 | transaction->t_max_wait = 0; | ||
63 | transaction->t_start = jiffies; | ||
62 | 64 | ||
63 | return transaction; | 65 | return transaction; |
64 | } | 66 | } |
@@ -85,6 +87,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle) | |||
85 | int nblocks = handle->h_buffer_credits; | 87 | int nblocks = handle->h_buffer_credits; |
86 | transaction_t *new_transaction = NULL; | 88 | transaction_t *new_transaction = NULL; |
87 | int ret = 0; | 89 | int ret = 0; |
90 | unsigned long ts = jiffies; | ||
88 | 91 | ||
89 | if (nblocks > journal->j_max_transaction_buffers) { | 92 | if (nblocks > journal->j_max_transaction_buffers) { |
90 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 93 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", |
@@ -217,6 +220,12 @@ repeat_locked: | |||
217 | /* OK, account for the buffers that this operation expects to | 220 | /* OK, account for the buffers that this operation expects to |
218 | * use and add the handle to the running transaction. */ | 221 | * use and add the handle to the running transaction. */ |
219 | 222 | ||
223 | if (time_after(transaction->t_start, ts)) { | ||
224 | ts = jbd2_time_diff(ts, transaction->t_start); | ||
225 | if (ts > transaction->t_max_wait) | ||
226 | transaction->t_max_wait = ts; | ||
227 | } | ||
228 | |||
220 | handle->h_transaction = transaction; | 229 | handle->h_transaction = transaction; |
221 | transaction->t_outstanding_credits += nblocks; | 230 | transaction->t_outstanding_credits += nblocks; |
222 | transaction->t_updates++; | 231 | transaction->t_updates++; |
@@ -232,6 +241,8 @@ out: | |||
232 | return ret; | 241 | return ret; |
233 | } | 242 | } |
234 | 243 | ||
244 | static struct lock_class_key jbd2_handle_key; | ||
245 | |||
235 | /* Allocate a new handle. This should probably be in a slab... */ | 246 | /* Allocate a new handle. This should probably be in a slab... */ |
236 | static handle_t *new_handle(int nblocks) | 247 | static handle_t *new_handle(int nblocks) |
237 | { | 248 | { |
@@ -242,6 +253,9 @@ static handle_t *new_handle(int nblocks) | |||
242 | handle->h_buffer_credits = nblocks; | 253 | handle->h_buffer_credits = nblocks; |
243 | handle->h_ref = 1; | 254 | handle->h_ref = 1; |
244 | 255 | ||
256 | lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle", | ||
257 | &jbd2_handle_key, 0); | ||
258 | |||
245 | return handle; | 259 | return handle; |
246 | } | 260 | } |
247 | 261 | ||
@@ -284,7 +298,11 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
284 | jbd2_free_handle(handle); | 298 | jbd2_free_handle(handle); |
285 | current->journal_info = NULL; | 299 | current->journal_info = NULL; |
286 | handle = ERR_PTR(err); | 300 | handle = ERR_PTR(err); |
301 | goto out; | ||
287 | } | 302 | } |
303 | |||
304 | lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); | ||
305 | out: | ||
288 | return handle; | 306 | return handle; |
289 | } | 307 | } |
290 | 308 | ||
@@ -1164,7 +1182,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1164 | } | 1182 | } |
1165 | 1183 | ||
1166 | /* That test should have eliminated the following case: */ | 1184 | /* That test should have eliminated the following case: */ |
1167 | J_ASSERT_JH(jh, jh->b_frozen_data == 0); | 1185 | J_ASSERT_JH(jh, jh->b_frozen_data == NULL); |
1168 | 1186 | ||
1169 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); | 1187 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); |
1170 | spin_lock(&journal->j_list_lock); | 1188 | spin_lock(&journal->j_list_lock); |
@@ -1410,6 +1428,8 @@ int jbd2_journal_stop(handle_t *handle) | |||
1410 | spin_unlock(&journal->j_state_lock); | 1428 | spin_unlock(&journal->j_state_lock); |
1411 | } | 1429 | } |
1412 | 1430 | ||
1431 | lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); | ||
1432 | |||
1413 | jbd2_free_handle(handle); | 1433 | jbd2_free_handle(handle); |
1414 | return err; | 1434 | return err; |
1415 | } | 1435 | } |
@@ -1512,7 +1532,7 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1512 | 1532 | ||
1513 | J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); | 1533 | J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); |
1514 | if (jh->b_jlist != BJ_None) | 1534 | if (jh->b_jlist != BJ_None) |
1515 | J_ASSERT_JH(jh, transaction != 0); | 1535 | J_ASSERT_JH(jh, transaction != NULL); |
1516 | 1536 | ||
1517 | switch (jh->b_jlist) { | 1537 | switch (jh->b_jlist) { |
1518 | case BJ_None: | 1538 | case BJ_None: |
@@ -1581,11 +1601,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1581 | if (buffer_locked(bh) || buffer_dirty(bh)) | 1601 | if (buffer_locked(bh) || buffer_dirty(bh)) |
1582 | goto out; | 1602 | goto out; |
1583 | 1603 | ||
1584 | if (jh->b_next_transaction != 0) | 1604 | if (jh->b_next_transaction != NULL) |
1585 | goto out; | 1605 | goto out; |
1586 | 1606 | ||
1587 | spin_lock(&journal->j_list_lock); | 1607 | spin_lock(&journal->j_list_lock); |
1588 | if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { | 1608 | if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { |
1589 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { | 1609 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { |
1590 | /* A written-back ordered data buffer */ | 1610 | /* A written-back ordered data buffer */ |
1591 | JBUFFER_TRACE(jh, "release data"); | 1611 | JBUFFER_TRACE(jh, "release data"); |
@@ -1593,7 +1613,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1593 | jbd2_journal_remove_journal_head(bh); | 1613 | jbd2_journal_remove_journal_head(bh); |
1594 | __brelse(bh); | 1614 | __brelse(bh); |
1595 | } | 1615 | } |
1596 | } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { | 1616 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1597 | /* written-back checkpointed metadata buffer */ | 1617 | /* written-back checkpointed metadata buffer */ |
1598 | if (jh->b_jlist == BJ_None) { | 1618 | if (jh->b_jlist == BJ_None) { |
1599 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1619 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
@@ -1953,7 +1973,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
1953 | 1973 | ||
1954 | J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); | 1974 | J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); |
1955 | J_ASSERT_JH(jh, jh->b_transaction == transaction || | 1975 | J_ASSERT_JH(jh, jh->b_transaction == transaction || |
1956 | jh->b_transaction == 0); | 1976 | jh->b_transaction == NULL); |
1957 | 1977 | ||
1958 | if (jh->b_transaction && jh->b_jlist == jlist) | 1978 | if (jh->b_transaction && jh->b_jlist == jlist) |
1959 | return; | 1979 | return; |
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index d568ae846741..8adebd3e43c6 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c | |||
@@ -105,7 +105,7 @@ static int jffs2_garbage_collect_thread(void *_c) | |||
105 | 105 | ||
106 | /* Put_super will send a SIGKILL and then wait on the sem. | 106 | /* Put_super will send a SIGKILL and then wait on the sem. |
107 | */ | 107 | */ |
108 | while (signal_pending(current)) { | 108 | while (signal_pending(current) || freezing(current)) { |
109 | siginfo_t info; | 109 | siginfo_t info; |
110 | unsigned long signr; | 110 | unsigned long signr; |
111 | 111 | ||
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index df25ecc418af..4dcc05819998 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
@@ -284,11 +284,11 @@ static struct dir_table_slot *find_index(struct inode *ip, u32 index, | |||
284 | release_metapage(*mp); | 284 | release_metapage(*mp); |
285 | *mp = NULL; | 285 | *mp = NULL; |
286 | } | 286 | } |
287 | if (*mp == 0) { | 287 | if (!(*mp)) { |
288 | *lblock = blkno; | 288 | *lblock = blkno; |
289 | *mp = read_index_page(ip, blkno); | 289 | *mp = read_index_page(ip, blkno); |
290 | } | 290 | } |
291 | if (*mp == 0) { | 291 | if (!(*mp)) { |
292 | jfs_err("free_index: error reading directory table"); | 292 | jfs_err("free_index: error reading directory table"); |
293 | return NULL; | 293 | return NULL; |
294 | } | 294 | } |
@@ -413,7 +413,8 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) | |||
413 | } | 413 | } |
414 | ip->i_size = PSIZE; | 414 | ip->i_size = PSIZE; |
415 | 415 | ||
416 | if ((mp = get_index_page(ip, 0)) == 0) { | 416 | mp = get_index_page(ip, 0); |
417 | if (!mp) { | ||
417 | jfs_err("add_index: get_metapage failed!"); | 418 | jfs_err("add_index: get_metapage failed!"); |
418 | xtTruncate(tid, ip, 0, COMMIT_PWMAP); | 419 | xtTruncate(tid, ip, 0, COMMIT_PWMAP); |
419 | memcpy(&jfs_ip->i_dirtable, temp_table, | 420 | memcpy(&jfs_ip->i_dirtable, temp_table, |
@@ -461,7 +462,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) | |||
461 | } else | 462 | } else |
462 | mp = read_index_page(ip, blkno); | 463 | mp = read_index_page(ip, blkno); |
463 | 464 | ||
464 | if (mp == 0) { | 465 | if (!mp) { |
465 | jfs_err("add_index: get/read_metapage failed!"); | 466 | jfs_err("add_index: get/read_metapage failed!"); |
466 | goto clean_up; | 467 | goto clean_up; |
467 | } | 468 | } |
@@ -499,7 +500,7 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) | |||
499 | 500 | ||
500 | dirtab_slot = find_index(ip, index, &mp, &lblock); | 501 | dirtab_slot = find_index(ip, index, &mp, &lblock); |
501 | 502 | ||
502 | if (dirtab_slot == 0) | 503 | if (!dirtab_slot) |
503 | return; | 504 | return; |
504 | 505 | ||
505 | dirtab_slot->flag = DIR_INDEX_FREE; | 506 | dirtab_slot->flag = DIR_INDEX_FREE; |
@@ -526,7 +527,7 @@ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, | |||
526 | 527 | ||
527 | dirtab_slot = find_index(ip, index, mp, lblock); | 528 | dirtab_slot = find_index(ip, index, mp, lblock); |
528 | 529 | ||
529 | if (dirtab_slot == 0) | 530 | if (!dirtab_slot) |
530 | return; | 531 | return; |
531 | 532 | ||
532 | DTSaddress(dirtab_slot, bn); | 533 | DTSaddress(dirtab_slot, bn); |
@@ -552,7 +553,7 @@ static int read_index(struct inode *ip, u32 index, | |||
552 | struct dir_table_slot *slot; | 553 | struct dir_table_slot *slot; |
553 | 554 | ||
554 | slot = find_index(ip, index, &mp, &lblock); | 555 | slot = find_index(ip, index, &mp, &lblock); |
555 | if (slot == 0) { | 556 | if (!slot) { |
556 | return -EIO; | 557 | return -EIO; |
557 | } | 558 | } |
558 | 559 | ||
@@ -592,10 +593,8 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
592 | struct component_name ciKey; | 593 | struct component_name ciKey; |
593 | struct super_block *sb = ip->i_sb; | 594 | struct super_block *sb = ip->i_sb; |
594 | 595 | ||
595 | ciKey.name = | 596 | ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); |
596 | (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), | 597 | if (!ciKey.name) { |
597 | GFP_NOFS); | ||
598 | if (ciKey.name == 0) { | ||
599 | rc = -ENOMEM; | 598 | rc = -ENOMEM; |
600 | goto dtSearch_Exit2; | 599 | goto dtSearch_Exit2; |
601 | } | 600 | } |
@@ -957,10 +956,8 @@ static int dtSplitUp(tid_t tid, | |||
957 | smp = split->mp; | 956 | smp = split->mp; |
958 | sp = DT_PAGE(ip, smp); | 957 | sp = DT_PAGE(ip, smp); |
959 | 958 | ||
960 | key.name = | 959 | key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS); |
961 | (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), | 960 | if (!key.name) { |
962 | GFP_NOFS); | ||
963 | if (key.name == 0) { | ||
964 | DT_PUTPAGE(smp); | 961 | DT_PUTPAGE(smp); |
965 | rc = -ENOMEM; | 962 | rc = -ENOMEM; |
966 | goto dtSplitUp_Exit; | 963 | goto dtSplitUp_Exit; |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 8561c6ecece0..cdac2d5bafeb 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
@@ -74,7 +74,7 @@ struct idtentry { | |||
74 | #define DTIHDRDATALEN 11 | 74 | #define DTIHDRDATALEN 11 |
75 | 75 | ||
76 | /* compute number of slots for entry */ | 76 | /* compute number of slots for entry */ |
77 | #define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 ) | 77 | #define NDTINTERNAL(klen) (DIV_ROUND_UP((4 + (klen)), 15)) |
78 | 78 | ||
79 | 79 | ||
80 | /* | 80 | /* |
@@ -133,7 +133,7 @@ struct dir_table_slot { | |||
133 | ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) | 133 | ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) |
134 | 134 | ||
135 | /* compute number of slots for entry */ | 135 | /* compute number of slots for entry */ |
136 | #define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 ) | 136 | #define NDTLEAF_LEGACY(klen) (DIV_ROUND_UP((2 + (klen)), 15)) |
137 | #define NDTLEAF NDTINTERNAL | 137 | #define NDTLEAF NDTINTERNAL |
138 | 138 | ||
139 | 139 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 3870ba8b9086..9bf29f771737 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -381,7 +381,7 @@ int diRead(struct inode *ip) | |||
381 | 381 | ||
382 | /* read the page of disk inode */ | 382 | /* read the page of disk inode */ |
383 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); | 383 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); |
384 | if (mp == 0) { | 384 | if (!mp) { |
385 | jfs_err("diRead: read_metapage failed"); | 385 | jfs_err("diRead: read_metapage failed"); |
386 | return -EIO; | 386 | return -EIO; |
387 | } | 387 | } |
@@ -654,7 +654,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
654 | /* read the page of disk inode */ | 654 | /* read the page of disk inode */ |
655 | retry: | 655 | retry: |
656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); | 656 | mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); |
657 | if (mp == 0) | 657 | if (!mp) |
658 | return -EIO; | 658 | return -EIO; |
659 | 659 | ||
660 | /* get the pointer to the disk inode */ | 660 | /* get the pointer to the disk inode */ |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 15a3974cdeeb..325a9679b95a 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -208,6 +208,17 @@ static struct lmStat { | |||
208 | } lmStat; | 208 | } lmStat; |
209 | #endif | 209 | #endif |
210 | 210 | ||
211 | static void write_special_inodes(struct jfs_log *log, | ||
212 | int (*writer)(struct address_space *)) | ||
213 | { | ||
214 | struct jfs_sb_info *sbi; | ||
215 | |||
216 | list_for_each_entry(sbi, &log->sb_list, log_list) { | ||
217 | writer(sbi->ipbmap->i_mapping); | ||
218 | writer(sbi->ipimap->i_mapping); | ||
219 | writer(sbi->direct_inode->i_mapping); | ||
220 | } | ||
221 | } | ||
211 | 222 | ||
212 | /* | 223 | /* |
213 | * NAME: lmLog() | 224 | * NAME: lmLog() |
@@ -935,22 +946,13 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
935 | struct lrd lrd; | 946 | struct lrd lrd; |
936 | int lsn; | 947 | int lsn; |
937 | struct logsyncblk *lp; | 948 | struct logsyncblk *lp; |
938 | struct jfs_sb_info *sbi; | ||
939 | unsigned long flags; | 949 | unsigned long flags; |
940 | 950 | ||
941 | /* push dirty metapages out to disk */ | 951 | /* push dirty metapages out to disk */ |
942 | if (hard_sync) | 952 | if (hard_sync) |
943 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 953 | write_special_inodes(log, filemap_fdatawrite); |
944 | filemap_fdatawrite(sbi->ipbmap->i_mapping); | ||
945 | filemap_fdatawrite(sbi->ipimap->i_mapping); | ||
946 | filemap_fdatawrite(sbi->direct_inode->i_mapping); | ||
947 | } | ||
948 | else | 954 | else |
949 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 955 | write_special_inodes(log, filemap_flush); |
950 | filemap_flush(sbi->ipbmap->i_mapping); | ||
951 | filemap_flush(sbi->ipimap->i_mapping); | ||
952 | filemap_flush(sbi->direct_inode->i_mapping); | ||
953 | } | ||
954 | 956 | ||
955 | /* | 957 | /* |
956 | * forward syncpt | 958 | * forward syncpt |
@@ -1536,7 +1538,6 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1536 | { | 1538 | { |
1537 | int i; | 1539 | int i; |
1538 | struct tblock *target = NULL; | 1540 | struct tblock *target = NULL; |
1539 | struct jfs_sb_info *sbi; | ||
1540 | 1541 | ||
1541 | /* jfs_write_inode may call us during read-only mount */ | 1542 | /* jfs_write_inode may call us during read-only mount */ |
1542 | if (!log) | 1543 | if (!log) |
@@ -1598,11 +1599,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1598 | if (wait < 2) | 1599 | if (wait < 2) |
1599 | return; | 1600 | return; |
1600 | 1601 | ||
1601 | list_for_each_entry(sbi, &log->sb_list, log_list) { | 1602 | write_special_inodes(log, filemap_fdatawrite); |
1602 | filemap_fdatawrite(sbi->ipbmap->i_mapping); | ||
1603 | filemap_fdatawrite(sbi->ipimap->i_mapping); | ||
1604 | filemap_fdatawrite(sbi->direct_inode->i_mapping); | ||
1605 | } | ||
1606 | 1603 | ||
1607 | /* | 1604 | /* |
1608 | * If there was recent activity, we may need to wait | 1605 | * If there was recent activity, we may need to wait |
@@ -1611,6 +1608,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) | |||
1611 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { | 1608 | if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { |
1612 | for (i = 0; i < 200; i++) { /* Too much? */ | 1609 | for (i = 0; i < 200; i++) { /* Too much? */ |
1613 | msleep(250); | 1610 | msleep(250); |
1611 | write_special_inodes(log, filemap_fdatawrite); | ||
1614 | if (list_empty(&log->cqueue) && | 1612 | if (list_empty(&log->cqueue) && |
1615 | list_empty(&log->synclist)) | 1613 | list_empty(&log->synclist)) |
1616 | break; | 1614 | break; |
@@ -2347,7 +2345,7 @@ int jfsIOWait(void *arg) | |||
2347 | 2345 | ||
2348 | do { | 2346 | do { |
2349 | spin_lock_irq(&log_redrive_lock); | 2347 | spin_lock_irq(&log_redrive_lock); |
2350 | while ((bp = log_redrive_list) != 0) { | 2348 | while ((bp = log_redrive_list)) { |
2351 | log_redrive_list = bp->l_redrive_next; | 2349 | log_redrive_list = bp->l_redrive_next; |
2352 | bp->l_redrive_next = NULL; | 2350 | bp->l_redrive_next = NULL; |
2353 | spin_unlock_irq(&log_redrive_lock); | 2351 | spin_unlock_irq(&log_redrive_lock); |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index f5cd8d38af7a..d1e64f2f2fcd 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -39,11 +39,11 @@ static struct { | |||
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) | 41 | #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) |
42 | #define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) | 42 | #define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag) |
43 | 43 | ||
44 | static inline void unlock_metapage(struct metapage *mp) | 44 | static inline void unlock_metapage(struct metapage *mp) |
45 | { | 45 | { |
46 | clear_bit(META_locked, &mp->flag); | 46 | clear_bit_unlock(META_locked, &mp->flag); |
47 | wake_up(&mp->wait); | 47 | wake_up(&mp->wait); |
48 | } | 48 | } |
49 | 49 | ||
@@ -88,7 +88,7 @@ struct meta_anchor { | |||
88 | }; | 88 | }; |
89 | #define mp_anchor(page) ((struct meta_anchor *)page_private(page)) | 89 | #define mp_anchor(page) ((struct meta_anchor *)page_private(page)) |
90 | 90 | ||
91 | static inline struct metapage *page_to_mp(struct page *page, uint offset) | 91 | static inline struct metapage *page_to_mp(struct page *page, int offset) |
92 | { | 92 | { |
93 | if (!PagePrivate(page)) | 93 | if (!PagePrivate(page)) |
94 | return NULL; | 94 | return NULL; |
@@ -153,7 +153,7 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *)) | |||
153 | } | 153 | } |
154 | 154 | ||
155 | #else | 155 | #else |
156 | static inline struct metapage *page_to_mp(struct page *page, uint offset) | 156 | static inline struct metapage *page_to_mp(struct page *page, int offset) |
157 | { | 157 | { |
158 | return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; | 158 | return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; |
159 | } | 159 | } |
@@ -249,7 +249,7 @@ static inline void drop_metapage(struct page *page, struct metapage *mp) | |||
249 | */ | 249 | */ |
250 | 250 | ||
251 | static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, | 251 | static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, |
252 | unsigned int *len) | 252 | int *len) |
253 | { | 253 | { |
254 | int rc = 0; | 254 | int rc = 0; |
255 | int xflag; | 255 | int xflag; |
@@ -352,25 +352,27 @@ static void metapage_write_end_io(struct bio *bio, int err) | |||
352 | static int metapage_writepage(struct page *page, struct writeback_control *wbc) | 352 | static int metapage_writepage(struct page *page, struct writeback_control *wbc) |
353 | { | 353 | { |
354 | struct bio *bio = NULL; | 354 | struct bio *bio = NULL; |
355 | unsigned int block_offset; /* block offset of mp within page */ | 355 | int block_offset; /* block offset of mp within page */ |
356 | struct inode *inode = page->mapping->host; | 356 | struct inode *inode = page->mapping->host; |
357 | unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; | 357 | int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; |
358 | unsigned int len; | 358 | int len; |
359 | unsigned int xlen; | 359 | int xlen; |
360 | struct metapage *mp; | 360 | struct metapage *mp; |
361 | int redirty = 0; | 361 | int redirty = 0; |
362 | sector_t lblock; | 362 | sector_t lblock; |
363 | int nr_underway = 0; | ||
363 | sector_t pblock; | 364 | sector_t pblock; |
364 | sector_t next_block = 0; | 365 | sector_t next_block = 0; |
365 | sector_t page_start; | 366 | sector_t page_start; |
366 | unsigned long bio_bytes = 0; | 367 | unsigned long bio_bytes = 0; |
367 | unsigned long bio_offset = 0; | 368 | unsigned long bio_offset = 0; |
368 | unsigned int offset; | 369 | int offset; |
369 | 370 | ||
370 | page_start = (sector_t)page->index << | 371 | page_start = (sector_t)page->index << |
371 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 372 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
372 | BUG_ON(!PageLocked(page)); | 373 | BUG_ON(!PageLocked(page)); |
373 | BUG_ON(PageWriteback(page)); | 374 | BUG_ON(PageWriteback(page)); |
375 | set_page_writeback(page); | ||
374 | 376 | ||
375 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { | 377 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { |
376 | mp = page_to_mp(page, offset); | 378 | mp = page_to_mp(page, offset); |
@@ -413,11 +415,10 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
413 | if (!bio->bi_size) | 415 | if (!bio->bi_size) |
414 | goto dump_bio; | 416 | goto dump_bio; |
415 | submit_bio(WRITE, bio); | 417 | submit_bio(WRITE, bio); |
418 | nr_underway++; | ||
416 | bio = NULL; | 419 | bio = NULL; |
417 | } else { | 420 | } else |
418 | set_page_writeback(page); | ||
419 | inc_io(page); | 421 | inc_io(page); |
420 | } | ||
421 | xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; | 422 | xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; |
422 | pblock = metapage_get_blocks(inode, lblock, &xlen); | 423 | pblock = metapage_get_blocks(inode, lblock, &xlen); |
423 | if (!pblock) { | 424 | if (!pblock) { |
@@ -427,7 +428,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
427 | continue; | 428 | continue; |
428 | } | 429 | } |
429 | set_bit(META_io, &mp->flag); | 430 | set_bit(META_io, &mp->flag); |
430 | len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); | 431 | len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); |
431 | 432 | ||
432 | bio = bio_alloc(GFP_NOFS, 1); | 433 | bio = bio_alloc(GFP_NOFS, 1); |
433 | bio->bi_bdev = inode->i_sb->s_bdev; | 434 | bio->bi_bdev = inode->i_sb->s_bdev; |
@@ -449,12 +450,16 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) | |||
449 | goto dump_bio; | 450 | goto dump_bio; |
450 | 451 | ||
451 | submit_bio(WRITE, bio); | 452 | submit_bio(WRITE, bio); |
453 | nr_underway++; | ||
452 | } | 454 | } |
453 | if (redirty) | 455 | if (redirty) |
454 | redirty_page_for_writepage(wbc, page); | 456 | redirty_page_for_writepage(wbc, page); |
455 | 457 | ||
456 | unlock_page(page); | 458 | unlock_page(page); |
457 | 459 | ||
460 | if (nr_underway == 0) | ||
461 | end_page_writeback(page); | ||
462 | |||
458 | return 0; | 463 | return 0; |
459 | add_failed: | 464 | add_failed: |
460 | /* We should never reach here, since we're only adding one vec */ | 465 | /* We should never reach here, since we're only adding one vec */ |
@@ -475,13 +480,13 @@ static int metapage_readpage(struct file *fp, struct page *page) | |||
475 | { | 480 | { |
476 | struct inode *inode = page->mapping->host; | 481 | struct inode *inode = page->mapping->host; |
477 | struct bio *bio = NULL; | 482 | struct bio *bio = NULL; |
478 | unsigned int block_offset; | 483 | int block_offset; |
479 | unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; | 484 | int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; |
480 | sector_t page_start; /* address of page in fs blocks */ | 485 | sector_t page_start; /* address of page in fs blocks */ |
481 | sector_t pblock; | 486 | sector_t pblock; |
482 | unsigned int xlen; | 487 | int xlen; |
483 | unsigned int len; | 488 | unsigned int len; |
484 | unsigned int offset; | 489 | int offset; |
485 | 490 | ||
486 | BUG_ON(!PageLocked(page)); | 491 | BUG_ON(!PageLocked(page)); |
487 | page_start = (sector_t)page->index << | 492 | page_start = (sector_t)page->index << |
@@ -530,7 +535,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) | |||
530 | { | 535 | { |
531 | struct metapage *mp; | 536 | struct metapage *mp; |
532 | int ret = 1; | 537 | int ret = 1; |
533 | unsigned int offset; | 538 | int offset; |
534 | 539 | ||
535 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { | 540 | for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { |
536 | mp = page_to_mp(page, offset); | 541 | mp = page_to_mp(page, offset); |
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 644429acb8c0..7b698f2ec45a 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c | |||
@@ -147,7 +147,7 @@ int jfs_mount(struct super_block *sb) | |||
147 | */ | 147 | */ |
148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { | 148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { |
149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); | 149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); |
150 | if (ipaimap2 == 0) { | 150 | if (!ipaimap2) { |
151 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); | 151 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); |
152 | rc = -EIO; | 152 | rc = -EIO; |
153 | goto errout35; | 153 | goto errout35; |
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 7971f37534a3..adcf92d3b603 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) | |||
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 2); | 71 | jfs_flush_journal(log, 1); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * close fileset inode allocation map (aka fileset inode) | 74 | * close fileset inode allocation map (aka fileset inode) |
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
149 | jfs_flush_journal(log, 2); | 149 | jfs_flush_journal(log, 1); |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Make sure all metadata makes it to disk | 152 | * Make sure all metadata makes it to disk |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4e0a8493cef6..f8718de3505e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1103,8 +1103,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1103 | * Make sure dest inode number (if any) is what we think it is | 1103 | * Make sure dest inode number (if any) is what we think it is |
1104 | */ | 1104 | */ |
1105 | rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); | 1105 | rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); |
1106 | if (rc == 0) { | 1106 | if (!rc) { |
1107 | if ((new_ip == 0) || (ino != new_ip->i_ino)) { | 1107 | if ((!new_ip) || (ino != new_ip->i_ino)) { |
1108 | rc = -ESTALE; | 1108 | rc = -ESTALE; |
1109 | goto out3; | 1109 | goto out3; |
1110 | } | 1110 | } |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 71984ee95346..7f24a0bb08ca 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -172,7 +172,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
172 | */ | 172 | */ |
173 | t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) | 173 | t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) |
174 | << L2BPERDMAP; | 174 | << L2BPERDMAP; |
175 | t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50; | 175 | t32 = DIV_ROUND_UP(t64, BITSPERPAGE) + 1 + 50; |
176 | newFSCKSize = t32 << sbi->l2nbperpage; | 176 | newFSCKSize = t32 << sbi->l2nbperpage; |
177 | newFSCKAddress = newLogAddress - newFSCKSize; | 177 | newFSCKAddress = newLogAddress - newFSCKSize; |
178 | 178 | ||
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 314bb4ff1ba8..70a14001c98f 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -598,6 +598,12 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
598 | seq_printf(seq, ",umask=%03o", sbi->umask); | 598 | seq_printf(seq, ",umask=%03o", sbi->umask); |
599 | if (sbi->flag & JFS_NOINTEGRITY) | 599 | if (sbi->flag & JFS_NOINTEGRITY) |
600 | seq_puts(seq, ",nointegrity"); | 600 | seq_puts(seq, ",nointegrity"); |
601 | if (sbi->nls_tab) | ||
602 | seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); | ||
603 | if (sbi->flag & JFS_ERR_CONTINUE) | ||
604 | seq_printf(seq, ",errors=continue"); | ||
605 | if (sbi->flag & JFS_ERR_PANIC) | ||
606 | seq_printf(seq, ",errors=panic"); | ||
601 | 607 | ||
602 | #ifdef CONFIG_QUOTA | 608 | #ifdef CONFIG_QUOTA |
603 | if (sbi->flag & JFS_USRQUOTA) | 609 | if (sbi->flag & JFS_USRQUOTA) |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index d070b18e539d..0b45fd3a4bfd 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -41,6 +41,48 @@ struct nlm_wait { | |||
41 | 41 | ||
42 | static LIST_HEAD(nlm_blocked); | 42 | static LIST_HEAD(nlm_blocked); |
43 | 43 | ||
44 | /** | ||
45 | * nlmclnt_init - Set up per-NFS mount point lockd data structures | ||
46 | * @nlm_init: pointer to arguments structure | ||
47 | * | ||
48 | * Returns pointer to an appropriate nlm_host struct, | ||
49 | * or an ERR_PTR value. | ||
50 | */ | ||
51 | struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | ||
52 | { | ||
53 | struct nlm_host *host; | ||
54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; | ||
55 | int status; | ||
56 | |||
57 | status = lockd_up(nlm_init->protocol); | ||
58 | if (status < 0) | ||
59 | return ERR_PTR(status); | ||
60 | |||
61 | host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, | ||
62 | nlm_init->protocol, nlm_version, | ||
63 | nlm_init->hostname, | ||
64 | strlen(nlm_init->hostname)); | ||
65 | if (host == NULL) { | ||
66 | lockd_down(); | ||
67 | return ERR_PTR(-ENOLCK); | ||
68 | } | ||
69 | |||
70 | return host; | ||
71 | } | ||
72 | EXPORT_SYMBOL_GPL(nlmclnt_init); | ||
73 | |||
74 | /** | ||
75 | * nlmclnt_done - Release resources allocated by nlmclnt_init() | ||
76 | * @host: nlm_host structure reserved by nlmclnt_init() | ||
77 | * | ||
78 | */ | ||
79 | void nlmclnt_done(struct nlm_host *host) | ||
80 | { | ||
81 | nlm_release_host(host); | ||
82 | lockd_down(); | ||
83 | } | ||
84 | EXPORT_SYMBOL_GPL(nlmclnt_done); | ||
85 | |||
44 | /* | 86 | /* |
45 | * Queue up a lock for blocking so that the GRANTED request can see it | 87 | * Queue up a lock for blocking so that the GRANTED request can see it |
46 | */ | 88 | */ |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a10343bed160..b6b74a60e1eb 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -145,34 +145,21 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) | |||
145 | BUG_ON(req->a_args.lock.fl.fl_ops != NULL); | 145 | BUG_ON(req->a_args.lock.fl.fl_ops != NULL); |
146 | } | 146 | } |
147 | 147 | ||
148 | /* | 148 | /** |
149 | * This is the main entry point for the NLM client. | 149 | * nlmclnt_proc - Perform a single client-side lock request |
150 | * @host: address of a valid nlm_host context representing the NLM server | ||
151 | * @cmd: fcntl-style file lock operation to perform | ||
152 | * @fl: address of arguments for the lock operation | ||
153 | * | ||
150 | */ | 154 | */ |
151 | int | 155 | int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) |
152 | nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) | ||
153 | { | 156 | { |
154 | struct rpc_clnt *client = NFS_CLIENT(inode); | ||
155 | struct sockaddr_in addr; | ||
156 | struct nfs_server *nfssrv = NFS_SERVER(inode); | ||
157 | struct nlm_host *host; | ||
158 | struct nlm_rqst *call; | 157 | struct nlm_rqst *call; |
159 | sigset_t oldset; | 158 | sigset_t oldset; |
160 | unsigned long flags; | 159 | unsigned long flags; |
161 | int status, vers; | 160 | int status; |
162 | |||
163 | vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; | ||
164 | if (NFS_PROTO(inode)->version > 3) { | ||
165 | printk(KERN_NOTICE "NFSv4 file locking not implemented!\n"); | ||
166 | return -ENOLCK; | ||
167 | } | ||
168 | |||
169 | rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); | ||
170 | host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers, | ||
171 | nfssrv->nfs_client->cl_hostname, | ||
172 | strlen(nfssrv->nfs_client->cl_hostname)); | ||
173 | if (host == NULL) | ||
174 | return -ENOLCK; | ||
175 | 161 | ||
162 | nlm_get_host(host); | ||
176 | call = nlm_alloc_call(host); | 163 | call = nlm_alloc_call(host); |
177 | if (call == NULL) | 164 | if (call == NULL) |
178 | return -ENOMEM; | 165 | return -ENOMEM; |
@@ -219,7 +206,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) | |||
219 | dprintk("lockd: clnt proc returns %d\n", status); | 206 | dprintk("lockd: clnt proc returns %d\n", status); |
220 | return status; | 207 | return status; |
221 | } | 208 | } |
222 | EXPORT_SYMBOL(nlmclnt_proc); | 209 | EXPORT_SYMBOL_GPL(nlmclnt_proc); |
223 | 210 | ||
224 | /* | 211 | /* |
225 | * Allocate an NLM RPC call struct | 212 | * Allocate an NLM RPC call struct |
@@ -257,7 +244,7 @@ void nlm_release_call(struct nlm_rqst *call) | |||
257 | 244 | ||
258 | static void nlmclnt_rpc_release(void *data) | 245 | static void nlmclnt_rpc_release(void *data) |
259 | { | 246 | { |
260 | return nlm_release_call(data); | 247 | nlm_release_call(data); |
261 | } | 248 | } |
262 | 249 | ||
263 | static int nlm_wait_on_grace(wait_queue_head_t *queue) | 250 | static int nlm_wait_on_grace(wait_queue_head_t *queue) |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 633653bff944..3e459e18cc31 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -612,8 +612,7 @@ const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) | |||
612 | * called with BKL held. | 612 | * called with BKL held. |
613 | */ | 613 | */ |
614 | static char buf[2*NLM_MAXCOOKIELEN+1]; | 614 | static char buf[2*NLM_MAXCOOKIELEN+1]; |
615 | int i; | 615 | unsigned int i, len = sizeof(buf); |
616 | int len = sizeof(buf); | ||
617 | char *p = buf; | 616 | char *p = buf; |
618 | 617 | ||
619 | len--; /* allow for trailing \0 */ | 618 | len--; /* allow for trailing \0 */ |
diff --git a/fs/namei.c b/fs/namei.c index 3b993db26cee..73e2e665817a 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1605,7 +1605,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1605 | if (S_ISLNK(inode->i_mode)) | 1605 | if (S_ISLNK(inode->i_mode)) |
1606 | return -ELOOP; | 1606 | return -ELOOP; |
1607 | 1607 | ||
1608 | if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) | 1608 | if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE)) |
1609 | return -EISDIR; | 1609 | return -EISDIR; |
1610 | 1610 | ||
1611 | /* | 1611 | /* |
@@ -1620,7 +1620,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) | |||
1620 | return -EACCES; | 1620 | return -EACCES; |
1621 | 1621 | ||
1622 | flag &= ~O_TRUNC; | 1622 | flag &= ~O_TRUNC; |
1623 | } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) | 1623 | } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) |
1624 | return -EROFS; | 1624 | return -EROFS; |
1625 | 1625 | ||
1626 | error = vfs_permission(nd, acc_mode); | 1626 | error = vfs_permission(nd, acc_mode); |
diff --git a/fs/namespace.c b/fs/namespace.c index 06083885b21e..61bf376e29e8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -41,8 +41,8 @@ static struct kmem_cache *mnt_cache __read_mostly; | |||
41 | static struct rw_semaphore namespace_sem; | 41 | static struct rw_semaphore namespace_sem; |
42 | 42 | ||
43 | /* /sys/fs */ | 43 | /* /sys/fs */ |
44 | decl_subsys(fs, NULL, NULL); | 44 | struct kobject *fs_kobj; |
45 | EXPORT_SYMBOL_GPL(fs_subsys); | 45 | EXPORT_SYMBOL_GPL(fs_kobj); |
46 | 46 | ||
47 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 47 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
48 | { | 48 | { |
@@ -1861,10 +1861,9 @@ void __init mnt_init(void) | |||
1861 | if (err) | 1861 | if (err) |
1862 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 1862 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
1863 | __FUNCTION__, err); | 1863 | __FUNCTION__, err); |
1864 | err = subsystem_register(&fs_subsys); | 1864 | fs_kobj = kobject_create_and_add("fs", NULL); |
1865 | if (err) | 1865 | if (!fs_kobj) |
1866 | printk(KERN_WARNING "%s: subsystem_register error: %d\n", | 1866 | printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); |
1867 | __FUNCTION__, err); | ||
1868 | init_rootfs(); | 1867 | init_rootfs(); |
1869 | init_mount_tree(); | 1868 | init_mount_tree(); |
1870 | } | 1869 | } |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a796be5051bf..9b6bbf1b9787 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -73,8 +73,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
73 | complete(&nfs_callback_info.started); | 73 | complete(&nfs_callback_info.started); |
74 | 74 | ||
75 | for(;;) { | 75 | for(;;) { |
76 | char buf[RPC_MAX_ADDRBUFLEN]; | ||
77 | |||
78 | if (signalled()) { | 76 | if (signalled()) { |
79 | if (nfs_callback_info.users == 0) | 77 | if (nfs_callback_info.users == 0) |
80 | break; | 78 | break; |
@@ -92,8 +90,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) | |||
92 | __FUNCTION__, -err); | 90 | __FUNCTION__, -err); |
93 | break; | 91 | break; |
94 | } | 92 | } |
95 | dprintk("%s: request from %s\n", __FUNCTION__, | ||
96 | svc_print_addr(rqstp, buf, sizeof(buf))); | ||
97 | svc_process(rqstp); | 93 | svc_process(rqstp); |
98 | } | 94 | } |
99 | 95 | ||
@@ -168,12 +164,11 @@ void nfs_callback_down(void) | |||
168 | 164 | ||
169 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) | 165 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) |
170 | { | 166 | { |
171 | struct sockaddr_in *addr = svc_addr_in(rqstp); | ||
172 | struct nfs_client *clp; | 167 | struct nfs_client *clp; |
173 | char buf[RPC_MAX_ADDRBUFLEN]; | 168 | char buf[RPC_MAX_ADDRBUFLEN]; |
174 | 169 | ||
175 | /* Don't talk to strangers */ | 170 | /* Don't talk to strangers */ |
176 | clp = nfs_find_client(addr, 4); | 171 | clp = nfs_find_client(svc_addr(rqstp), 4); |
177 | if (clp == NULL) | 172 | if (clp == NULL) |
178 | return SVC_DROP; | 173 | return SVC_DROP; |
179 | 174 | ||
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c2bb14e053e1..bb25d2135ff1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -38,7 +38,7 @@ struct cb_compound_hdr_res { | |||
38 | }; | 38 | }; |
39 | 39 | ||
40 | struct cb_getattrargs { | 40 | struct cb_getattrargs { |
41 | struct sockaddr_in *addr; | 41 | struct sockaddr *addr; |
42 | struct nfs_fh fh; | 42 | struct nfs_fh fh; |
43 | uint32_t bitmap[2]; | 43 | uint32_t bitmap[2]; |
44 | }; | 44 | }; |
@@ -53,7 +53,7 @@ struct cb_getattrres { | |||
53 | }; | 53 | }; |
54 | 54 | ||
55 | struct cb_recallargs { | 55 | struct cb_recallargs { |
56 | struct sockaddr_in *addr; | 56 | struct sockaddr *addr; |
57 | struct nfs_fh fh; | 57 | struct nfs_fh fh; |
58 | nfs4_stateid stateid; | 58 | nfs4_stateid stateid; |
59 | uint32_t truncate; | 59 | uint32_t truncate; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 72e55d83756d..15f7785048d3 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -12,7 +12,9 @@ | |||
12 | #include "delegation.h" | 12 | #include "delegation.h" |
13 | #include "internal.h" | 13 | #include "internal.h" |
14 | 14 | ||
15 | #ifdef NFS_DEBUG | ||
15 | #define NFSDBG_FACILITY NFSDBG_CALLBACK | 16 | #define NFSDBG_FACILITY NFSDBG_CALLBACK |
17 | #endif | ||
16 | 18 | ||
17 | __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) | 19 | __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) |
18 | { | 20 | { |
@@ -20,12 +22,16 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * | |||
20 | struct nfs_delegation *delegation; | 22 | struct nfs_delegation *delegation; |
21 | struct nfs_inode *nfsi; | 23 | struct nfs_inode *nfsi; |
22 | struct inode *inode; | 24 | struct inode *inode; |
23 | 25 | ||
24 | res->bitmap[0] = res->bitmap[1] = 0; | 26 | res->bitmap[0] = res->bitmap[1] = 0; |
25 | res->status = htonl(NFS4ERR_BADHANDLE); | 27 | res->status = htonl(NFS4ERR_BADHANDLE); |
26 | clp = nfs_find_client(args->addr, 4); | 28 | clp = nfs_find_client(args->addr, 4); |
27 | if (clp == NULL) | 29 | if (clp == NULL) |
28 | goto out; | 30 | goto out; |
31 | |||
32 | dprintk("NFS: GETATTR callback request from %s\n", | ||
33 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | ||
34 | |||
29 | inode = nfs_delegation_find_inode(clp, &args->fh); | 35 | inode = nfs_delegation_find_inode(clp, &args->fh); |
30 | if (inode == NULL) | 36 | if (inode == NULL) |
31 | goto out_putclient; | 37 | goto out_putclient; |
@@ -65,23 +71,32 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) | |||
65 | clp = nfs_find_client(args->addr, 4); | 71 | clp = nfs_find_client(args->addr, 4); |
66 | if (clp == NULL) | 72 | if (clp == NULL) |
67 | goto out; | 73 | goto out; |
68 | inode = nfs_delegation_find_inode(clp, &args->fh); | 74 | |
69 | if (inode == NULL) | 75 | dprintk("NFS: RECALL callback request from %s\n", |
70 | goto out_putclient; | 76 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
71 | /* Set up a helper thread to actually return the delegation */ | 77 | |
72 | switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { | 78 | do { |
73 | case 0: | 79 | struct nfs_client *prev = clp; |
74 | res = 0; | 80 | |
75 | break; | 81 | inode = nfs_delegation_find_inode(clp, &args->fh); |
76 | case -ENOENT: | 82 | if (inode != NULL) { |
77 | res = htonl(NFS4ERR_BAD_STATEID); | 83 | /* Set up a helper thread to actually return the delegation */ |
78 | break; | 84 | switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { |
79 | default: | 85 | case 0: |
80 | res = htonl(NFS4ERR_RESOURCE); | 86 | res = 0; |
81 | } | 87 | break; |
82 | iput(inode); | 88 | case -ENOENT: |
83 | out_putclient: | 89 | if (res != 0) |
84 | nfs_put_client(clp); | 90 | res = htonl(NFS4ERR_BAD_STATEID); |
91 | break; | ||
92 | default: | ||
93 | res = htonl(NFS4ERR_RESOURCE); | ||
94 | } | ||
95 | iput(inode); | ||
96 | } | ||
97 | clp = nfs_find_client_next(prev); | ||
98 | nfs_put_client(prev); | ||
99 | } while (clp != NULL); | ||
85 | out: | 100 | out: |
86 | dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); | 101 | dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); |
87 | return res; | 102 | return res; |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 058ade7efe79..c63eb720b68b 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -139,7 +139,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound | |||
139 | if (unlikely(status != 0)) | 139 | if (unlikely(status != 0)) |
140 | return status; | 140 | return status; |
141 | /* We do not like overly long tags! */ | 141 | /* We do not like overly long tags! */ |
142 | if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) { | 142 | if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { |
143 | printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", | 143 | printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", |
144 | __FUNCTION__, hdr->taglen); | 144 | __FUNCTION__, hdr->taglen); |
145 | return htonl(NFS4ERR_RESOURCE); | 145 | return htonl(NFS4ERR_RESOURCE); |
@@ -176,7 +176,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr | |||
176 | status = decode_fh(xdr, &args->fh); | 176 | status = decode_fh(xdr, &args->fh); |
177 | if (unlikely(status != 0)) | 177 | if (unlikely(status != 0)) |
178 | goto out; | 178 | goto out; |
179 | args->addr = svc_addr_in(rqstp); | 179 | args->addr = svc_addr(rqstp); |
180 | status = decode_bitmap(xdr, args->bitmap); | 180 | status = decode_bitmap(xdr, args->bitmap); |
181 | out: | 181 | out: |
182 | dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); | 182 | dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); |
@@ -188,7 +188,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, | |||
188 | __be32 *p; | 188 | __be32 *p; |
189 | __be32 status; | 189 | __be32 status; |
190 | 190 | ||
191 | args->addr = svc_addr_in(rqstp); | 191 | args->addr = svc_addr(rqstp); |
192 | status = decode_stateid(xdr, &args->stateid); | 192 | status = decode_stateid(xdr, &args->stateid); |
193 | if (unlikely(status != 0)) | 193 | if (unlikely(status != 0)) |
194 | goto out; | 194 | goto out; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 310fa2f4cbb8..c5c0175898f6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/nfs_idmap.h> | 34 | #include <linux/nfs_idmap.h> |
35 | #include <linux/vfs.h> | 35 | #include <linux/vfs.h> |
36 | #include <linux/inet.h> | 36 | #include <linux/inet.h> |
37 | #include <linux/in6.h> | ||
38 | #include <net/ipv6.h> | ||
37 | #include <linux/nfs_xdr.h> | 39 | #include <linux/nfs_xdr.h> |
38 | 40 | ||
39 | #include <asm/system.h> | 41 | #include <asm/system.h> |
@@ -93,22 +95,30 @@ struct rpc_program nfsacl_program = { | |||
93 | }; | 95 | }; |
94 | #endif /* CONFIG_NFS_V3_ACL */ | 96 | #endif /* CONFIG_NFS_V3_ACL */ |
95 | 97 | ||
98 | struct nfs_client_initdata { | ||
99 | const char *hostname; | ||
100 | const struct sockaddr *addr; | ||
101 | size_t addrlen; | ||
102 | const struct nfs_rpc_ops *rpc_ops; | ||
103 | int proto; | ||
104 | }; | ||
105 | |||
96 | /* | 106 | /* |
97 | * Allocate a shared client record | 107 | * Allocate a shared client record |
98 | * | 108 | * |
99 | * Since these are allocated/deallocated very rarely, we don't | 109 | * Since these are allocated/deallocated very rarely, we don't |
100 | * bother putting them in a slab cache... | 110 | * bother putting them in a slab cache... |
101 | */ | 111 | */ |
102 | static struct nfs_client *nfs_alloc_client(const char *hostname, | 112 | static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) |
103 | const struct sockaddr_in *addr, | ||
104 | int nfsversion) | ||
105 | { | 113 | { |
106 | struct nfs_client *clp; | 114 | struct nfs_client *clp; |
107 | 115 | ||
108 | if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) | 116 | if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) |
109 | goto error_0; | 117 | goto error_0; |
110 | 118 | ||
111 | if (nfsversion == 4) { | 119 | clp->rpc_ops = cl_init->rpc_ops; |
120 | |||
121 | if (cl_init->rpc_ops->version == 4) { | ||
112 | if (nfs_callback_up() < 0) | 122 | if (nfs_callback_up() < 0) |
113 | goto error_2; | 123 | goto error_2; |
114 | __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); | 124 | __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); |
@@ -117,11 +127,11 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, | |||
117 | atomic_set(&clp->cl_count, 1); | 127 | atomic_set(&clp->cl_count, 1); |
118 | clp->cl_cons_state = NFS_CS_INITING; | 128 | clp->cl_cons_state = NFS_CS_INITING; |
119 | 129 | ||
120 | clp->cl_nfsversion = nfsversion; | 130 | memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); |
121 | memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); | 131 | clp->cl_addrlen = cl_init->addrlen; |
122 | 132 | ||
123 | if (hostname) { | 133 | if (cl_init->hostname) { |
124 | clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); | 134 | clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); |
125 | if (!clp->cl_hostname) | 135 | if (!clp->cl_hostname) |
126 | goto error_3; | 136 | goto error_3; |
127 | } | 137 | } |
@@ -129,6 +139,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, | |||
129 | INIT_LIST_HEAD(&clp->cl_superblocks); | 139 | INIT_LIST_HEAD(&clp->cl_superblocks); |
130 | clp->cl_rpcclient = ERR_PTR(-EINVAL); | 140 | clp->cl_rpcclient = ERR_PTR(-EINVAL); |
131 | 141 | ||
142 | clp->cl_proto = cl_init->proto; | ||
143 | |||
132 | #ifdef CONFIG_NFS_V4 | 144 | #ifdef CONFIG_NFS_V4 |
133 | init_rwsem(&clp->cl_sem); | 145 | init_rwsem(&clp->cl_sem); |
134 | INIT_LIST_HEAD(&clp->cl_delegations); | 146 | INIT_LIST_HEAD(&clp->cl_delegations); |
@@ -166,7 +178,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) | |||
166 | */ | 178 | */ |
167 | static void nfs_free_client(struct nfs_client *clp) | 179 | static void nfs_free_client(struct nfs_client *clp) |
168 | { | 180 | { |
169 | dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); | 181 | dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); |
170 | 182 | ||
171 | nfs4_shutdown_client(clp); | 183 | nfs4_shutdown_client(clp); |
172 | 184 | ||
@@ -203,76 +215,148 @@ void nfs_put_client(struct nfs_client *clp) | |||
203 | } | 215 | } |
204 | } | 216 | } |
205 | 217 | ||
218 | static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, | ||
219 | const struct sockaddr_in *sa2) | ||
220 | { | ||
221 | return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; | ||
222 | } | ||
223 | |||
224 | static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1, | ||
225 | const struct sockaddr_in6 *sa2) | ||
226 | { | ||
227 | return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr); | ||
228 | } | ||
229 | |||
230 | static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, | ||
231 | const struct sockaddr *sa2) | ||
232 | { | ||
233 | switch (sa1->sa_family) { | ||
234 | case AF_INET: | ||
235 | return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, | ||
236 | (const struct sockaddr_in *)sa2); | ||
237 | case AF_INET6: | ||
238 | return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1, | ||
239 | (const struct sockaddr_in6 *)sa2); | ||
240 | } | ||
241 | BUG(); | ||
242 | } | ||
243 | |||
206 | /* | 244 | /* |
207 | * Find a client by address | 245 | * Find a client by IP address and protocol version |
208 | * - caller must hold nfs_client_lock | 246 | * - returns NULL if no such client |
209 | */ | 247 | */ |
210 | static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port) | 248 | struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) |
211 | { | 249 | { |
212 | struct nfs_client *clp; | 250 | struct nfs_client *clp; |
213 | 251 | ||
252 | spin_lock(&nfs_client_lock); | ||
214 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { | 253 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { |
254 | struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; | ||
255 | |||
215 | /* Don't match clients that failed to initialise properly */ | 256 | /* Don't match clients that failed to initialise properly */ |
216 | if (clp->cl_cons_state < 0) | 257 | if (clp->cl_cons_state != NFS_CS_READY) |
217 | continue; | 258 | continue; |
218 | 259 | ||
219 | /* Different NFS versions cannot share the same nfs_client */ | 260 | /* Different NFS versions cannot share the same nfs_client */ |
220 | if (clp->cl_nfsversion != nfsversion) | 261 | if (clp->rpc_ops->version != nfsversion) |
221 | continue; | 262 | continue; |
222 | 263 | ||
223 | if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, | 264 | if (addr->sa_family != clap->sa_family) |
224 | sizeof(clp->cl_addr.sin_addr)) != 0) | 265 | continue; |
266 | /* Match only the IP address, not the port number */ | ||
267 | if (!nfs_sockaddr_match_ipaddr(addr, clap)) | ||
225 | continue; | 268 | continue; |
226 | 269 | ||
227 | if (!match_port || clp->cl_addr.sin_port == addr->sin_port) | 270 | atomic_inc(&clp->cl_count); |
228 | goto found; | 271 | spin_unlock(&nfs_client_lock); |
272 | return clp; | ||
229 | } | 273 | } |
230 | 274 | spin_unlock(&nfs_client_lock); | |
231 | return NULL; | 275 | return NULL; |
232 | |||
233 | found: | ||
234 | atomic_inc(&clp->cl_count); | ||
235 | return clp; | ||
236 | } | 276 | } |
237 | 277 | ||
238 | /* | 278 | /* |
239 | * Find a client by IP address and protocol version | 279 | * Find a client by IP address and protocol version |
240 | * - returns NULL if no such client | 280 | * - returns NULL if no such client |
241 | */ | 281 | */ |
242 | struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) | 282 | struct nfs_client *nfs_find_client_next(struct nfs_client *clp) |
243 | { | 283 | { |
244 | struct nfs_client *clp; | 284 | struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr; |
285 | u32 nfsvers = clp->rpc_ops->version; | ||
245 | 286 | ||
246 | spin_lock(&nfs_client_lock); | 287 | spin_lock(&nfs_client_lock); |
247 | clp = __nfs_find_client(addr, nfsversion, 0); | 288 | list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) { |
289 | struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; | ||
290 | |||
291 | /* Don't match clients that failed to initialise properly */ | ||
292 | if (clp->cl_cons_state != NFS_CS_READY) | ||
293 | continue; | ||
294 | |||
295 | /* Different NFS versions cannot share the same nfs_client */ | ||
296 | if (clp->rpc_ops->version != nfsvers) | ||
297 | continue; | ||
298 | |||
299 | if (sap->sa_family != clap->sa_family) | ||
300 | continue; | ||
301 | /* Match only the IP address, not the port number */ | ||
302 | if (!nfs_sockaddr_match_ipaddr(sap, clap)) | ||
303 | continue; | ||
304 | |||
305 | atomic_inc(&clp->cl_count); | ||
306 | spin_unlock(&nfs_client_lock); | ||
307 | return clp; | ||
308 | } | ||
248 | spin_unlock(&nfs_client_lock); | 309 | spin_unlock(&nfs_client_lock); |
249 | if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) { | 310 | return NULL; |
250 | nfs_put_client(clp); | 311 | } |
251 | clp = NULL; | 312 | |
313 | /* | ||
314 | * Find an nfs_client on the list that matches the initialisation data | ||
315 | * that is supplied. | ||
316 | */ | ||
317 | static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) | ||
318 | { | ||
319 | struct nfs_client *clp; | ||
320 | |||
321 | list_for_each_entry(clp, &nfs_client_list, cl_share_link) { | ||
322 | /* Don't match clients that failed to initialise properly */ | ||
323 | if (clp->cl_cons_state < 0) | ||
324 | continue; | ||
325 | |||
326 | /* Different NFS versions cannot share the same nfs_client */ | ||
327 | if (clp->rpc_ops != data->rpc_ops) | ||
328 | continue; | ||
329 | |||
330 | if (clp->cl_proto != data->proto) | ||
331 | continue; | ||
332 | |||
333 | /* Match the full socket address */ | ||
334 | if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0) | ||
335 | continue; | ||
336 | |||
337 | atomic_inc(&clp->cl_count); | ||
338 | return clp; | ||
252 | } | 339 | } |
253 | return clp; | 340 | return NULL; |
254 | } | 341 | } |
255 | 342 | ||
256 | /* | 343 | /* |
257 | * Look up a client by IP address and protocol version | 344 | * Look up a client by IP address and protocol version |
258 | * - creates a new record if one doesn't yet exist | 345 | * - creates a new record if one doesn't yet exist |
259 | */ | 346 | */ |
260 | static struct nfs_client *nfs_get_client(const char *hostname, | 347 | static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) |
261 | const struct sockaddr_in *addr, | ||
262 | int nfsversion) | ||
263 | { | 348 | { |
264 | struct nfs_client *clp, *new = NULL; | 349 | struct nfs_client *clp, *new = NULL; |
265 | int error; | 350 | int error; |
266 | 351 | ||
267 | dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", | 352 | dprintk("--> nfs_get_client(%s,v%u)\n", |
268 | hostname ?: "", NIPQUAD(addr->sin_addr), | 353 | cl_init->hostname ?: "", cl_init->rpc_ops->version); |
269 | addr->sin_port, nfsversion); | ||
270 | 354 | ||
271 | /* see if the client already exists */ | 355 | /* see if the client already exists */ |
272 | do { | 356 | do { |
273 | spin_lock(&nfs_client_lock); | 357 | spin_lock(&nfs_client_lock); |
274 | 358 | ||
275 | clp = __nfs_find_client(addr, nfsversion, 1); | 359 | clp = nfs_match_client(cl_init); |
276 | if (clp) | 360 | if (clp) |
277 | goto found_client; | 361 | goto found_client; |
278 | if (new) | 362 | if (new) |
@@ -280,7 +364,7 @@ static struct nfs_client *nfs_get_client(const char *hostname, | |||
280 | 364 | ||
281 | spin_unlock(&nfs_client_lock); | 365 | spin_unlock(&nfs_client_lock); |
282 | 366 | ||
283 | new = nfs_alloc_client(hostname, addr, nfsversion); | 367 | new = nfs_alloc_client(cl_init); |
284 | } while (new); | 368 | } while (new); |
285 | 369 | ||
286 | return ERR_PTR(-ENOMEM); | 370 | return ERR_PTR(-ENOMEM); |
@@ -344,12 +428,16 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
344 | switch (proto) { | 428 | switch (proto) { |
345 | case XPRT_TRANSPORT_TCP: | 429 | case XPRT_TRANSPORT_TCP: |
346 | case XPRT_TRANSPORT_RDMA: | 430 | case XPRT_TRANSPORT_RDMA: |
347 | if (!to->to_initval) | 431 | if (to->to_initval == 0) |
348 | to->to_initval = 60 * HZ; | 432 | to->to_initval = 60 * HZ; |
349 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) | 433 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) |
350 | to->to_initval = NFS_MAX_TCP_TIMEOUT; | 434 | to->to_initval = NFS_MAX_TCP_TIMEOUT; |
351 | to->to_increment = to->to_initval; | 435 | to->to_increment = to->to_initval; |
352 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); | 436 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); |
437 | if (to->to_maxval > NFS_MAX_TCP_TIMEOUT) | ||
438 | to->to_maxval = NFS_MAX_TCP_TIMEOUT; | ||
439 | if (to->to_maxval < to->to_initval) | ||
440 | to->to_maxval = to->to_initval; | ||
353 | to->to_exponential = 0; | 441 | to->to_exponential = 0; |
354 | break; | 442 | break; |
355 | case XPRT_TRANSPORT_UDP: | 443 | case XPRT_TRANSPORT_UDP: |
@@ -367,19 +455,17 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, | |||
367 | /* | 455 | /* |
368 | * Create an RPC client handle | 456 | * Create an RPC client handle |
369 | */ | 457 | */ |
370 | static int nfs_create_rpc_client(struct nfs_client *clp, int proto, | 458 | static int nfs_create_rpc_client(struct nfs_client *clp, |
371 | unsigned int timeo, | 459 | const struct rpc_timeout *timeparms, |
372 | unsigned int retrans, | 460 | rpc_authflavor_t flavor, |
373 | rpc_authflavor_t flavor, | 461 | int flags) |
374 | int flags) | ||
375 | { | 462 | { |
376 | struct rpc_timeout timeparms; | ||
377 | struct rpc_clnt *clnt = NULL; | 463 | struct rpc_clnt *clnt = NULL; |
378 | struct rpc_create_args args = { | 464 | struct rpc_create_args args = { |
379 | .protocol = proto, | 465 | .protocol = clp->cl_proto, |
380 | .address = (struct sockaddr *)&clp->cl_addr, | 466 | .address = (struct sockaddr *)&clp->cl_addr, |
381 | .addrsize = sizeof(clp->cl_addr), | 467 | .addrsize = clp->cl_addrlen, |
382 | .timeout = &timeparms, | 468 | .timeout = timeparms, |
383 | .servername = clp->cl_hostname, | 469 | .servername = clp->cl_hostname, |
384 | .program = &nfs_program, | 470 | .program = &nfs_program, |
385 | .version = clp->rpc_ops->version, | 471 | .version = clp->rpc_ops->version, |
@@ -390,10 +476,6 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, | |||
390 | if (!IS_ERR(clp->cl_rpcclient)) | 476 | if (!IS_ERR(clp->cl_rpcclient)) |
391 | return 0; | 477 | return 0; |
392 | 478 | ||
393 | nfs_init_timeout_values(&timeparms, proto, timeo, retrans); | ||
394 | clp->retrans_timeo = timeparms.to_initval; | ||
395 | clp->retrans_count = timeparms.to_retries; | ||
396 | |||
397 | clnt = rpc_create(&args); | 479 | clnt = rpc_create(&args); |
398 | if (IS_ERR(clnt)) { | 480 | if (IS_ERR(clnt)) { |
399 | dprintk("%s: cannot create RPC client. Error = %ld\n", | 481 | dprintk("%s: cannot create RPC client. Error = %ld\n", |
@@ -410,11 +492,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, | |||
410 | */ | 492 | */ |
411 | static void nfs_destroy_server(struct nfs_server *server) | 493 | static void nfs_destroy_server(struct nfs_server *server) |
412 | { | 494 | { |
413 | if (!IS_ERR(server->client_acl)) | ||
414 | rpc_shutdown_client(server->client_acl); | ||
415 | |||
416 | if (!(server->flags & NFS_MOUNT_NONLM)) | 495 | if (!(server->flags & NFS_MOUNT_NONLM)) |
417 | lockd_down(); /* release rpc.lockd */ | 496 | nlmclnt_done(server->nlm_host); |
418 | } | 497 | } |
419 | 498 | ||
420 | /* | 499 | /* |
@@ -422,20 +501,29 @@ static void nfs_destroy_server(struct nfs_server *server) | |||
422 | */ | 501 | */ |
423 | static int nfs_start_lockd(struct nfs_server *server) | 502 | static int nfs_start_lockd(struct nfs_server *server) |
424 | { | 503 | { |
425 | int error = 0; | 504 | struct nlm_host *host; |
505 | struct nfs_client *clp = server->nfs_client; | ||
506 | struct nlmclnt_initdata nlm_init = { | ||
507 | .hostname = clp->cl_hostname, | ||
508 | .address = (struct sockaddr *)&clp->cl_addr, | ||
509 | .addrlen = clp->cl_addrlen, | ||
510 | .protocol = server->flags & NFS_MOUNT_TCP ? | ||
511 | IPPROTO_TCP : IPPROTO_UDP, | ||
512 | .nfs_version = clp->rpc_ops->version, | ||
513 | }; | ||
426 | 514 | ||
427 | if (server->nfs_client->cl_nfsversion > 3) | 515 | if (nlm_init.nfs_version > 3) |
428 | goto out; | 516 | return 0; |
429 | if (server->flags & NFS_MOUNT_NONLM) | 517 | if (server->flags & NFS_MOUNT_NONLM) |
430 | goto out; | 518 | return 0; |
431 | error = lockd_up((server->flags & NFS_MOUNT_TCP) ? | 519 | |
432 | IPPROTO_TCP : IPPROTO_UDP); | 520 | host = nlmclnt_init(&nlm_init); |
433 | if (error < 0) | 521 | if (IS_ERR(host)) |
434 | server->flags |= NFS_MOUNT_NONLM; | 522 | return PTR_ERR(host); |
435 | else | 523 | |
436 | server->destroy = nfs_destroy_server; | 524 | server->nlm_host = host; |
437 | out: | 525 | server->destroy = nfs_destroy_server; |
438 | return error; | 526 | return 0; |
439 | } | 527 | } |
440 | 528 | ||
441 | /* | 529 | /* |
@@ -444,7 +532,7 @@ out: | |||
444 | #ifdef CONFIG_NFS_V3_ACL | 532 | #ifdef CONFIG_NFS_V3_ACL |
445 | static void nfs_init_server_aclclient(struct nfs_server *server) | 533 | static void nfs_init_server_aclclient(struct nfs_server *server) |
446 | { | 534 | { |
447 | if (server->nfs_client->cl_nfsversion != 3) | 535 | if (server->nfs_client->rpc_ops->version != 3) |
448 | goto out_noacl; | 536 | goto out_noacl; |
449 | if (server->flags & NFS_MOUNT_NOACL) | 537 | if (server->flags & NFS_MOUNT_NOACL) |
450 | goto out_noacl; | 538 | goto out_noacl; |
@@ -471,7 +559,9 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) | |||
471 | /* | 559 | /* |
472 | * Create a general RPC client | 560 | * Create a general RPC client |
473 | */ | 561 | */ |
474 | static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) | 562 | static int nfs_init_server_rpcclient(struct nfs_server *server, |
563 | const struct rpc_timeout *timeo, | ||
564 | rpc_authflavor_t pseudoflavour) | ||
475 | { | 565 | { |
476 | struct nfs_client *clp = server->nfs_client; | 566 | struct nfs_client *clp = server->nfs_client; |
477 | 567 | ||
@@ -481,6 +571,11 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t | |||
481 | return PTR_ERR(server->client); | 571 | return PTR_ERR(server->client); |
482 | } | 572 | } |
483 | 573 | ||
574 | memcpy(&server->client->cl_timeout_default, | ||
575 | timeo, | ||
576 | sizeof(server->client->cl_timeout_default)); | ||
577 | server->client->cl_timeout = &server->client->cl_timeout_default; | ||
578 | |||
484 | if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { | 579 | if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { |
485 | struct rpc_auth *auth; | 580 | struct rpc_auth *auth; |
486 | 581 | ||
@@ -501,6 +596,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t | |||
501 | * Initialise an NFS2 or NFS3 client | 596 | * Initialise an NFS2 or NFS3 client |
502 | */ | 597 | */ |
503 | static int nfs_init_client(struct nfs_client *clp, | 598 | static int nfs_init_client(struct nfs_client *clp, |
599 | const struct rpc_timeout *timeparms, | ||
504 | const struct nfs_parsed_mount_data *data) | 600 | const struct nfs_parsed_mount_data *data) |
505 | { | 601 | { |
506 | int error; | 602 | int error; |
@@ -511,18 +607,11 @@ static int nfs_init_client(struct nfs_client *clp, | |||
511 | return 0; | 607 | return 0; |
512 | } | 608 | } |
513 | 609 | ||
514 | /* Check NFS protocol revision and initialize RPC op vector */ | ||
515 | clp->rpc_ops = &nfs_v2_clientops; | ||
516 | #ifdef CONFIG_NFS_V3 | ||
517 | if (clp->cl_nfsversion == 3) | ||
518 | clp->rpc_ops = &nfs_v3_clientops; | ||
519 | #endif | ||
520 | /* | 610 | /* |
521 | * Create a client RPC handle for doing FSSTAT with UNIX auth only | 611 | * Create a client RPC handle for doing FSSTAT with UNIX auth only |
522 | * - RFC 2623, sec 2.3.2 | 612 | * - RFC 2623, sec 2.3.2 |
523 | */ | 613 | */ |
524 | error = nfs_create_rpc_client(clp, data->nfs_server.protocol, | 614 | error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0); |
525 | data->timeo, data->retrans, RPC_AUTH_UNIX, 0); | ||
526 | if (error < 0) | 615 | if (error < 0) |
527 | goto error; | 616 | goto error; |
528 | nfs_mark_client_ready(clp, NFS_CS_READY); | 617 | nfs_mark_client_ready(clp, NFS_CS_READY); |
@@ -540,25 +629,34 @@ error: | |||
540 | static int nfs_init_server(struct nfs_server *server, | 629 | static int nfs_init_server(struct nfs_server *server, |
541 | const struct nfs_parsed_mount_data *data) | 630 | const struct nfs_parsed_mount_data *data) |
542 | { | 631 | { |
632 | struct nfs_client_initdata cl_init = { | ||
633 | .hostname = data->nfs_server.hostname, | ||
634 | .addr = (const struct sockaddr *)&data->nfs_server.address, | ||
635 | .addrlen = data->nfs_server.addrlen, | ||
636 | .rpc_ops = &nfs_v2_clientops, | ||
637 | .proto = data->nfs_server.protocol, | ||
638 | }; | ||
639 | struct rpc_timeout timeparms; | ||
543 | struct nfs_client *clp; | 640 | struct nfs_client *clp; |
544 | int error, nfsvers = 2; | 641 | int error; |
545 | 642 | ||
546 | dprintk("--> nfs_init_server()\n"); | 643 | dprintk("--> nfs_init_server()\n"); |
547 | 644 | ||
548 | #ifdef CONFIG_NFS_V3 | 645 | #ifdef CONFIG_NFS_V3 |
549 | if (data->flags & NFS_MOUNT_VER3) | 646 | if (data->flags & NFS_MOUNT_VER3) |
550 | nfsvers = 3; | 647 | cl_init.rpc_ops = &nfs_v3_clientops; |
551 | #endif | 648 | #endif |
552 | 649 | ||
553 | /* Allocate or find a client reference we can use */ | 650 | /* Allocate or find a client reference we can use */ |
554 | clp = nfs_get_client(data->nfs_server.hostname, | 651 | clp = nfs_get_client(&cl_init); |
555 | &data->nfs_server.address, nfsvers); | ||
556 | if (IS_ERR(clp)) { | 652 | if (IS_ERR(clp)) { |
557 | dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); | 653 | dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); |
558 | return PTR_ERR(clp); | 654 | return PTR_ERR(clp); |
559 | } | 655 | } |
560 | 656 | ||
561 | error = nfs_init_client(clp, data); | 657 | nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, |
658 | data->timeo, data->retrans); | ||
659 | error = nfs_init_client(clp, &timeparms, data); | ||
562 | if (error < 0) | 660 | if (error < 0) |
563 | goto error; | 661 | goto error; |
564 | 662 | ||
@@ -582,7 +680,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
582 | if (error < 0) | 680 | if (error < 0) |
583 | goto error; | 681 | goto error; |
584 | 682 | ||
585 | error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); | 683 | error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); |
586 | if (error < 0) | 684 | if (error < 0) |
587 | goto error; | 685 | goto error; |
588 | 686 | ||
@@ -728,6 +826,9 @@ static struct nfs_server *nfs_alloc_server(void) | |||
728 | INIT_LIST_HEAD(&server->client_link); | 826 | INIT_LIST_HEAD(&server->client_link); |
729 | INIT_LIST_HEAD(&server->master_link); | 827 | INIT_LIST_HEAD(&server->master_link); |
730 | 828 | ||
829 | init_waitqueue_head(&server->active_wq); | ||
830 | atomic_set(&server->active, 0); | ||
831 | |||
731 | server->io_stats = nfs_alloc_iostats(); | 832 | server->io_stats = nfs_alloc_iostats(); |
732 | if (!server->io_stats) { | 833 | if (!server->io_stats) { |
733 | kfree(server); | 834 | kfree(server); |
@@ -751,6 +852,9 @@ void nfs_free_server(struct nfs_server *server) | |||
751 | 852 | ||
752 | if (server->destroy != NULL) | 853 | if (server->destroy != NULL) |
753 | server->destroy(server); | 854 | server->destroy(server); |
855 | |||
856 | if (!IS_ERR(server->client_acl)) | ||
857 | rpc_shutdown_client(server->client_acl); | ||
754 | if (!IS_ERR(server->client)) | 858 | if (!IS_ERR(server->client)) |
755 | rpc_shutdown_client(server->client); | 859 | rpc_shutdown_client(server->client); |
756 | 860 | ||
@@ -836,7 +940,7 @@ error: | |||
836 | * Initialise an NFS4 client record | 940 | * Initialise an NFS4 client record |
837 | */ | 941 | */ |
838 | static int nfs4_init_client(struct nfs_client *clp, | 942 | static int nfs4_init_client(struct nfs_client *clp, |
839 | int proto, int timeo, int retrans, | 943 | const struct rpc_timeout *timeparms, |
840 | const char *ip_addr, | 944 | const char *ip_addr, |
841 | rpc_authflavor_t authflavour) | 945 | rpc_authflavor_t authflavour) |
842 | { | 946 | { |
@@ -851,7 +955,7 @@ static int nfs4_init_client(struct nfs_client *clp, | |||
851 | /* Check NFS protocol revision and initialize RPC op vector */ | 955 | /* Check NFS protocol revision and initialize RPC op vector */ |
852 | clp->rpc_ops = &nfs_v4_clientops; | 956 | clp->rpc_ops = &nfs_v4_clientops; |
853 | 957 | ||
854 | error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour, | 958 | error = nfs_create_rpc_client(clp, timeparms, authflavour, |
855 | RPC_CLNT_CREATE_DISCRTRY); | 959 | RPC_CLNT_CREATE_DISCRTRY); |
856 | if (error < 0) | 960 | if (error < 0) |
857 | goto error; | 961 | goto error; |
@@ -878,23 +982,32 @@ error: | |||
878 | * Set up an NFS4 client | 982 | * Set up an NFS4 client |
879 | */ | 983 | */ |
880 | static int nfs4_set_client(struct nfs_server *server, | 984 | static int nfs4_set_client(struct nfs_server *server, |
881 | const char *hostname, const struct sockaddr_in *addr, | 985 | const char *hostname, |
986 | const struct sockaddr *addr, | ||
987 | const size_t addrlen, | ||
882 | const char *ip_addr, | 988 | const char *ip_addr, |
883 | rpc_authflavor_t authflavour, | 989 | rpc_authflavor_t authflavour, |
884 | int proto, int timeo, int retrans) | 990 | int proto, const struct rpc_timeout *timeparms) |
885 | { | 991 | { |
992 | struct nfs_client_initdata cl_init = { | ||
993 | .hostname = hostname, | ||
994 | .addr = addr, | ||
995 | .addrlen = addrlen, | ||
996 | .rpc_ops = &nfs_v4_clientops, | ||
997 | .proto = proto, | ||
998 | }; | ||
886 | struct nfs_client *clp; | 999 | struct nfs_client *clp; |
887 | int error; | 1000 | int error; |
888 | 1001 | ||
889 | dprintk("--> nfs4_set_client()\n"); | 1002 | dprintk("--> nfs4_set_client()\n"); |
890 | 1003 | ||
891 | /* Allocate or find a client reference we can use */ | 1004 | /* Allocate or find a client reference we can use */ |
892 | clp = nfs_get_client(hostname, addr, 4); | 1005 | clp = nfs_get_client(&cl_init); |
893 | if (IS_ERR(clp)) { | 1006 | if (IS_ERR(clp)) { |
894 | error = PTR_ERR(clp); | 1007 | error = PTR_ERR(clp); |
895 | goto error; | 1008 | goto error; |
896 | } | 1009 | } |
897 | error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour); | 1010 | error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); |
898 | if (error < 0) | 1011 | if (error < 0) |
899 | goto error_put; | 1012 | goto error_put; |
900 | 1013 | ||
@@ -915,10 +1028,26 @@ error: | |||
915 | static int nfs4_init_server(struct nfs_server *server, | 1028 | static int nfs4_init_server(struct nfs_server *server, |
916 | const struct nfs_parsed_mount_data *data) | 1029 | const struct nfs_parsed_mount_data *data) |
917 | { | 1030 | { |
1031 | struct rpc_timeout timeparms; | ||
918 | int error; | 1032 | int error; |
919 | 1033 | ||
920 | dprintk("--> nfs4_init_server()\n"); | 1034 | dprintk("--> nfs4_init_server()\n"); |
921 | 1035 | ||
1036 | nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, | ||
1037 | data->timeo, data->retrans); | ||
1038 | |||
1039 | /* Get a client record */ | ||
1040 | error = nfs4_set_client(server, | ||
1041 | data->nfs_server.hostname, | ||
1042 | (const struct sockaddr *)&data->nfs_server.address, | ||
1043 | data->nfs_server.addrlen, | ||
1044 | data->client_address, | ||
1045 | data->auth_flavors[0], | ||
1046 | data->nfs_server.protocol, | ||
1047 | &timeparms); | ||
1048 | if (error < 0) | ||
1049 | goto error; | ||
1050 | |||
922 | /* Initialise the client representation from the mount data */ | 1051 | /* Initialise the client representation from the mount data */ |
923 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1052 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; |
924 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1053 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
@@ -933,8 +1062,9 @@ static int nfs4_init_server(struct nfs_server *server, | |||
933 | server->acdirmin = data->acdirmin * HZ; | 1062 | server->acdirmin = data->acdirmin * HZ; |
934 | server->acdirmax = data->acdirmax * HZ; | 1063 | server->acdirmax = data->acdirmax * HZ; |
935 | 1064 | ||
936 | error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); | 1065 | error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); |
937 | 1066 | ||
1067 | error: | ||
938 | /* Done */ | 1068 | /* Done */ |
939 | dprintk("<-- nfs4_init_server() = %d\n", error); | 1069 | dprintk("<-- nfs4_init_server() = %d\n", error); |
940 | return error; | 1070 | return error; |
@@ -957,17 +1087,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, | |||
957 | if (!server) | 1087 | if (!server) |
958 | return ERR_PTR(-ENOMEM); | 1088 | return ERR_PTR(-ENOMEM); |
959 | 1089 | ||
960 | /* Get a client record */ | ||
961 | error = nfs4_set_client(server, | ||
962 | data->nfs_server.hostname, | ||
963 | &data->nfs_server.address, | ||
964 | data->client_address, | ||
965 | data->auth_flavors[0], | ||
966 | data->nfs_server.protocol, | ||
967 | data->timeo, data->retrans); | ||
968 | if (error < 0) | ||
969 | goto error; | ||
970 | |||
971 | /* set up the general RPC client */ | 1090 | /* set up the general RPC client */ |
972 | error = nfs4_init_server(server, data); | 1091 | error = nfs4_init_server(server, data); |
973 | if (error < 0) | 1092 | if (error < 0) |
@@ -1035,12 +1154,13 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, | |||
1035 | 1154 | ||
1036 | /* Get a client representation. | 1155 | /* Get a client representation. |
1037 | * Note: NFSv4 always uses TCP, */ | 1156 | * Note: NFSv4 always uses TCP, */ |
1038 | error = nfs4_set_client(server, data->hostname, data->addr, | 1157 | error = nfs4_set_client(server, data->hostname, |
1039 | parent_client->cl_ipaddr, | 1158 | data->addr, |
1040 | data->authflavor, | 1159 | data->addrlen, |
1041 | parent_server->client->cl_xprt->prot, | 1160 | parent_client->cl_ipaddr, |
1042 | parent_client->retrans_timeo, | 1161 | data->authflavor, |
1043 | parent_client->retrans_count); | 1162 | parent_server->client->cl_xprt->prot, |
1163 | parent_server->client->cl_timeout); | ||
1044 | if (error < 0) | 1164 | if (error < 0) |
1045 | goto error; | 1165 | goto error; |
1046 | 1166 | ||
@@ -1048,7 +1168,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, | |||
1048 | nfs_server_copy_userdata(server, parent_server); | 1168 | nfs_server_copy_userdata(server, parent_server); |
1049 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1169 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
1050 | 1170 | ||
1051 | error = nfs_init_server_rpcclient(server, data->authflavor); | 1171 | error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); |
1052 | if (error < 0) | 1172 | if (error < 0) |
1053 | goto error; | 1173 | goto error; |
1054 | 1174 | ||
@@ -1117,7 +1237,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, | |||
1117 | 1237 | ||
1118 | server->fsid = fattr->fsid; | 1238 | server->fsid = fattr->fsid; |
1119 | 1239 | ||
1120 | error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); | 1240 | error = nfs_init_server_rpcclient(server, |
1241 | source->client->cl_timeout, | ||
1242 | source->client->cl_auth->au_flavor); | ||
1121 | if (error < 0) | 1243 | if (error < 0) |
1122 | goto out_free_server; | 1244 | goto out_free_server; |
1123 | if (!IS_ERR(source->client_acl)) | 1245 | if (!IS_ERR(source->client_acl)) |
@@ -1259,10 +1381,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1259 | /* display one transport per line on subsequent lines */ | 1381 | /* display one transport per line on subsequent lines */ |
1260 | clp = list_entry(v, struct nfs_client, cl_share_link); | 1382 | clp = list_entry(v, struct nfs_client, cl_share_link); |
1261 | 1383 | ||
1262 | seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n", | 1384 | seq_printf(m, "v%u %s %s %3d %s\n", |
1263 | clp->cl_nfsversion, | 1385 | clp->rpc_ops->version, |
1264 | NIPQUAD(clp->cl_addr.sin_addr), | 1386 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
1265 | ntohs(clp->cl_addr.sin_port), | 1387 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), |
1266 | atomic_read(&clp->cl_count), | 1388 | atomic_read(&clp->cl_count), |
1267 | clp->cl_hostname); | 1389 | clp->cl_hostname); |
1268 | 1390 | ||
@@ -1338,10 +1460,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1338 | (unsigned long long) server->fsid.major, | 1460 | (unsigned long long) server->fsid.major, |
1339 | (unsigned long long) server->fsid.minor); | 1461 | (unsigned long long) server->fsid.minor); |
1340 | 1462 | ||
1341 | seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n", | 1463 | seq_printf(m, "v%u %s %s %-7s %-17s\n", |
1342 | clp->cl_nfsversion, | 1464 | clp->rpc_ops->version, |
1343 | NIPQUAD(clp->cl_addr.sin_addr), | 1465 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
1344 | ntohs(clp->cl_addr.sin_port), | 1466 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), |
1345 | dev, | 1467 | dev, |
1346 | fsid); | 1468 | fsid); |
1347 | 1469 | ||
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 11833f4caeaa..b9eadd18ba70 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -125,6 +125,32 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st | |||
125 | put_rpccred(oldcred); | 125 | put_rpccred(oldcred); |
126 | } | 126 | } |
127 | 127 | ||
128 | static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) | ||
129 | { | ||
130 | int res = 0; | ||
131 | |||
132 | res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); | ||
133 | nfs_free_delegation(delegation); | ||
134 | return res; | ||
135 | } | ||
136 | |||
137 | static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) | ||
138 | { | ||
139 | struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); | ||
140 | |||
141 | if (delegation == NULL) | ||
142 | goto nomatch; | ||
143 | if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, | ||
144 | sizeof(delegation->stateid.data)) != 0) | ||
145 | goto nomatch; | ||
146 | list_del_rcu(&delegation->super_list); | ||
147 | nfsi->delegation_state = 0; | ||
148 | rcu_assign_pointer(nfsi->delegation, NULL); | ||
149 | return delegation; | ||
150 | nomatch: | ||
151 | return NULL; | ||
152 | } | ||
153 | |||
128 | /* | 154 | /* |
129 | * Set up a delegation on an inode | 155 | * Set up a delegation on an inode |
130 | */ | 156 | */ |
@@ -133,6 +159,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
133 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 159 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
134 | struct nfs_inode *nfsi = NFS_I(inode); | 160 | struct nfs_inode *nfsi = NFS_I(inode); |
135 | struct nfs_delegation *delegation; | 161 | struct nfs_delegation *delegation; |
162 | struct nfs_delegation *freeme = NULL; | ||
136 | int status = 0; | 163 | int status = 0; |
137 | 164 | ||
138 | delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); | 165 | delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); |
@@ -147,41 +174,45 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
147 | delegation->inode = inode; | 174 | delegation->inode = inode; |
148 | 175 | ||
149 | spin_lock(&clp->cl_lock); | 176 | spin_lock(&clp->cl_lock); |
150 | if (rcu_dereference(nfsi->delegation) == NULL) { | 177 | if (rcu_dereference(nfsi->delegation) != NULL) { |
151 | list_add_rcu(&delegation->super_list, &clp->cl_delegations); | ||
152 | nfsi->delegation_state = delegation->type; | ||
153 | rcu_assign_pointer(nfsi->delegation, delegation); | ||
154 | delegation = NULL; | ||
155 | } else { | ||
156 | if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, | 178 | if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, |
157 | sizeof(delegation->stateid)) != 0 || | 179 | sizeof(delegation->stateid)) == 0 && |
158 | delegation->type != nfsi->delegation->type) { | 180 | delegation->type == nfsi->delegation->type) { |
159 | printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", | 181 | goto out; |
160 | __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); | 182 | } |
161 | status = -EIO; | 183 | /* |
184 | * Deal with broken servers that hand out two | ||
185 | * delegations for the same file. | ||
186 | */ | ||
187 | dfprintk(FILE, "%s: server %s handed out " | ||
188 | "a duplicate delegation!\n", | ||
189 | __FUNCTION__, clp->cl_hostname); | ||
190 | if (delegation->type <= nfsi->delegation->type) { | ||
191 | freeme = delegation; | ||
192 | delegation = NULL; | ||
193 | goto out; | ||
162 | } | 194 | } |
195 | freeme = nfs_detach_delegation_locked(nfsi, NULL); | ||
163 | } | 196 | } |
197 | list_add_rcu(&delegation->super_list, &clp->cl_delegations); | ||
198 | nfsi->delegation_state = delegation->type; | ||
199 | rcu_assign_pointer(nfsi->delegation, delegation); | ||
200 | delegation = NULL; | ||
164 | 201 | ||
165 | /* Ensure we revalidate the attributes and page cache! */ | 202 | /* Ensure we revalidate the attributes and page cache! */ |
166 | spin_lock(&inode->i_lock); | 203 | spin_lock(&inode->i_lock); |
167 | nfsi->cache_validity |= NFS_INO_REVAL_FORCED; | 204 | nfsi->cache_validity |= NFS_INO_REVAL_FORCED; |
168 | spin_unlock(&inode->i_lock); | 205 | spin_unlock(&inode->i_lock); |
169 | 206 | ||
207 | out: | ||
170 | spin_unlock(&clp->cl_lock); | 208 | spin_unlock(&clp->cl_lock); |
171 | if (delegation != NULL) | 209 | if (delegation != NULL) |
172 | nfs_free_delegation(delegation); | 210 | nfs_free_delegation(delegation); |
211 | if (freeme != NULL) | ||
212 | nfs_do_return_delegation(inode, freeme, 0); | ||
173 | return status; | 213 | return status; |
174 | } | 214 | } |
175 | 215 | ||
176 | static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation) | ||
177 | { | ||
178 | int res = 0; | ||
179 | |||
180 | res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); | ||
181 | nfs_free_delegation(delegation); | ||
182 | return res; | ||
183 | } | ||
184 | |||
185 | /* Sync all data to disk upon delegation return */ | 216 | /* Sync all data to disk upon delegation return */ |
186 | static void nfs_msync_inode(struct inode *inode) | 217 | static void nfs_msync_inode(struct inode *inode) |
187 | { | 218 | { |
@@ -207,24 +238,28 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat | |||
207 | up_read(&clp->cl_sem); | 238 | up_read(&clp->cl_sem); |
208 | nfs_msync_inode(inode); | 239 | nfs_msync_inode(inode); |
209 | 240 | ||
210 | return nfs_do_return_delegation(inode, delegation); | 241 | return nfs_do_return_delegation(inode, delegation, 1); |
211 | } | 242 | } |
212 | 243 | ||
213 | static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) | 244 | /* |
245 | * This function returns the delegation without reclaiming opens | ||
246 | * or protecting against delegation reclaims. | ||
247 | * It is therefore really only safe to be called from | ||
248 | * nfs4_clear_inode() | ||
249 | */ | ||
250 | void nfs_inode_return_delegation_noreclaim(struct inode *inode) | ||
214 | { | 251 | { |
215 | struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); | 252 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
253 | struct nfs_inode *nfsi = NFS_I(inode); | ||
254 | struct nfs_delegation *delegation; | ||
216 | 255 | ||
217 | if (delegation == NULL) | 256 | if (rcu_dereference(nfsi->delegation) != NULL) { |
218 | goto nomatch; | 257 | spin_lock(&clp->cl_lock); |
219 | if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, | 258 | delegation = nfs_detach_delegation_locked(nfsi, NULL); |
220 | sizeof(delegation->stateid.data)) != 0) | 259 | spin_unlock(&clp->cl_lock); |
221 | goto nomatch; | 260 | if (delegation != NULL) |
222 | list_del_rcu(&delegation->super_list); | 261 | nfs_do_return_delegation(inode, delegation, 0); |
223 | nfsi->delegation_state = 0; | 262 | } |
224 | rcu_assign_pointer(nfsi->delegation, NULL); | ||
225 | return delegation; | ||
226 | nomatch: | ||
227 | return NULL; | ||
228 | } | 263 | } |
229 | 264 | ||
230 | int nfs_inode_return_delegation(struct inode *inode) | 265 | int nfs_inode_return_delegation(struct inode *inode) |
@@ -314,8 +349,9 @@ void nfs_expire_all_delegations(struct nfs_client *clp) | |||
314 | __module_get(THIS_MODULE); | 349 | __module_get(THIS_MODULE); |
315 | atomic_inc(&clp->cl_count); | 350 | atomic_inc(&clp->cl_count); |
316 | task = kthread_run(nfs_do_expire_all_delegations, clp, | 351 | task = kthread_run(nfs_do_expire_all_delegations, clp, |
317 | "%u.%u.%u.%u-delegreturn", | 352 | "%s-delegreturn", |
318 | NIPQUAD(clp->cl_addr.sin_addr)); | 353 | rpc_peeraddr2str(clp->cl_rpcclient, |
354 | RPC_DISPLAY_ADDR)); | ||
319 | if (!IS_ERR(task)) | 355 | if (!IS_ERR(task)) |
320 | return; | 356 | return; |
321 | nfs_put_client(clp); | 357 | nfs_put_client(clp); |
@@ -386,7 +422,7 @@ static int recall_thread(void *data) | |||
386 | nfs_msync_inode(inode); | 422 | nfs_msync_inode(inode); |
387 | 423 | ||
388 | if (delegation != NULL) | 424 | if (delegation != NULL) |
389 | nfs_do_return_delegation(inode, delegation); | 425 | nfs_do_return_delegation(inode, delegation, 1); |
390 | iput(inode); | 426 | iput(inode); |
391 | module_put_and_exit(0); | 427 | module_put_and_exit(0); |
392 | } | 428 | } |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5874ce7fdbae..f1c5e2a5d88e 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h | |||
@@ -29,6 +29,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
29 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 29 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
30 | int nfs_inode_return_delegation(struct inode *inode); | 30 | int nfs_inode_return_delegation(struct inode *inode); |
31 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); | 31 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); |
32 | void nfs_inode_return_delegation_noreclaim(struct inode *inode); | ||
32 | 33 | ||
33 | struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); | 34 | struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); |
34 | void nfs_return_all_delegations(struct super_block *sb); | 35 | void nfs_return_all_delegations(struct super_block *sb); |
@@ -39,7 +40,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp); | |||
39 | void nfs_delegation_reap_unclaimed(struct nfs_client *clp); | 40 | void nfs_delegation_reap_unclaimed(struct nfs_client *clp); |
40 | 41 | ||
41 | /* NFSv4 delegation-related procedures */ | 42 | /* NFSv4 delegation-related procedures */ |
42 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); | 43 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); |
43 | int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); | 44 | int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); |
44 | int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); | 45 | int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); |
45 | int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); | 46 | int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f697b5c74b7c..476cb0f837fd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -192,7 +192,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
192 | /* We requested READDIRPLUS, but the server doesn't grok it */ | 192 | /* We requested READDIRPLUS, but the server doesn't grok it */ |
193 | if (error == -ENOTSUPP && desc->plus) { | 193 | if (error == -ENOTSUPP && desc->plus) { |
194 | NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; | 194 | NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; |
195 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); | 195 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
196 | desc->plus = 0; | 196 | desc->plus = 0; |
197 | goto again; | 197 | goto again; |
198 | } | 198 | } |
@@ -537,12 +537,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
537 | 537 | ||
538 | lock_kernel(); | 538 | lock_kernel(); |
539 | 539 | ||
540 | res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); | ||
541 | if (res < 0) { | ||
542 | unlock_kernel(); | ||
543 | return res; | ||
544 | } | ||
545 | |||
546 | /* | 540 | /* |
547 | * filp->f_pos points to the dirent entry number. | 541 | * filp->f_pos points to the dirent entry number. |
548 | * *desc->dir_cookie has the cookie for the next entry. We have | 542 | * *desc->dir_cookie has the cookie for the next entry. We have |
@@ -564,6 +558,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
564 | desc->entry = &my_entry; | 558 | desc->entry = &my_entry; |
565 | 559 | ||
566 | nfs_block_sillyrename(dentry); | 560 | nfs_block_sillyrename(dentry); |
561 | res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); | ||
562 | if (res < 0) | ||
563 | goto out; | ||
564 | |||
567 | while(!desc->entry->eof) { | 565 | while(!desc->entry->eof) { |
568 | res = readdir_search_pagecache(desc); | 566 | res = readdir_search_pagecache(desc); |
569 | 567 | ||
@@ -579,7 +577,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
579 | break; | 577 | break; |
580 | } | 578 | } |
581 | if (res == -ETOOSMALL && desc->plus) { | 579 | if (res == -ETOOSMALL && desc->plus) { |
582 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); | 580 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
583 | nfs_zap_caches(inode); | 581 | nfs_zap_caches(inode); |
584 | desc->plus = 0; | 582 | desc->plus = 0; |
585 | desc->entry->eof = 0; | 583 | desc->entry->eof = 0; |
@@ -594,6 +592,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
594 | break; | 592 | break; |
595 | } | 593 | } |
596 | } | 594 | } |
595 | out: | ||
597 | nfs_unblock_sillyrename(dentry); | 596 | nfs_unblock_sillyrename(dentry); |
598 | unlock_kernel(); | 597 | unlock_kernel(); |
599 | if (res > 0) | 598 | if (res > 0) |
@@ -639,6 +638,21 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
639 | return 0; | 638 | return 0; |
640 | } | 639 | } |
641 | 640 | ||
641 | /** | ||
642 | * nfs_force_lookup_revalidate - Mark the directory as having changed | ||
643 | * @dir - pointer to directory inode | ||
644 | * | ||
645 | * This forces the revalidation code in nfs_lookup_revalidate() to do a | ||
646 | * full lookup on all child dentries of 'dir' whenever a change occurs | ||
647 | * on the server that might have invalidated our dcache. | ||
648 | * | ||
649 | * The caller should be holding dir->i_lock | ||
650 | */ | ||
651 | void nfs_force_lookup_revalidate(struct inode *dir) | ||
652 | { | ||
653 | NFS_I(dir)->cache_change_attribute = jiffies; | ||
654 | } | ||
655 | |||
642 | /* | 656 | /* |
643 | * A check for whether or not the parent directory has changed. | 657 | * A check for whether or not the parent directory has changed. |
644 | * In the case it has, we assume that the dentries are untrustworthy | 658 | * In the case it has, we assume that the dentries are untrustworthy |
@@ -827,6 +841,10 @@ static int nfs_dentry_delete(struct dentry *dentry) | |||
827 | dentry->d_parent->d_name.name, dentry->d_name.name, | 841 | dentry->d_parent->d_name.name, dentry->d_name.name, |
828 | dentry->d_flags); | 842 | dentry->d_flags); |
829 | 843 | ||
844 | /* Unhash any dentry with a stale inode */ | ||
845 | if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) | ||
846 | return 1; | ||
847 | |||
830 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { | 848 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { |
831 | /* Unhash it, so that ->d_iput() would be called */ | 849 | /* Unhash it, so that ->d_iput() would be called */ |
832 | return 1; | 850 | return 1; |
@@ -846,7 +864,6 @@ static int nfs_dentry_delete(struct dentry *dentry) | |||
846 | */ | 864 | */ |
847 | static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) | 865 | static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) |
848 | { | 866 | { |
849 | nfs_inode_return_delegation(inode); | ||
850 | if (S_ISDIR(inode->i_mode)) | 867 | if (S_ISDIR(inode->i_mode)) |
851 | /* drop any readdir cache as it could easily be old */ | 868 | /* drop any readdir cache as it could easily be old */ |
852 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; | 869 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; |
@@ -1268,6 +1285,12 @@ out_err: | |||
1268 | return error; | 1285 | return error; |
1269 | } | 1286 | } |
1270 | 1287 | ||
1288 | static void nfs_dentry_handle_enoent(struct dentry *dentry) | ||
1289 | { | ||
1290 | if (dentry->d_inode != NULL && !d_unhashed(dentry)) | ||
1291 | d_delete(dentry); | ||
1292 | } | ||
1293 | |||
1271 | static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | 1294 | static int nfs_rmdir(struct inode *dir, struct dentry *dentry) |
1272 | { | 1295 | { |
1273 | int error; | 1296 | int error; |
@@ -1280,6 +1303,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1280 | /* Ensure the VFS deletes this inode */ | 1303 | /* Ensure the VFS deletes this inode */ |
1281 | if (error == 0 && dentry->d_inode != NULL) | 1304 | if (error == 0 && dentry->d_inode != NULL) |
1282 | clear_nlink(dentry->d_inode); | 1305 | clear_nlink(dentry->d_inode); |
1306 | else if (error == -ENOENT) | ||
1307 | nfs_dentry_handle_enoent(dentry); | ||
1283 | unlock_kernel(); | 1308 | unlock_kernel(); |
1284 | 1309 | ||
1285 | return error; | 1310 | return error; |
@@ -1386,6 +1411,8 @@ static int nfs_safe_remove(struct dentry *dentry) | |||
1386 | nfs_mark_for_revalidate(inode); | 1411 | nfs_mark_for_revalidate(inode); |
1387 | } else | 1412 | } else |
1388 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); | 1413 | error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); |
1414 | if (error == -ENOENT) | ||
1415 | nfs_dentry_handle_enoent(dentry); | ||
1389 | out: | 1416 | out: |
1390 | return error; | 1417 | return error; |
1391 | } | 1418 | } |
@@ -1422,7 +1449,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
1422 | spin_unlock(&dentry->d_lock); | 1449 | spin_unlock(&dentry->d_lock); |
1423 | spin_unlock(&dcache_lock); | 1450 | spin_unlock(&dcache_lock); |
1424 | error = nfs_safe_remove(dentry); | 1451 | error = nfs_safe_remove(dentry); |
1425 | if (!error) { | 1452 | if (!error || error == -ENOENT) { |
1426 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1453 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1427 | } else if (need_rehash) | 1454 | } else if (need_rehash) |
1428 | d_rehash(dentry); | 1455 | d_rehash(dentry); |
@@ -1635,7 +1662,8 @@ out: | |||
1635 | d_move(old_dentry, new_dentry); | 1662 | d_move(old_dentry, new_dentry); |
1636 | nfs_set_verifier(new_dentry, | 1663 | nfs_set_verifier(new_dentry, |
1637 | nfs_save_change_attribute(new_dir)); | 1664 | nfs_save_change_attribute(new_dir)); |
1638 | } | 1665 | } else if (error == -ENOENT) |
1666 | nfs_dentry_handle_enoent(old_dentry); | ||
1639 | 1667 | ||
1640 | /* new dentry created? */ | 1668 | /* new dentry created? */ |
1641 | if (dentry) | 1669 | if (dentry) |
@@ -1666,13 +1694,19 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) | |||
1666 | restart: | 1694 | restart: |
1667 | spin_lock(&nfs_access_lru_lock); | 1695 | spin_lock(&nfs_access_lru_lock); |
1668 | list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { | 1696 | list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { |
1697 | struct rw_semaphore *s_umount; | ||
1669 | struct inode *inode; | 1698 | struct inode *inode; |
1670 | 1699 | ||
1671 | if (nr_to_scan-- == 0) | 1700 | if (nr_to_scan-- == 0) |
1672 | break; | 1701 | break; |
1702 | s_umount = &nfsi->vfs_inode.i_sb->s_umount; | ||
1703 | if (!down_read_trylock(s_umount)) | ||
1704 | continue; | ||
1673 | inode = igrab(&nfsi->vfs_inode); | 1705 | inode = igrab(&nfsi->vfs_inode); |
1674 | if (inode == NULL) | 1706 | if (inode == NULL) { |
1707 | up_read(s_umount); | ||
1675 | continue; | 1708 | continue; |
1709 | } | ||
1676 | spin_lock(&inode->i_lock); | 1710 | spin_lock(&inode->i_lock); |
1677 | if (list_empty(&nfsi->access_cache_entry_lru)) | 1711 | if (list_empty(&nfsi->access_cache_entry_lru)) |
1678 | goto remove_lru_entry; | 1712 | goto remove_lru_entry; |
@@ -1691,6 +1725,7 @@ remove_lru_entry: | |||
1691 | spin_unlock(&inode->i_lock); | 1725 | spin_unlock(&inode->i_lock); |
1692 | spin_unlock(&nfs_access_lru_lock); | 1726 | spin_unlock(&nfs_access_lru_lock); |
1693 | iput(inode); | 1727 | iput(inode); |
1728 | up_read(s_umount); | ||
1694 | goto restart; | 1729 | goto restart; |
1695 | } | 1730 | } |
1696 | spin_unlock(&nfs_access_lru_lock); | 1731 | spin_unlock(&nfs_access_lru_lock); |
@@ -1731,7 +1766,7 @@ static void __nfs_access_zap_cache(struct inode *inode) | |||
1731 | void nfs_access_zap_cache(struct inode *inode) | 1766 | void nfs_access_zap_cache(struct inode *inode) |
1732 | { | 1767 | { |
1733 | /* Remove from global LRU init */ | 1768 | /* Remove from global LRU init */ |
1734 | if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { | 1769 | if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { |
1735 | spin_lock(&nfs_access_lru_lock); | 1770 | spin_lock(&nfs_access_lru_lock); |
1736 | list_del_init(&NFS_I(inode)->access_cache_inode_lru); | 1771 | list_del_init(&NFS_I(inode)->access_cache_inode_lru); |
1737 | spin_unlock(&nfs_access_lru_lock); | 1772 | spin_unlock(&nfs_access_lru_lock); |
@@ -1845,7 +1880,7 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s | |||
1845 | smp_mb__after_atomic_inc(); | 1880 | smp_mb__after_atomic_inc(); |
1846 | 1881 | ||
1847 | /* Add inode to global LRU list */ | 1882 | /* Add inode to global LRU list */ |
1848 | if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { | 1883 | if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { |
1849 | spin_lock(&nfs_access_lru_lock); | 1884 | spin_lock(&nfs_access_lru_lock); |
1850 | list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); | 1885 | list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); |
1851 | spin_unlock(&nfs_access_lru_lock); | 1886 | spin_unlock(&nfs_access_lru_lock); |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7b994b2fa593..16844f98f50e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -272,6 +272,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
272 | unsigned long user_addr = (unsigned long)iov->iov_base; | 272 | unsigned long user_addr = (unsigned long)iov->iov_base; |
273 | size_t count = iov->iov_len; | 273 | size_t count = iov->iov_len; |
274 | size_t rsize = NFS_SERVER(inode)->rsize; | 274 | size_t rsize = NFS_SERVER(inode)->rsize; |
275 | struct rpc_task *task; | ||
276 | struct rpc_message msg = { | ||
277 | .rpc_cred = ctx->cred, | ||
278 | }; | ||
279 | struct rpc_task_setup task_setup_data = { | ||
280 | .rpc_client = NFS_CLIENT(inode), | ||
281 | .rpc_message = &msg, | ||
282 | .callback_ops = &nfs_read_direct_ops, | ||
283 | .flags = RPC_TASK_ASYNC, | ||
284 | }; | ||
275 | unsigned int pgbase; | 285 | unsigned int pgbase; |
276 | int result; | 286 | int result; |
277 | ssize_t started = 0; | 287 | ssize_t started = 0; |
@@ -311,7 +321,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
311 | 321 | ||
312 | data->req = (struct nfs_page *) dreq; | 322 | data->req = (struct nfs_page *) dreq; |
313 | data->inode = inode; | 323 | data->inode = inode; |
314 | data->cred = ctx->cred; | 324 | data->cred = msg.rpc_cred; |
315 | data->args.fh = NFS_FH(inode); | 325 | data->args.fh = NFS_FH(inode); |
316 | data->args.context = ctx; | 326 | data->args.context = ctx; |
317 | data->args.offset = pos; | 327 | data->args.offset = pos; |
@@ -321,14 +331,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
321 | data->res.fattr = &data->fattr; | 331 | data->res.fattr = &data->fattr; |
322 | data->res.eof = 0; | 332 | data->res.eof = 0; |
323 | data->res.count = bytes; | 333 | data->res.count = bytes; |
334 | msg.rpc_argp = &data->args; | ||
335 | msg.rpc_resp = &data->res; | ||
324 | 336 | ||
325 | rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, | 337 | task_setup_data.task = &data->task; |
326 | &nfs_read_direct_ops, data); | 338 | task_setup_data.callback_data = data; |
327 | NFS_PROTO(inode)->read_setup(data); | 339 | NFS_PROTO(inode)->read_setup(data, &msg); |
328 | 340 | ||
329 | data->task.tk_cookie = (unsigned long) inode; | 341 | task = rpc_run_task(&task_setup_data); |
330 | 342 | if (!IS_ERR(task)) | |
331 | rpc_execute(&data->task); | 343 | rpc_put_task(task); |
332 | 344 | ||
333 | dprintk("NFS: %5u initiated direct read call " | 345 | dprintk("NFS: %5u initiated direct read call " |
334 | "(req %s/%Ld, %zu bytes @ offset %Lu)\n", | 346 | "(req %s/%Ld, %zu bytes @ offset %Lu)\n", |
@@ -427,6 +439,15 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
427 | struct inode *inode = dreq->inode; | 439 | struct inode *inode = dreq->inode; |
428 | struct list_head *p; | 440 | struct list_head *p; |
429 | struct nfs_write_data *data; | 441 | struct nfs_write_data *data; |
442 | struct rpc_task *task; | ||
443 | struct rpc_message msg = { | ||
444 | .rpc_cred = dreq->ctx->cred, | ||
445 | }; | ||
446 | struct rpc_task_setup task_setup_data = { | ||
447 | .rpc_client = NFS_CLIENT(inode), | ||
448 | .callback_ops = &nfs_write_direct_ops, | ||
449 | .flags = RPC_TASK_ASYNC, | ||
450 | }; | ||
430 | 451 | ||
431 | dreq->count = 0; | 452 | dreq->count = 0; |
432 | get_dreq(dreq); | 453 | get_dreq(dreq); |
@@ -436,6 +457,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
436 | 457 | ||
437 | get_dreq(dreq); | 458 | get_dreq(dreq); |
438 | 459 | ||
460 | /* Use stable writes */ | ||
461 | data->args.stable = NFS_FILE_SYNC; | ||
462 | |||
439 | /* | 463 | /* |
440 | * Reset data->res. | 464 | * Reset data->res. |
441 | */ | 465 | */ |
@@ -447,17 +471,18 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
447 | * Reuse data->task; data->args should not have changed | 471 | * Reuse data->task; data->args should not have changed |
448 | * since the original request was sent. | 472 | * since the original request was sent. |
449 | */ | 473 | */ |
450 | rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, | 474 | task_setup_data.task = &data->task; |
451 | &nfs_write_direct_ops, data); | 475 | task_setup_data.callback_data = data; |
452 | NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); | 476 | msg.rpc_argp = &data->args; |
453 | 477 | msg.rpc_resp = &data->res; | |
454 | data->task.tk_priority = RPC_PRIORITY_NORMAL; | 478 | NFS_PROTO(inode)->write_setup(data, &msg); |
455 | data->task.tk_cookie = (unsigned long) inode; | ||
456 | 479 | ||
457 | /* | 480 | /* |
458 | * We're called via an RPC callback, so BKL is already held. | 481 | * We're called via an RPC callback, so BKL is already held. |
459 | */ | 482 | */ |
460 | rpc_execute(&data->task); | 483 | task = rpc_run_task(&task_setup_data); |
484 | if (!IS_ERR(task)) | ||
485 | rpc_put_task(task); | ||
461 | 486 | ||
462 | dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", | 487 | dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", |
463 | data->task.tk_pid, | 488 | data->task.tk_pid, |
@@ -500,9 +525,23 @@ static const struct rpc_call_ops nfs_commit_direct_ops = { | |||
500 | static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | 525 | static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) |
501 | { | 526 | { |
502 | struct nfs_write_data *data = dreq->commit_data; | 527 | struct nfs_write_data *data = dreq->commit_data; |
528 | struct rpc_task *task; | ||
529 | struct rpc_message msg = { | ||
530 | .rpc_argp = &data->args, | ||
531 | .rpc_resp = &data->res, | ||
532 | .rpc_cred = dreq->ctx->cred, | ||
533 | }; | ||
534 | struct rpc_task_setup task_setup_data = { | ||
535 | .task = &data->task, | ||
536 | .rpc_client = NFS_CLIENT(dreq->inode), | ||
537 | .rpc_message = &msg, | ||
538 | .callback_ops = &nfs_commit_direct_ops, | ||
539 | .callback_data = data, | ||
540 | .flags = RPC_TASK_ASYNC, | ||
541 | }; | ||
503 | 542 | ||
504 | data->inode = dreq->inode; | 543 | data->inode = dreq->inode; |
505 | data->cred = dreq->ctx->cred; | 544 | data->cred = msg.rpc_cred; |
506 | 545 | ||
507 | data->args.fh = NFS_FH(data->inode); | 546 | data->args.fh = NFS_FH(data->inode); |
508 | data->args.offset = 0; | 547 | data->args.offset = 0; |
@@ -511,18 +550,16 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | |||
511 | data->res.fattr = &data->fattr; | 550 | data->res.fattr = &data->fattr; |
512 | data->res.verf = &data->verf; | 551 | data->res.verf = &data->verf; |
513 | 552 | ||
514 | rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, | 553 | NFS_PROTO(data->inode)->commit_setup(data, &msg); |
515 | &nfs_commit_direct_ops, data); | ||
516 | NFS_PROTO(data->inode)->commit_setup(data, 0); | ||
517 | 554 | ||
518 | data->task.tk_priority = RPC_PRIORITY_NORMAL; | ||
519 | data->task.tk_cookie = (unsigned long)data->inode; | ||
520 | /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ | 555 | /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ |
521 | dreq->commit_data = NULL; | 556 | dreq->commit_data = NULL; |
522 | 557 | ||
523 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 558 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); |
524 | 559 | ||
525 | rpc_execute(&data->task); | 560 | task = rpc_run_task(&task_setup_data); |
561 | if (!IS_ERR(task)) | ||
562 | rpc_put_task(task); | ||
526 | } | 563 | } |
527 | 564 | ||
528 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) | 565 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) |
@@ -637,6 +674,16 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
637 | struct inode *inode = ctx->path.dentry->d_inode; | 674 | struct inode *inode = ctx->path.dentry->d_inode; |
638 | unsigned long user_addr = (unsigned long)iov->iov_base; | 675 | unsigned long user_addr = (unsigned long)iov->iov_base; |
639 | size_t count = iov->iov_len; | 676 | size_t count = iov->iov_len; |
677 | struct rpc_task *task; | ||
678 | struct rpc_message msg = { | ||
679 | .rpc_cred = ctx->cred, | ||
680 | }; | ||
681 | struct rpc_task_setup task_setup_data = { | ||
682 | .rpc_client = NFS_CLIENT(inode), | ||
683 | .rpc_message = &msg, | ||
684 | .callback_ops = &nfs_write_direct_ops, | ||
685 | .flags = RPC_TASK_ASYNC, | ||
686 | }; | ||
640 | size_t wsize = NFS_SERVER(inode)->wsize; | 687 | size_t wsize = NFS_SERVER(inode)->wsize; |
641 | unsigned int pgbase; | 688 | unsigned int pgbase; |
642 | int result; | 689 | int result; |
@@ -679,25 +726,27 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
679 | 726 | ||
680 | data->req = (struct nfs_page *) dreq; | 727 | data->req = (struct nfs_page *) dreq; |
681 | data->inode = inode; | 728 | data->inode = inode; |
682 | data->cred = ctx->cred; | 729 | data->cred = msg.rpc_cred; |
683 | data->args.fh = NFS_FH(inode); | 730 | data->args.fh = NFS_FH(inode); |
684 | data->args.context = ctx; | 731 | data->args.context = ctx; |
685 | data->args.offset = pos; | 732 | data->args.offset = pos; |
686 | data->args.pgbase = pgbase; | 733 | data->args.pgbase = pgbase; |
687 | data->args.pages = data->pagevec; | 734 | data->args.pages = data->pagevec; |
688 | data->args.count = bytes; | 735 | data->args.count = bytes; |
736 | data->args.stable = sync; | ||
689 | data->res.fattr = &data->fattr; | 737 | data->res.fattr = &data->fattr; |
690 | data->res.count = bytes; | 738 | data->res.count = bytes; |
691 | data->res.verf = &data->verf; | 739 | data->res.verf = &data->verf; |
692 | 740 | ||
693 | rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, | 741 | task_setup_data.task = &data->task; |
694 | &nfs_write_direct_ops, data); | 742 | task_setup_data.callback_data = data; |
695 | NFS_PROTO(inode)->write_setup(data, sync); | 743 | msg.rpc_argp = &data->args; |
744 | msg.rpc_resp = &data->res; | ||
745 | NFS_PROTO(inode)->write_setup(data, &msg); | ||
696 | 746 | ||
697 | data->task.tk_priority = RPC_PRIORITY_NORMAL; | 747 | task = rpc_run_task(&task_setup_data); |
698 | data->task.tk_cookie = (unsigned long) inode; | 748 | if (!IS_ERR(task)) |
699 | 749 | rpc_put_task(task); | |
700 | rpc_execute(&data->task); | ||
701 | 750 | ||
702 | dprintk("NFS: %5u initiated direct write call " | 751 | dprintk("NFS: %5u initiated direct write call " |
703 | "(req %s/%Ld, %zu bytes @ offset %Lu)\n", | 752 | "(req %s/%Ld, %zu bytes @ offset %Lu)\n", |
@@ -766,7 +815,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
766 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 815 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
767 | struct nfs_direct_req *dreq; | 816 | struct nfs_direct_req *dreq; |
768 | size_t wsize = NFS_SERVER(inode)->wsize; | 817 | size_t wsize = NFS_SERVER(inode)->wsize; |
769 | int sync = 0; | 818 | int sync = NFS_UNSTABLE; |
770 | 819 | ||
771 | dreq = nfs_direct_req_alloc(); | 820 | dreq = nfs_direct_req_alloc(); |
772 | if (!dreq) | 821 | if (!dreq) |
@@ -774,7 +823,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
774 | nfs_alloc_commit_data(dreq); | 823 | nfs_alloc_commit_data(dreq); |
775 | 824 | ||
776 | if (dreq->commit_data == NULL || count < wsize) | 825 | if (dreq->commit_data == NULL || count < wsize) |
777 | sync = FLUSH_STABLE; | 826 | sync = NFS_FILE_SYNC; |
778 | 827 | ||
779 | dreq->inode = inode; | 828 | dreq->inode = inode; |
780 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 829 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
@@ -886,8 +935,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
886 | retval = generic_write_checks(file, &pos, &count, 0); | 935 | retval = generic_write_checks(file, &pos, &count, 0); |
887 | if (retval) | 936 | if (retval) |
888 | goto out; | 937 | goto out; |
889 | if (!count) | ||
890 | goto out; /* return 0 */ | ||
891 | 938 | ||
892 | retval = -EINVAL; | 939 | retval = -EINVAL; |
893 | if ((ssize_t) count < 0) | 940 | if ((ssize_t) count < 0) |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index b3bb89f7d5d2..ef57a5ae5904 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -349,7 +349,9 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
349 | unlock_page(page); | 349 | unlock_page(page); |
350 | page_cache_release(page); | 350 | page_cache_release(page); |
351 | 351 | ||
352 | return status < 0 ? status : copied; | 352 | if (status < 0) |
353 | return status; | ||
354 | return copied; | ||
353 | } | 355 | } |
354 | 356 | ||
355 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 357 | static void nfs_invalidate_page(struct page *page, unsigned long offset) |
@@ -392,35 +394,27 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
392 | struct file *filp = vma->vm_file; | 394 | struct file *filp = vma->vm_file; |
393 | unsigned pagelen; | 395 | unsigned pagelen; |
394 | int ret = -EINVAL; | 396 | int ret = -EINVAL; |
395 | void *fsdata; | ||
396 | struct address_space *mapping; | 397 | struct address_space *mapping; |
397 | loff_t offset; | ||
398 | 398 | ||
399 | lock_page(page); | 399 | lock_page(page); |
400 | mapping = page->mapping; | 400 | mapping = page->mapping; |
401 | if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) { | 401 | if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) |
402 | unlock_page(page); | 402 | goto out_unlock; |
403 | return -EINVAL; | 403 | |
404 | } | 404 | ret = 0; |
405 | pagelen = nfs_page_length(page); | 405 | pagelen = nfs_page_length(page); |
406 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 406 | if (pagelen == 0) |
407 | unlock_page(page); | 407 | goto out_unlock; |
408 | 408 | ||
409 | /* | 409 | ret = nfs_flush_incompatible(filp, page); |
410 | * we can use mapping after releasing the page lock, because: | 410 | if (ret != 0) |
411 | * we hold mmap_sem on the fault path, which should pin the vma | 411 | goto out_unlock; |
412 | * which should pin the file, which pins the dentry which should | ||
413 | * hold a reference on inode. | ||
414 | */ | ||
415 | 412 | ||
416 | if (pagelen) { | 413 | ret = nfs_updatepage(filp, page, 0, pagelen); |
417 | struct page *page2 = NULL; | 414 | if (ret == 0) |
418 | ret = nfs_write_begin(filp, mapping, offset, pagelen, | 415 | ret = pagelen; |
419 | 0, &page2, &fsdata); | 416 | out_unlock: |
420 | if (!ret) | 417 | unlock_page(page); |
421 | ret = nfs_write_end(filp, mapping, offset, pagelen, | ||
422 | pagelen, page2, fsdata); | ||
423 | } | ||
424 | return ret; | 418 | return ret; |
425 | } | 419 | } |
426 | 420 | ||
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 0ee43843f4ec..e6242cdbaf91 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -57,6 +57,17 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i | |||
57 | } | 57 | } |
58 | /* Circumvent igrab(): we know the inode is not being freed */ | 58 | /* Circumvent igrab(): we know the inode is not being freed */ |
59 | atomic_inc(&inode->i_count); | 59 | atomic_inc(&inode->i_count); |
60 | /* | ||
61 | * Ensure that this dentry is invisible to d_find_alias(). | ||
62 | * Otherwise, it may be spliced into the tree by | ||
63 | * d_materialise_unique if a parent directory from the same | ||
64 | * filesystem gets mounted at a later time. | ||
65 | * This again causes shrink_dcache_for_umount_subtree() to | ||
66 | * Oops, since the test for IS_ROOT() will fail. | ||
67 | */ | ||
68 | spin_lock(&dcache_lock); | ||
69 | list_del_init(&sb->s_root->d_alias); | ||
70 | spin_unlock(&dcache_lock); | ||
60 | } | 71 | } |
61 | return 0; | 72 | return 0; |
62 | } | 73 | } |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d11eb055265c..8ae5dba2d4e5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -72,39 +72,39 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, | |||
72 | &nfs_idmap_cache_timeout, 0644); | 72 | &nfs_idmap_cache_timeout, 0644); |
73 | 73 | ||
74 | struct idmap_hashent { | 74 | struct idmap_hashent { |
75 | unsigned long ih_expires; | 75 | unsigned long ih_expires; |
76 | __u32 ih_id; | 76 | __u32 ih_id; |
77 | int ih_namelen; | 77 | size_t ih_namelen; |
78 | char ih_name[IDMAP_NAMESZ]; | 78 | char ih_name[IDMAP_NAMESZ]; |
79 | }; | 79 | }; |
80 | 80 | ||
81 | struct idmap_hashtable { | 81 | struct idmap_hashtable { |
82 | __u8 h_type; | 82 | __u8 h_type; |
83 | struct idmap_hashent h_entries[IDMAP_HASH_SZ]; | 83 | struct idmap_hashent h_entries[IDMAP_HASH_SZ]; |
84 | }; | 84 | }; |
85 | 85 | ||
86 | struct idmap { | 86 | struct idmap { |
87 | struct dentry *idmap_dentry; | 87 | struct dentry *idmap_dentry; |
88 | wait_queue_head_t idmap_wq; | 88 | wait_queue_head_t idmap_wq; |
89 | struct idmap_msg idmap_im; | 89 | struct idmap_msg idmap_im; |
90 | struct mutex idmap_lock; /* Serializes upcalls */ | 90 | struct mutex idmap_lock; /* Serializes upcalls */ |
91 | struct mutex idmap_im_lock; /* Protects the hashtable */ | 91 | struct mutex idmap_im_lock; /* Protects the hashtable */ |
92 | struct idmap_hashtable idmap_user_hash; | 92 | struct idmap_hashtable idmap_user_hash; |
93 | struct idmap_hashtable idmap_group_hash; | 93 | struct idmap_hashtable idmap_group_hash; |
94 | }; | 94 | }; |
95 | 95 | ||
96 | static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, | 96 | static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, |
97 | char __user *, size_t); | 97 | char __user *, size_t); |
98 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, | 98 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, |
99 | size_t); | 99 | size_t); |
100 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | 100 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); |
101 | 101 | ||
102 | static unsigned int fnvhash32(const void *, size_t); | 102 | static unsigned int fnvhash32(const void *, size_t); |
103 | 103 | ||
104 | static struct rpc_pipe_ops idmap_upcall_ops = { | 104 | static struct rpc_pipe_ops idmap_upcall_ops = { |
105 | .upcall = idmap_pipe_upcall, | 105 | .upcall = idmap_pipe_upcall, |
106 | .downcall = idmap_pipe_downcall, | 106 | .downcall = idmap_pipe_downcall, |
107 | .destroy_msg = idmap_pipe_destroy_msg, | 107 | .destroy_msg = idmap_pipe_destroy_msg, |
108 | }; | 108 | }; |
109 | 109 | ||
110 | int | 110 | int |
@@ -115,19 +115,20 @@ nfs_idmap_new(struct nfs_client *clp) | |||
115 | 115 | ||
116 | BUG_ON(clp->cl_idmap != NULL); | 116 | BUG_ON(clp->cl_idmap != NULL); |
117 | 117 | ||
118 | if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) | 118 | idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); |
119 | return -ENOMEM; | 119 | if (idmap == NULL) |
120 | return -ENOMEM; | ||
120 | 121 | ||
121 | idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", | 122 | idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", |
122 | idmap, &idmap_upcall_ops, 0); | 123 | idmap, &idmap_upcall_ops, 0); |
123 | if (IS_ERR(idmap->idmap_dentry)) { | 124 | if (IS_ERR(idmap->idmap_dentry)) { |
124 | error = PTR_ERR(idmap->idmap_dentry); | 125 | error = PTR_ERR(idmap->idmap_dentry); |
125 | kfree(idmap); | 126 | kfree(idmap); |
126 | return error; | 127 | return error; |
127 | } | 128 | } |
128 | 129 | ||
129 | mutex_init(&idmap->idmap_lock); | 130 | mutex_init(&idmap->idmap_lock); |
130 | mutex_init(&idmap->idmap_im_lock); | 131 | mutex_init(&idmap->idmap_im_lock); |
131 | init_waitqueue_head(&idmap->idmap_wq); | 132 | init_waitqueue_head(&idmap->idmap_wq); |
132 | idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; | 133 | idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; |
133 | idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; | 134 | idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; |
@@ -192,7 +193,7 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) | |||
192 | * pretty trivial. | 193 | * pretty trivial. |
193 | */ | 194 | */ |
194 | static inline struct idmap_hashent * | 195 | static inline struct idmap_hashent * |
195 | idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) | 196 | idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) |
196 | { | 197 | { |
197 | return idmap_name_hash(h, name, len); | 198 | return idmap_name_hash(h, name, len); |
198 | } | 199 | } |
@@ -285,7 +286,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, | |||
285 | memset(im, 0, sizeof(*im)); | 286 | memset(im, 0, sizeof(*im)); |
286 | mutex_unlock(&idmap->idmap_im_lock); | 287 | mutex_unlock(&idmap->idmap_im_lock); |
287 | mutex_unlock(&idmap->idmap_lock); | 288 | mutex_unlock(&idmap->idmap_lock); |
288 | return (ret); | 289 | return ret; |
289 | } | 290 | } |
290 | 291 | ||
291 | /* | 292 | /* |
@@ -354,42 +355,40 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, | |||
354 | /* RPC pipefs upcall/downcall routines */ | 355 | /* RPC pipefs upcall/downcall routines */ |
355 | static ssize_t | 356 | static ssize_t |
356 | idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | 357 | idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, |
357 | char __user *dst, size_t buflen) | 358 | char __user *dst, size_t buflen) |
358 | { | 359 | { |
359 | char *data = (char *)msg->data + msg->copied; | 360 | char *data = (char *)msg->data + msg->copied; |
360 | ssize_t mlen = msg->len - msg->copied; | 361 | size_t mlen = min(msg->len, buflen); |
361 | ssize_t left; | 362 | unsigned long left; |
362 | 363 | ||
363 | if (mlen > buflen) | 364 | left = copy_to_user(dst, data, mlen); |
364 | mlen = buflen; | 365 | if (left == mlen) { |
365 | 366 | msg->errno = -EFAULT; | |
366 | left = copy_to_user(dst, data, mlen); | 367 | return -EFAULT; |
367 | if (left < 0) { | ||
368 | msg->errno = left; | ||
369 | return left; | ||
370 | } | 368 | } |
369 | |||
371 | mlen -= left; | 370 | mlen -= left; |
372 | msg->copied += mlen; | 371 | msg->copied += mlen; |
373 | msg->errno = 0; | 372 | msg->errno = 0; |
374 | return mlen; | 373 | return mlen; |
375 | } | 374 | } |
376 | 375 | ||
377 | static ssize_t | 376 | static ssize_t |
378 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | 377 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) |
379 | { | 378 | { |
380 | struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); | 379 | struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); |
381 | struct idmap *idmap = (struct idmap *)rpci->private; | 380 | struct idmap *idmap = (struct idmap *)rpci->private; |
382 | struct idmap_msg im_in, *im = &idmap->idmap_im; | 381 | struct idmap_msg im_in, *im = &idmap->idmap_im; |
383 | struct idmap_hashtable *h; | 382 | struct idmap_hashtable *h; |
384 | struct idmap_hashent *he = NULL; | 383 | struct idmap_hashent *he = NULL; |
385 | int namelen_in; | 384 | size_t namelen_in; |
386 | int ret; | 385 | int ret; |
387 | 386 | ||
388 | if (mlen != sizeof(im_in)) | 387 | if (mlen != sizeof(im_in)) |
389 | return (-ENOSPC); | 388 | return -ENOSPC; |
390 | 389 | ||
391 | if (copy_from_user(&im_in, src, mlen) != 0) | 390 | if (copy_from_user(&im_in, src, mlen) != 0) |
392 | return (-EFAULT); | 391 | return -EFAULT; |
393 | 392 | ||
394 | mutex_lock(&idmap->idmap_im_lock); | 393 | mutex_lock(&idmap->idmap_im_lock); |
395 | 394 | ||
@@ -487,7 +486,7 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) | |||
487 | hash ^= (unsigned int)*p; | 486 | hash ^= (unsigned int)*p; |
488 | } | 487 | } |
489 | 488 | ||
490 | return (hash); | 489 | return hash; |
491 | } | 490 | } |
492 | 491 | ||
493 | int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) | 492 | int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f68c22215b14..966a8850aa30 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -192,7 +192,7 @@ void nfs_invalidate_atime(struct inode *inode) | |||
192 | */ | 192 | */ |
193 | static void nfs_invalidate_inode(struct inode *inode) | 193 | static void nfs_invalidate_inode(struct inode *inode) |
194 | { | 194 | { |
195 | set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); | 195 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); |
196 | nfs_zap_caches_locked(inode); | 196 | nfs_zap_caches_locked(inode); |
197 | } | 197 | } |
198 | 198 | ||
@@ -229,7 +229,7 @@ nfs_init_locked(struct inode *inode, void *opaque) | |||
229 | struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; | 229 | struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; |
230 | struct nfs_fattr *fattr = desc->fattr; | 230 | struct nfs_fattr *fattr = desc->fattr; |
231 | 231 | ||
232 | NFS_FILEID(inode) = fattr->fileid; | 232 | set_nfs_fileid(inode, fattr->fileid); |
233 | nfs_copy_fh(NFS_FH(inode), desc->fh); | 233 | nfs_copy_fh(NFS_FH(inode), desc->fh); |
234 | return 0; | 234 | return 0; |
235 | } | 235 | } |
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
291 | inode->i_fop = &nfs_dir_operations; | 291 | inode->i_fop = &nfs_dir_operations; |
292 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) | 292 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) |
293 | && fattr->size <= NFS_LIMIT_READDIRPLUS) | 293 | && fattr->size <= NFS_LIMIT_READDIRPLUS) |
294 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); | 294 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
295 | /* Deal with crossing mountpoints */ | 295 | /* Deal with crossing mountpoints */ |
296 | if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { | 296 | if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { |
297 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 297 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
@@ -457,9 +457,18 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
457 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; | 457 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; |
458 | int err; | 458 | int err; |
459 | 459 | ||
460 | /* Flush out writes to the server in order to update c/mtime */ | 460 | /* |
461 | if (S_ISREG(inode->i_mode)) | 461 | * Flush out writes to the server in order to update c/mtime. |
462 | * | ||
463 | * Hold the i_mutex to suspend application writes temporarily; | ||
464 | * this prevents long-running writing applications from blocking | ||
465 | * nfs_wb_nocommit. | ||
466 | */ | ||
467 | if (S_ISREG(inode->i_mode)) { | ||
468 | mutex_lock(&inode->i_mutex); | ||
462 | nfs_wb_nocommit(inode); | 469 | nfs_wb_nocommit(inode); |
470 | mutex_unlock(&inode->i_mutex); | ||
471 | } | ||
463 | 472 | ||
464 | /* | 473 | /* |
465 | * We may force a getattr if the user cares about atime. | 474 | * We may force a getattr if the user cares about atime. |
@@ -655,7 +664,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
655 | if (status == -ESTALE) { | 664 | if (status == -ESTALE) { |
656 | nfs_zap_caches(inode); | 665 | nfs_zap_caches(inode); |
657 | if (!S_ISDIR(inode->i_mode)) | 666 | if (!S_ISDIR(inode->i_mode)) |
658 | set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); | 667 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); |
659 | } | 668 | } |
660 | goto out; | 669 | goto out; |
661 | } | 670 | } |
@@ -810,8 +819,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
810 | if (S_ISDIR(inode->i_mode)) | 819 | if (S_ISDIR(inode->i_mode)) |
811 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 820 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
812 | } | 821 | } |
813 | if (inode->i_size == fattr->pre_size && nfsi->npages == 0) | 822 | if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) && |
814 | inode->i_size = fattr->size; | 823 | nfsi->npages == 0) |
824 | inode->i_size = nfs_size_to_loff_t(fattr->size); | ||
815 | } | 825 | } |
816 | } | 826 | } |
817 | 827 | ||
@@ -1015,7 +1025,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1015 | dprintk("NFS: mtime change on server for file %s/%ld\n", | 1025 | dprintk("NFS: mtime change on server for file %s/%ld\n", |
1016 | inode->i_sb->s_id, inode->i_ino); | 1026 | inode->i_sb->s_id, inode->i_ino); |
1017 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1027 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1018 | nfsi->cache_change_attribute = now; | 1028 | if (S_ISDIR(inode->i_mode)) |
1029 | nfs_force_lookup_revalidate(inode); | ||
1019 | } | 1030 | } |
1020 | /* If ctime has changed we should definitely clear access+acl caches */ | 1031 | /* If ctime has changed we should definitely clear access+acl caches */ |
1021 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | 1032 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) |
@@ -1024,7 +1035,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1024 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1035 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
1025 | inode->i_sb->s_id, inode->i_ino); | 1036 | inode->i_sb->s_id, inode->i_ino); |
1026 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1037 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
1027 | nfsi->cache_change_attribute = now; | 1038 | if (S_ISDIR(inode->i_mode)) |
1039 | nfs_force_lookup_revalidate(inode); | ||
1028 | } | 1040 | } |
1029 | 1041 | ||
1030 | /* Check if our cached file size is stale */ | 1042 | /* Check if our cached file size is stale */ |
@@ -1129,7 +1141,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1129 | void nfs4_clear_inode(struct inode *inode) | 1141 | void nfs4_clear_inode(struct inode *inode) |
1130 | { | 1142 | { |
1131 | /* If we are holding a delegation, return it! */ | 1143 | /* If we are holding a delegation, return it! */ |
1132 | nfs_inode_return_delegation(inode); | 1144 | nfs_inode_return_delegation_noreclaim(inode); |
1133 | /* First call standard NFS clear_inode() code */ | 1145 | /* First call standard NFS clear_inode() code */ |
1134 | nfs_clear_inode(inode); | 1146 | nfs_clear_inode(inode); |
1135 | } | 1147 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f3acf48412be..0f5619611b8d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -21,7 +21,8 @@ struct nfs_clone_mount { | |||
21 | struct nfs_fattr *fattr; | 21 | struct nfs_fattr *fattr; |
22 | char *hostname; | 22 | char *hostname; |
23 | char *mnt_path; | 23 | char *mnt_path; |
24 | struct sockaddr_in *addr; | 24 | struct sockaddr *addr; |
25 | size_t addrlen; | ||
25 | rpc_authflavor_t authflavor; | 26 | rpc_authflavor_t authflavor; |
26 | }; | 27 | }; |
27 | 28 | ||
@@ -41,19 +42,19 @@ struct nfs_parsed_mount_data { | |||
41 | char *client_address; | 42 | char *client_address; |
42 | 43 | ||
43 | struct { | 44 | struct { |
44 | struct sockaddr_in address; | 45 | struct sockaddr_storage address; |
46 | size_t addrlen; | ||
45 | char *hostname; | 47 | char *hostname; |
46 | unsigned int program; | ||
47 | unsigned int version; | 48 | unsigned int version; |
48 | unsigned short port; | 49 | unsigned short port; |
49 | int protocol; | 50 | int protocol; |
50 | } mount_server; | 51 | } mount_server; |
51 | 52 | ||
52 | struct { | 53 | struct { |
53 | struct sockaddr_in address; | 54 | struct sockaddr_storage address; |
55 | size_t addrlen; | ||
54 | char *hostname; | 56 | char *hostname; |
55 | char *export_path; | 57 | char *export_path; |
56 | unsigned int program; | ||
57 | int protocol; | 58 | int protocol; |
58 | } nfs_server; | 59 | } nfs_server; |
59 | }; | 60 | }; |
@@ -62,7 +63,8 @@ struct nfs_parsed_mount_data { | |||
62 | extern struct rpc_program nfs_program; | 63 | extern struct rpc_program nfs_program; |
63 | 64 | ||
64 | extern void nfs_put_client(struct nfs_client *); | 65 | extern void nfs_put_client(struct nfs_client *); |
65 | extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); | 66 | extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); |
67 | extern struct nfs_client *nfs_find_client_next(struct nfs_client *); | ||
66 | extern struct nfs_server *nfs_create_server( | 68 | extern struct nfs_server *nfs_create_server( |
67 | const struct nfs_parsed_mount_data *, | 69 | const struct nfs_parsed_mount_data *, |
68 | struct nfs_fh *); | 70 | struct nfs_fh *); |
@@ -160,6 +162,8 @@ extern struct rpc_stat nfs_rpcstat; | |||
160 | 162 | ||
161 | extern int __init register_nfs_fs(void); | 163 | extern int __init register_nfs_fs(void); |
162 | extern void __exit unregister_nfs_fs(void); | 164 | extern void __exit unregister_nfs_fs(void); |
165 | extern void nfs_sb_active(struct nfs_server *server); | ||
166 | extern void nfs_sb_deactive(struct nfs_server *server); | ||
163 | 167 | ||
164 | /* namespace.c */ | 168 | /* namespace.c */ |
165 | extern char *nfs_path(const char *base, | 169 | extern char *nfs_path(const char *base, |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index acfc56f9edc0..be4ce1c3a3d8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -188,7 +188,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
188 | { | 188 | { |
189 | #ifdef CONFIG_NFS_V4 | 189 | #ifdef CONFIG_NFS_V4 |
190 | struct vfsmount *mnt = NULL; | 190 | struct vfsmount *mnt = NULL; |
191 | switch (server->nfs_client->cl_nfsversion) { | 191 | switch (server->nfs_client->rpc_ops->version) { |
192 | case 2: | 192 | case 2: |
193 | case 3: | 193 | case 3: |
194 | mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); | 194 | mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 668ab96c7b59..1f7ea675e0c5 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -262,7 +262,9 @@ static int | |||
262 | nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | 262 | nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) |
263 | { | 263 | { |
264 | struct kvec *iov = req->rq_rcv_buf.head; | 264 | struct kvec *iov = req->rq_rcv_buf.head; |
265 | int status, count, recvd, hdrlen; | 265 | size_t hdrlen; |
266 | u32 count, recvd; | ||
267 | int status; | ||
266 | 268 | ||
267 | if ((status = ntohl(*p++))) | 269 | if ((status = ntohl(*p++))) |
268 | return -nfs_stat_to_errno(status); | 270 | return -nfs_stat_to_errno(status); |
@@ -273,7 +275,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
273 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 275 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
274 | if (iov->iov_len < hdrlen) { | 276 | if (iov->iov_len < hdrlen) { |
275 | dprintk("NFS: READ reply header overflowed:" | 277 | dprintk("NFS: READ reply header overflowed:" |
276 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 278 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
277 | return -errno_NFSERR_IO; | 279 | return -errno_NFSERR_IO; |
278 | } else if (iov->iov_len != hdrlen) { | 280 | } else if (iov->iov_len != hdrlen) { |
279 | dprintk("NFS: READ header is short. iovec will be shifted.\n"); | 281 | dprintk("NFS: READ header is short. iovec will be shifted.\n"); |
@@ -283,11 +285,11 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
283 | recvd = req->rq_rcv_buf.len - hdrlen; | 285 | recvd = req->rq_rcv_buf.len - hdrlen; |
284 | if (count > recvd) { | 286 | if (count > recvd) { |
285 | dprintk("NFS: server cheating in read reply: " | 287 | dprintk("NFS: server cheating in read reply: " |
286 | "count %d > recvd %d\n", count, recvd); | 288 | "count %u > recvd %u\n", count, recvd); |
287 | count = recvd; | 289 | count = recvd; |
288 | } | 290 | } |
289 | 291 | ||
290 | dprintk("RPC: readres OK count %d\n", count); | 292 | dprintk("RPC: readres OK count %u\n", count); |
291 | if (count < res->count) | 293 | if (count < res->count) |
292 | res->count = count; | 294 | res->count = count; |
293 | 295 | ||
@@ -423,9 +425,10 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
423 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 425 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
424 | struct kvec *iov = rcvbuf->head; | 426 | struct kvec *iov = rcvbuf->head; |
425 | struct page **page; | 427 | struct page **page; |
426 | int hdrlen, recvd; | 428 | size_t hdrlen; |
429 | unsigned int pglen, recvd; | ||
430 | u32 len; | ||
427 | int status, nr; | 431 | int status, nr; |
428 | unsigned int len, pglen; | ||
429 | __be32 *end, *entry, *kaddr; | 432 | __be32 *end, *entry, *kaddr; |
430 | 433 | ||
431 | if ((status = ntohl(*p++))) | 434 | if ((status = ntohl(*p++))) |
@@ -434,7 +437,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
434 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 437 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
435 | if (iov->iov_len < hdrlen) { | 438 | if (iov->iov_len < hdrlen) { |
436 | dprintk("NFS: READDIR reply header overflowed:" | 439 | dprintk("NFS: READDIR reply header overflowed:" |
437 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 440 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
438 | return -errno_NFSERR_IO; | 441 | return -errno_NFSERR_IO; |
439 | } else if (iov->iov_len != hdrlen) { | 442 | } else if (iov->iov_len != hdrlen) { |
440 | dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); | 443 | dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); |
@@ -576,7 +579,8 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
576 | { | 579 | { |
577 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 580 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
578 | struct kvec *iov = rcvbuf->head; | 581 | struct kvec *iov = rcvbuf->head; |
579 | int hdrlen, len, recvd; | 582 | size_t hdrlen; |
583 | u32 len, recvd; | ||
580 | char *kaddr; | 584 | char *kaddr; |
581 | int status; | 585 | int status; |
582 | 586 | ||
@@ -584,14 +588,14 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
584 | return -nfs_stat_to_errno(status); | 588 | return -nfs_stat_to_errno(status); |
585 | /* Convert length of symlink */ | 589 | /* Convert length of symlink */ |
586 | len = ntohl(*p++); | 590 | len = ntohl(*p++); |
587 | if (len >= rcvbuf->page_len || len <= 0) { | 591 | if (len >= rcvbuf->page_len) { |
588 | dprintk("nfs: server returned giant symlink!\n"); | 592 | dprintk("nfs: server returned giant symlink!\n"); |
589 | return -ENAMETOOLONG; | 593 | return -ENAMETOOLONG; |
590 | } | 594 | } |
591 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 595 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
592 | if (iov->iov_len < hdrlen) { | 596 | if (iov->iov_len < hdrlen) { |
593 | dprintk("NFS: READLINK reply header overflowed:" | 597 | dprintk("NFS: READLINK reply header overflowed:" |
594 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 598 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
595 | return -errno_NFSERR_IO; | 599 | return -errno_NFSERR_IO; |
596 | } else if (iov->iov_len != hdrlen) { | 600 | } else if (iov->iov_len != hdrlen) { |
597 | dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); | 601 | dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5ae96340f2c2..549dbce714a4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -729,16 +729,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
729 | return 0; | 729 | return 0; |
730 | } | 730 | } |
731 | 731 | ||
732 | static void nfs3_proc_read_setup(struct nfs_read_data *data) | 732 | static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) |
733 | { | 733 | { |
734 | struct rpc_message msg = { | 734 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; |
735 | .rpc_proc = &nfs3_procedures[NFS3PROC_READ], | ||
736 | .rpc_argp = &data->args, | ||
737 | .rpc_resp = &data->res, | ||
738 | .rpc_cred = data->cred, | ||
739 | }; | ||
740 | |||
741 | rpc_call_setup(&data->task, &msg, 0); | ||
742 | } | 735 | } |
743 | 736 | ||
744 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | 737 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -750,24 +743,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
750 | return 0; | 743 | return 0; |
751 | } | 744 | } |
752 | 745 | ||
753 | static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) | 746 | static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) |
754 | { | 747 | { |
755 | struct rpc_message msg = { | 748 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; |
756 | .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], | ||
757 | .rpc_argp = &data->args, | ||
758 | .rpc_resp = &data->res, | ||
759 | .rpc_cred = data->cred, | ||
760 | }; | ||
761 | |||
762 | data->args.stable = NFS_UNSTABLE; | ||
763 | if (how & FLUSH_STABLE) { | ||
764 | data->args.stable = NFS_FILE_SYNC; | ||
765 | if (NFS_I(data->inode)->ncommit) | ||
766 | data->args.stable = NFS_DATA_SYNC; | ||
767 | } | ||
768 | |||
769 | /* Finalize the task. */ | ||
770 | rpc_call_setup(&data->task, &msg, 0); | ||
771 | } | 749 | } |
772 | 750 | ||
773 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) | 751 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -778,22 +756,17 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
778 | return 0; | 756 | return 0; |
779 | } | 757 | } |
780 | 758 | ||
781 | static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) | 759 | static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
782 | { | 760 | { |
783 | struct rpc_message msg = { | 761 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; |
784 | .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], | ||
785 | .rpc_argp = &data->args, | ||
786 | .rpc_resp = &data->res, | ||
787 | .rpc_cred = data->cred, | ||
788 | }; | ||
789 | |||
790 | rpc_call_setup(&data->task, &msg, 0); | ||
791 | } | 762 | } |
792 | 763 | ||
793 | static int | 764 | static int |
794 | nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) | 765 | nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) |
795 | { | 766 | { |
796 | return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl); | 767 | struct inode *inode = filp->f_path.dentry->d_inode; |
768 | |||
769 | return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); | ||
797 | } | 770 | } |
798 | 771 | ||
799 | const struct nfs_rpc_ops nfs_v3_clientops = { | 772 | const struct nfs_rpc_ops nfs_v3_clientops = { |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 616d3267b7e7..3917e2fa4e40 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -506,9 +506,9 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
506 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 506 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
507 | struct kvec *iov = rcvbuf->head; | 507 | struct kvec *iov = rcvbuf->head; |
508 | struct page **page; | 508 | struct page **page; |
509 | int hdrlen, recvd; | 509 | size_t hdrlen; |
510 | u32 len, recvd, pglen; | ||
510 | int status, nr; | 511 | int status, nr; |
511 | unsigned int len, pglen; | ||
512 | __be32 *entry, *end, *kaddr; | 512 | __be32 *entry, *end, *kaddr; |
513 | 513 | ||
514 | status = ntohl(*p++); | 514 | status = ntohl(*p++); |
@@ -527,7 +527,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
527 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 527 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
528 | if (iov->iov_len < hdrlen) { | 528 | if (iov->iov_len < hdrlen) { |
529 | dprintk("NFS: READDIR reply header overflowed:" | 529 | dprintk("NFS: READDIR reply header overflowed:" |
530 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 530 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
531 | return -errno_NFSERR_IO; | 531 | return -errno_NFSERR_IO; |
532 | } else if (iov->iov_len != hdrlen) { | 532 | } else if (iov->iov_len != hdrlen) { |
533 | dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); | 533 | dprintk("NFS: READDIR header is short. iovec will be shifted.\n"); |
@@ -549,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
549 | len = ntohl(*p++); /* string length */ | 549 | len = ntohl(*p++); /* string length */ |
550 | p += XDR_QUADLEN(len) + 2; /* name + cookie */ | 550 | p += XDR_QUADLEN(len) + 2; /* name + cookie */ |
551 | if (len > NFS3_MAXNAMLEN) { | 551 | if (len > NFS3_MAXNAMLEN) { |
552 | dprintk("NFS: giant filename in readdir (len %x)!\n", | 552 | dprintk("NFS: giant filename in readdir (len 0x%x)!\n", |
553 | len); | 553 | len); |
554 | goto err_unmap; | 554 | goto err_unmap; |
555 | } | 555 | } |
@@ -570,7 +570,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
570 | len = ntohl(*p++); | 570 | len = ntohl(*p++); |
571 | if (len > NFS3_FHSIZE) { | 571 | if (len > NFS3_FHSIZE) { |
572 | dprintk("NFS: giant filehandle in " | 572 | dprintk("NFS: giant filehandle in " |
573 | "readdir (len %x)!\n", len); | 573 | "readdir (len 0x%x)!\n", len); |
574 | goto err_unmap; | 574 | goto err_unmap; |
575 | } | 575 | } |
576 | p += XDR_QUADLEN(len); | 576 | p += XDR_QUADLEN(len); |
@@ -815,7 +815,8 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
815 | { | 815 | { |
816 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 816 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
817 | struct kvec *iov = rcvbuf->head; | 817 | struct kvec *iov = rcvbuf->head; |
818 | int hdrlen, len, recvd; | 818 | size_t hdrlen; |
819 | u32 len, recvd; | ||
819 | char *kaddr; | 820 | char *kaddr; |
820 | int status; | 821 | int status; |
821 | 822 | ||
@@ -827,7 +828,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
827 | 828 | ||
828 | /* Convert length of symlink */ | 829 | /* Convert length of symlink */ |
829 | len = ntohl(*p++); | 830 | len = ntohl(*p++); |
830 | if (len >= rcvbuf->page_len || len <= 0) { | 831 | if (len >= rcvbuf->page_len) { |
831 | dprintk("nfs: server returned giant symlink!\n"); | 832 | dprintk("nfs: server returned giant symlink!\n"); |
832 | return -ENAMETOOLONG; | 833 | return -ENAMETOOLONG; |
833 | } | 834 | } |
@@ -835,7 +836,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
835 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 836 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
836 | if (iov->iov_len < hdrlen) { | 837 | if (iov->iov_len < hdrlen) { |
837 | dprintk("NFS: READLINK reply header overflowed:" | 838 | dprintk("NFS: READLINK reply header overflowed:" |
838 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 839 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
839 | return -errno_NFSERR_IO; | 840 | return -errno_NFSERR_IO; |
840 | } else if (iov->iov_len != hdrlen) { | 841 | } else if (iov->iov_len != hdrlen) { |
841 | dprintk("NFS: READLINK header is short. " | 842 | dprintk("NFS: READLINK header is short. " |
@@ -863,7 +864,9 @@ static int | |||
863 | nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | 864 | nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) |
864 | { | 865 | { |
865 | struct kvec *iov = req->rq_rcv_buf.head; | 866 | struct kvec *iov = req->rq_rcv_buf.head; |
866 | int status, count, ocount, recvd, hdrlen; | 867 | size_t hdrlen; |
868 | u32 count, ocount, recvd; | ||
869 | int status; | ||
867 | 870 | ||
868 | status = ntohl(*p++); | 871 | status = ntohl(*p++); |
869 | p = xdr_decode_post_op_attr(p, res->fattr); | 872 | p = xdr_decode_post_op_attr(p, res->fattr); |
@@ -871,7 +874,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
871 | if (status != 0) | 874 | if (status != 0) |
872 | return -nfs_stat_to_errno(status); | 875 | return -nfs_stat_to_errno(status); |
873 | 876 | ||
874 | /* Decode reply could and EOF flag. NFSv3 is somewhat redundant | 877 | /* Decode reply count and EOF flag. NFSv3 is somewhat redundant |
875 | * in that it puts the count both in the res struct and in the | 878 | * in that it puts the count both in the res struct and in the |
876 | * opaque data count. */ | 879 | * opaque data count. */ |
877 | count = ntohl(*p++); | 880 | count = ntohl(*p++); |
@@ -886,7 +889,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
886 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; | 889 | hdrlen = (u8 *) p - (u8 *) iov->iov_base; |
887 | if (iov->iov_len < hdrlen) { | 890 | if (iov->iov_len < hdrlen) { |
888 | dprintk("NFS: READ reply header overflowed:" | 891 | dprintk("NFS: READ reply header overflowed:" |
889 | "length %d > %Zu\n", hdrlen, iov->iov_len); | 892 | "length %Zu > %Zu\n", hdrlen, iov->iov_len); |
890 | return -errno_NFSERR_IO; | 893 | return -errno_NFSERR_IO; |
891 | } else if (iov->iov_len != hdrlen) { | 894 | } else if (iov->iov_len != hdrlen) { |
892 | dprintk("NFS: READ header is short. iovec will be shifted.\n"); | 895 | dprintk("NFS: READ header is short. iovec will be shifted.\n"); |
@@ -896,7 +899,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) | |||
896 | recvd = req->rq_rcv_buf.len - hdrlen; | 899 | recvd = req->rq_rcv_buf.len - hdrlen; |
897 | if (count > recvd) { | 900 | if (count > recvd) { |
898 | dprintk("NFS: server cheating in read reply: " | 901 | dprintk("NFS: server cheating in read reply: " |
899 | "count %d > recvd %d\n", count, recvd); | 902 | "count %u > recvd %u\n", count, recvd); |
900 | count = recvd; | 903 | count = recvd; |
901 | res->eof = 0; | 904 | res->eof = 0; |
902 | } | 905 | } |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b35069a2aa9e..bd1b9d663fb9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -115,6 +115,7 @@ struct nfs4_lock_state { | |||
115 | #define NFS_LOCK_INITIALIZED 1 | 115 | #define NFS_LOCK_INITIALIZED 1 |
116 | int ls_flags; | 116 | int ls_flags; |
117 | struct nfs_seqid_counter ls_seqid; | 117 | struct nfs_seqid_counter ls_seqid; |
118 | struct rpc_sequence ls_sequence; | ||
118 | struct nfs_unique_id ls_id; | 119 | struct nfs_unique_id ls_id; |
119 | nfs4_stateid ls_stateid; | 120 | nfs4_stateid ls_stateid; |
120 | atomic_t ls_count; | 121 | atomic_t ls_count; |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index dd5fef20c702..5f9ba41ed5bf 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
@@ -114,10 +114,7 @@ static inline int valid_ipaddr4(const char *buf) | |||
114 | * nfs_follow_referral - set up mountpoint when hitting a referral on moved error | 114 | * nfs_follow_referral - set up mountpoint when hitting a referral on moved error |
115 | * @mnt_parent - mountpoint of parent directory | 115 | * @mnt_parent - mountpoint of parent directory |
116 | * @dentry - parent directory | 116 | * @dentry - parent directory |
117 | * @fspath - fs path returned in fs_locations | 117 | * @locations - array of NFSv4 server location information |
118 | * @mntpath - mount path to new server | ||
119 | * @hostname - hostname of new server | ||
120 | * @addr - host addr of new server | ||
121 | * | 118 | * |
122 | */ | 119 | */ |
123 | static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | 120 | static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, |
@@ -131,7 +128,8 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
131 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, | 128 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, |
132 | }; | 129 | }; |
133 | char *page = NULL, *page2 = NULL; | 130 | char *page = NULL, *page2 = NULL; |
134 | int loc, s, error; | 131 | unsigned int s; |
132 | int loc, error; | ||
135 | 133 | ||
136 | if (locations == NULL || locations->nlocations <= 0) | 134 | if (locations == NULL || locations->nlocations <= 0) |
137 | goto out; | 135 | goto out; |
@@ -174,7 +172,10 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
174 | 172 | ||
175 | s = 0; | 173 | s = 0; |
176 | while (s < location->nservers) { | 174 | while (s < location->nservers) { |
177 | struct sockaddr_in addr = {}; | 175 | struct sockaddr_in addr = { |
176 | .sin_family = AF_INET, | ||
177 | .sin_port = htons(NFS_PORT), | ||
178 | }; | ||
178 | 179 | ||
179 | if (location->servers[s].len <= 0 || | 180 | if (location->servers[s].len <= 0 || |
180 | valid_ipaddr4(location->servers[s].data) < 0) { | 181 | valid_ipaddr4(location->servers[s].data) < 0) { |
@@ -183,10 +184,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
183 | } | 184 | } |
184 | 185 | ||
185 | mountdata.hostname = location->servers[s].data; | 186 | mountdata.hostname = location->servers[s].data; |
186 | addr.sin_addr.s_addr = in_aton(mountdata.hostname); | 187 | addr.sin_addr.s_addr = in_aton(mountdata.hostname), |
187 | addr.sin_family = AF_INET; | 188 | mountdata.addr = (struct sockaddr *)&addr; |
188 | addr.sin_port = htons(NFS_PORT); | 189 | mountdata.addrlen = sizeof(addr); |
189 | mountdata.addr = &addr; | ||
190 | 190 | ||
191 | snprintf(page, PAGE_SIZE, "%s:%s", | 191 | snprintf(page, PAGE_SIZE, "%s:%s", |
192 | mountdata.hostname, | 192 | mountdata.hostname, |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c4faa43b36de..027e1095256e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -210,7 +210,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | |||
210 | spin_lock(&dir->i_lock); | 210 | spin_lock(&dir->i_lock); |
211 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; | 211 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; |
212 | if (!cinfo->atomic || cinfo->before != nfsi->change_attr) | 212 | if (!cinfo->atomic || cinfo->before != nfsi->change_attr) |
213 | nfsi->cache_change_attribute = jiffies; | 213 | nfs_force_lookup_revalidate(dir); |
214 | nfsi->change_attr = cinfo->after; | 214 | nfsi->change_attr = cinfo->after; |
215 | spin_unlock(&dir->i_lock); | 215 | spin_unlock(&dir->i_lock); |
216 | } | 216 | } |
@@ -715,19 +715,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state | |||
715 | return err; | 715 | return err; |
716 | } | 716 | } |
717 | 717 | ||
718 | static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) | ||
719 | { | ||
720 | struct nfs4_opendata *data = calldata; | ||
721 | struct rpc_message msg = { | ||
722 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], | ||
723 | .rpc_argp = &data->c_arg, | ||
724 | .rpc_resp = &data->c_res, | ||
725 | .rpc_cred = data->owner->so_cred, | ||
726 | }; | ||
727 | data->timestamp = jiffies; | ||
728 | rpc_call_setup(task, &msg, 0); | ||
729 | } | ||
730 | |||
731 | static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | 718 | static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) |
732 | { | 719 | { |
733 | struct nfs4_opendata *data = calldata; | 720 | struct nfs4_opendata *data = calldata; |
@@ -738,10 +725,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | |||
738 | if (data->rpc_status == 0) { | 725 | if (data->rpc_status == 0) { |
739 | memcpy(data->o_res.stateid.data, data->c_res.stateid.data, | 726 | memcpy(data->o_res.stateid.data, data->c_res.stateid.data, |
740 | sizeof(data->o_res.stateid.data)); | 727 | sizeof(data->o_res.stateid.data)); |
728 | nfs_confirm_seqid(&data->owner->so_seqid, 0); | ||
741 | renew_lease(data->o_res.server, data->timestamp); | 729 | renew_lease(data->o_res.server, data->timestamp); |
742 | data->rpc_done = 1; | 730 | data->rpc_done = 1; |
743 | } | 731 | } |
744 | nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); | ||
745 | nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); | 732 | nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); |
746 | } | 733 | } |
747 | 734 | ||
@@ -756,7 +743,6 @@ static void nfs4_open_confirm_release(void *calldata) | |||
756 | /* In case of error, no cleanup! */ | 743 | /* In case of error, no cleanup! */ |
757 | if (!data->rpc_done) | 744 | if (!data->rpc_done) |
758 | goto out_free; | 745 | goto out_free; |
759 | nfs_confirm_seqid(&data->owner->so_seqid, 0); | ||
760 | state = nfs4_opendata_to_nfs4_state(data); | 746 | state = nfs4_opendata_to_nfs4_state(data); |
761 | if (!IS_ERR(state)) | 747 | if (!IS_ERR(state)) |
762 | nfs4_close_state(&data->path, state, data->o_arg.open_flags); | 748 | nfs4_close_state(&data->path, state, data->o_arg.open_flags); |
@@ -765,7 +751,6 @@ out_free: | |||
765 | } | 751 | } |
766 | 752 | ||
767 | static const struct rpc_call_ops nfs4_open_confirm_ops = { | 753 | static const struct rpc_call_ops nfs4_open_confirm_ops = { |
768 | .rpc_call_prepare = nfs4_open_confirm_prepare, | ||
769 | .rpc_call_done = nfs4_open_confirm_done, | 754 | .rpc_call_done = nfs4_open_confirm_done, |
770 | .rpc_release = nfs4_open_confirm_release, | 755 | .rpc_release = nfs4_open_confirm_release, |
771 | }; | 756 | }; |
@@ -777,12 +762,26 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) | |||
777 | { | 762 | { |
778 | struct nfs_server *server = NFS_SERVER(data->dir->d_inode); | 763 | struct nfs_server *server = NFS_SERVER(data->dir->d_inode); |
779 | struct rpc_task *task; | 764 | struct rpc_task *task; |
765 | struct rpc_message msg = { | ||
766 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], | ||
767 | .rpc_argp = &data->c_arg, | ||
768 | .rpc_resp = &data->c_res, | ||
769 | .rpc_cred = data->owner->so_cred, | ||
770 | }; | ||
771 | struct rpc_task_setup task_setup_data = { | ||
772 | .rpc_client = server->client, | ||
773 | .rpc_message = &msg, | ||
774 | .callback_ops = &nfs4_open_confirm_ops, | ||
775 | .callback_data = data, | ||
776 | .flags = RPC_TASK_ASYNC, | ||
777 | }; | ||
780 | int status; | 778 | int status; |
781 | 779 | ||
782 | kref_get(&data->kref); | 780 | kref_get(&data->kref); |
783 | data->rpc_done = 0; | 781 | data->rpc_done = 0; |
784 | data->rpc_status = 0; | 782 | data->rpc_status = 0; |
785 | task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); | 783 | data->timestamp = jiffies; |
784 | task = rpc_run_task(&task_setup_data); | ||
786 | if (IS_ERR(task)) | 785 | if (IS_ERR(task)) |
787 | return PTR_ERR(task); | 786 | return PTR_ERR(task); |
788 | status = nfs4_wait_for_completion_rpc_task(task); | 787 | status = nfs4_wait_for_completion_rpc_task(task); |
@@ -799,13 +798,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
799 | { | 798 | { |
800 | struct nfs4_opendata *data = calldata; | 799 | struct nfs4_opendata *data = calldata; |
801 | struct nfs4_state_owner *sp = data->owner; | 800 | struct nfs4_state_owner *sp = data->owner; |
802 | struct rpc_message msg = { | 801 | |
803 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], | ||
804 | .rpc_argp = &data->o_arg, | ||
805 | .rpc_resp = &data->o_res, | ||
806 | .rpc_cred = sp->so_cred, | ||
807 | }; | ||
808 | |||
809 | if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) | 802 | if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) |
810 | return; | 803 | return; |
811 | /* | 804 | /* |
@@ -830,11 +823,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
830 | data->o_arg.id = sp->so_owner_id.id; | 823 | data->o_arg.id = sp->so_owner_id.id; |
831 | data->o_arg.clientid = sp->so_client->cl_clientid; | 824 | data->o_arg.clientid = sp->so_client->cl_clientid; |
832 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { | 825 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { |
833 | msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; | 826 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; |
834 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); | 827 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); |
835 | } | 828 | } |
836 | data->timestamp = jiffies; | 829 | data->timestamp = jiffies; |
837 | rpc_call_setup(task, &msg, 0); | 830 | rpc_call_start(task); |
838 | return; | 831 | return; |
839 | out_no_action: | 832 | out_no_action: |
840 | task->tk_action = NULL; | 833 | task->tk_action = NULL; |
@@ -883,7 +876,6 @@ static void nfs4_open_release(void *calldata) | |||
883 | /* In case we need an open_confirm, no cleanup! */ | 876 | /* In case we need an open_confirm, no cleanup! */ |
884 | if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) | 877 | if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) |
885 | goto out_free; | 878 | goto out_free; |
886 | nfs_confirm_seqid(&data->owner->so_seqid, 0); | ||
887 | state = nfs4_opendata_to_nfs4_state(data); | 879 | state = nfs4_opendata_to_nfs4_state(data); |
888 | if (!IS_ERR(state)) | 880 | if (!IS_ERR(state)) |
889 | nfs4_close_state(&data->path, state, data->o_arg.open_flags); | 881 | nfs4_close_state(&data->path, state, data->o_arg.open_flags); |
@@ -907,13 +899,26 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
907 | struct nfs_openargs *o_arg = &data->o_arg; | 899 | struct nfs_openargs *o_arg = &data->o_arg; |
908 | struct nfs_openres *o_res = &data->o_res; | 900 | struct nfs_openres *o_res = &data->o_res; |
909 | struct rpc_task *task; | 901 | struct rpc_task *task; |
902 | struct rpc_message msg = { | ||
903 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], | ||
904 | .rpc_argp = o_arg, | ||
905 | .rpc_resp = o_res, | ||
906 | .rpc_cred = data->owner->so_cred, | ||
907 | }; | ||
908 | struct rpc_task_setup task_setup_data = { | ||
909 | .rpc_client = server->client, | ||
910 | .rpc_message = &msg, | ||
911 | .callback_ops = &nfs4_open_ops, | ||
912 | .callback_data = data, | ||
913 | .flags = RPC_TASK_ASYNC, | ||
914 | }; | ||
910 | int status; | 915 | int status; |
911 | 916 | ||
912 | kref_get(&data->kref); | 917 | kref_get(&data->kref); |
913 | data->rpc_done = 0; | 918 | data->rpc_done = 0; |
914 | data->rpc_status = 0; | 919 | data->rpc_status = 0; |
915 | data->cancelled = 0; | 920 | data->cancelled = 0; |
916 | task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); | 921 | task = rpc_run_task(&task_setup_data); |
917 | if (IS_ERR(task)) | 922 | if (IS_ERR(task)) |
918 | return PTR_ERR(task); | 923 | return PTR_ERR(task); |
919 | status = nfs4_wait_for_completion_rpc_task(task); | 924 | status = nfs4_wait_for_completion_rpc_task(task); |
@@ -1243,12 +1248,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
1243 | { | 1248 | { |
1244 | struct nfs4_closedata *calldata = data; | 1249 | struct nfs4_closedata *calldata = data; |
1245 | struct nfs4_state *state = calldata->state; | 1250 | struct nfs4_state *state = calldata->state; |
1246 | struct rpc_message msg = { | ||
1247 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], | ||
1248 | .rpc_argp = &calldata->arg, | ||
1249 | .rpc_resp = &calldata->res, | ||
1250 | .rpc_cred = state->owner->so_cred, | ||
1251 | }; | ||
1252 | int clear_rd, clear_wr, clear_rdwr; | 1251 | int clear_rd, clear_wr, clear_rdwr; |
1253 | 1252 | ||
1254 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) | 1253 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) |
@@ -1275,14 +1274,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
1275 | } | 1274 | } |
1276 | nfs_fattr_init(calldata->res.fattr); | 1275 | nfs_fattr_init(calldata->res.fattr); |
1277 | if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { | 1276 | if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { |
1278 | msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; | 1277 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; |
1279 | calldata->arg.open_flags = FMODE_READ; | 1278 | calldata->arg.open_flags = FMODE_READ; |
1280 | } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { | 1279 | } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { |
1281 | msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; | 1280 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; |
1282 | calldata->arg.open_flags = FMODE_WRITE; | 1281 | calldata->arg.open_flags = FMODE_WRITE; |
1283 | } | 1282 | } |
1284 | calldata->timestamp = jiffies; | 1283 | calldata->timestamp = jiffies; |
1285 | rpc_call_setup(task, &msg, 0); | 1284 | rpc_call_start(task); |
1286 | } | 1285 | } |
1287 | 1286 | ||
1288 | static const struct rpc_call_ops nfs4_close_ops = { | 1287 | static const struct rpc_call_ops nfs4_close_ops = { |
@@ -1308,6 +1307,16 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) | |||
1308 | struct nfs4_closedata *calldata; | 1307 | struct nfs4_closedata *calldata; |
1309 | struct nfs4_state_owner *sp = state->owner; | 1308 | struct nfs4_state_owner *sp = state->owner; |
1310 | struct rpc_task *task; | 1309 | struct rpc_task *task; |
1310 | struct rpc_message msg = { | ||
1311 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], | ||
1312 | .rpc_cred = state->owner->so_cred, | ||
1313 | }; | ||
1314 | struct rpc_task_setup task_setup_data = { | ||
1315 | .rpc_client = server->client, | ||
1316 | .rpc_message = &msg, | ||
1317 | .callback_ops = &nfs4_close_ops, | ||
1318 | .flags = RPC_TASK_ASYNC, | ||
1319 | }; | ||
1311 | int status = -ENOMEM; | 1320 | int status = -ENOMEM; |
1312 | 1321 | ||
1313 | calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); | 1322 | calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); |
@@ -1327,7 +1336,10 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) | |||
1327 | calldata->path.mnt = mntget(path->mnt); | 1336 | calldata->path.mnt = mntget(path->mnt); |
1328 | calldata->path.dentry = dget(path->dentry); | 1337 | calldata->path.dentry = dget(path->dentry); |
1329 | 1338 | ||
1330 | task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); | 1339 | msg.rpc_argp = &calldata->arg, |
1340 | msg.rpc_resp = &calldata->res, | ||
1341 | task_setup_data.callback_data = calldata; | ||
1342 | task = rpc_run_task(&task_setup_data); | ||
1331 | if (IS_ERR(task)) | 1343 | if (IS_ERR(task)) |
1332 | return PTR_ERR(task); | 1344 | return PTR_ERR(task); |
1333 | status = 0; | 1345 | status = 0; |
@@ -2413,18 +2425,10 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
2413 | return 0; | 2425 | return 0; |
2414 | } | 2426 | } |
2415 | 2427 | ||
2416 | static void nfs4_proc_read_setup(struct nfs_read_data *data) | 2428 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) |
2417 | { | 2429 | { |
2418 | struct rpc_message msg = { | ||
2419 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], | ||
2420 | .rpc_argp = &data->args, | ||
2421 | .rpc_resp = &data->res, | ||
2422 | .rpc_cred = data->cred, | ||
2423 | }; | ||
2424 | |||
2425 | data->timestamp = jiffies; | 2430 | data->timestamp = jiffies; |
2426 | 2431 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; | |
2427 | rpc_call_setup(&data->task, &msg, 0); | ||
2428 | } | 2432 | } |
2429 | 2433 | ||
2430 | static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | 2434 | static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -2442,33 +2446,15 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
2442 | return 0; | 2446 | return 0; |
2443 | } | 2447 | } |
2444 | 2448 | ||
2445 | static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) | 2449 | static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) |
2446 | { | 2450 | { |
2447 | struct rpc_message msg = { | 2451 | struct nfs_server *server = NFS_SERVER(data->inode); |
2448 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], | 2452 | |
2449 | .rpc_argp = &data->args, | ||
2450 | .rpc_resp = &data->res, | ||
2451 | .rpc_cred = data->cred, | ||
2452 | }; | ||
2453 | struct inode *inode = data->inode; | ||
2454 | struct nfs_server *server = NFS_SERVER(inode); | ||
2455 | int stable; | ||
2456 | |||
2457 | if (how & FLUSH_STABLE) { | ||
2458 | if (!NFS_I(inode)->ncommit) | ||
2459 | stable = NFS_FILE_SYNC; | ||
2460 | else | ||
2461 | stable = NFS_DATA_SYNC; | ||
2462 | } else | ||
2463 | stable = NFS_UNSTABLE; | ||
2464 | data->args.stable = stable; | ||
2465 | data->args.bitmask = server->attr_bitmask; | 2453 | data->args.bitmask = server->attr_bitmask; |
2466 | data->res.server = server; | 2454 | data->res.server = server; |
2467 | |||
2468 | data->timestamp = jiffies; | 2455 | data->timestamp = jiffies; |
2469 | 2456 | ||
2470 | /* Finalize the task. */ | 2457 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; |
2471 | rpc_call_setup(&data->task, &msg, 0); | ||
2472 | } | 2458 | } |
2473 | 2459 | ||
2474 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | 2460 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -2483,20 +2469,13 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
2483 | return 0; | 2469 | return 0; |
2484 | } | 2470 | } |
2485 | 2471 | ||
2486 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) | 2472 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
2487 | { | 2473 | { |
2488 | struct rpc_message msg = { | ||
2489 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], | ||
2490 | .rpc_argp = &data->args, | ||
2491 | .rpc_resp = &data->res, | ||
2492 | .rpc_cred = data->cred, | ||
2493 | }; | ||
2494 | struct nfs_server *server = NFS_SERVER(data->inode); | 2474 | struct nfs_server *server = NFS_SERVER(data->inode); |
2495 | 2475 | ||
2496 | data->args.bitmask = server->attr_bitmask; | 2476 | data->args.bitmask = server->attr_bitmask; |
2497 | data->res.server = server; | 2477 | data->res.server = server; |
2498 | 2478 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; | |
2499 | rpc_call_setup(&data->task, &msg, 0); | ||
2500 | } | 2479 | } |
2501 | 2480 | ||
2502 | /* | 2481 | /* |
@@ -2899,14 +2878,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po | |||
2899 | 2878 | ||
2900 | for(;;) { | 2879 | for(;;) { |
2901 | setclientid.sc_name_len = scnprintf(setclientid.sc_name, | 2880 | setclientid.sc_name_len = scnprintf(setclientid.sc_name, |
2902 | sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", | 2881 | sizeof(setclientid.sc_name), "%s/%s %s %s %u", |
2903 | clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), | 2882 | clp->cl_ipaddr, |
2883 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
2884 | RPC_DISPLAY_ADDR), | ||
2885 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
2886 | RPC_DISPLAY_PROTO), | ||
2904 | cred->cr_ops->cr_name, | 2887 | cred->cr_ops->cr_name, |
2905 | clp->cl_id_uniquifier); | 2888 | clp->cl_id_uniquifier); |
2906 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 2889 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, |
2907 | sizeof(setclientid.sc_netid), "tcp"); | 2890 | sizeof(setclientid.sc_netid), |
2891 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
2892 | RPC_DISPLAY_NETID)); | ||
2908 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, | 2893 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, |
2909 | sizeof(setclientid.sc_uaddr), "%s.%d.%d", | 2894 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", |
2910 | clp->cl_ipaddr, port >> 8, port & 255); | 2895 | clp->cl_ipaddr, port >> 8, port & 255); |
2911 | 2896 | ||
2912 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); | 2897 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); |
@@ -2970,25 +2955,11 @@ struct nfs4_delegreturndata { | |||
2970 | struct nfs4_delegreturnres res; | 2955 | struct nfs4_delegreturnres res; |
2971 | struct nfs_fh fh; | 2956 | struct nfs_fh fh; |
2972 | nfs4_stateid stateid; | 2957 | nfs4_stateid stateid; |
2973 | struct rpc_cred *cred; | ||
2974 | unsigned long timestamp; | 2958 | unsigned long timestamp; |
2975 | struct nfs_fattr fattr; | 2959 | struct nfs_fattr fattr; |
2976 | int rpc_status; | 2960 | int rpc_status; |
2977 | }; | 2961 | }; |
2978 | 2962 | ||
2979 | static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata) | ||
2980 | { | ||
2981 | struct nfs4_delegreturndata *data = calldata; | ||
2982 | struct rpc_message msg = { | ||
2983 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], | ||
2984 | .rpc_argp = &data->args, | ||
2985 | .rpc_resp = &data->res, | ||
2986 | .rpc_cred = data->cred, | ||
2987 | }; | ||
2988 | nfs_fattr_init(data->res.fattr); | ||
2989 | rpc_call_setup(task, &msg, 0); | ||
2990 | } | ||
2991 | |||
2992 | static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | 2963 | static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) |
2993 | { | 2964 | { |
2994 | struct nfs4_delegreturndata *data = calldata; | 2965 | struct nfs4_delegreturndata *data = calldata; |
@@ -2999,24 +2970,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
2999 | 2970 | ||
3000 | static void nfs4_delegreturn_release(void *calldata) | 2971 | static void nfs4_delegreturn_release(void *calldata) |
3001 | { | 2972 | { |
3002 | struct nfs4_delegreturndata *data = calldata; | ||
3003 | |||
3004 | put_rpccred(data->cred); | ||
3005 | kfree(calldata); | 2973 | kfree(calldata); |
3006 | } | 2974 | } |
3007 | 2975 | ||
3008 | static const struct rpc_call_ops nfs4_delegreturn_ops = { | 2976 | static const struct rpc_call_ops nfs4_delegreturn_ops = { |
3009 | .rpc_call_prepare = nfs4_delegreturn_prepare, | ||
3010 | .rpc_call_done = nfs4_delegreturn_done, | 2977 | .rpc_call_done = nfs4_delegreturn_done, |
3011 | .rpc_release = nfs4_delegreturn_release, | 2978 | .rpc_release = nfs4_delegreturn_release, |
3012 | }; | 2979 | }; |
3013 | 2980 | ||
3014 | static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) | 2981 | static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) |
3015 | { | 2982 | { |
3016 | struct nfs4_delegreturndata *data; | 2983 | struct nfs4_delegreturndata *data; |
3017 | struct nfs_server *server = NFS_SERVER(inode); | 2984 | struct nfs_server *server = NFS_SERVER(inode); |
3018 | struct rpc_task *task; | 2985 | struct rpc_task *task; |
3019 | int status; | 2986 | struct rpc_message msg = { |
2987 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], | ||
2988 | .rpc_cred = cred, | ||
2989 | }; | ||
2990 | struct rpc_task_setup task_setup_data = { | ||
2991 | .rpc_client = server->client, | ||
2992 | .rpc_message = &msg, | ||
2993 | .callback_ops = &nfs4_delegreturn_ops, | ||
2994 | .flags = RPC_TASK_ASYNC, | ||
2995 | }; | ||
2996 | int status = 0; | ||
3020 | 2997 | ||
3021 | data = kmalloc(sizeof(*data), GFP_KERNEL); | 2998 | data = kmalloc(sizeof(*data), GFP_KERNEL); |
3022 | if (data == NULL) | 2999 | if (data == NULL) |
@@ -3028,30 +3005,37 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
3028 | memcpy(&data->stateid, stateid, sizeof(data->stateid)); | 3005 | memcpy(&data->stateid, stateid, sizeof(data->stateid)); |
3029 | data->res.fattr = &data->fattr; | 3006 | data->res.fattr = &data->fattr; |
3030 | data->res.server = server; | 3007 | data->res.server = server; |
3031 | data->cred = get_rpccred(cred); | 3008 | nfs_fattr_init(data->res.fattr); |
3032 | data->timestamp = jiffies; | 3009 | data->timestamp = jiffies; |
3033 | data->rpc_status = 0; | 3010 | data->rpc_status = 0; |
3034 | 3011 | ||
3035 | task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); | 3012 | task_setup_data.callback_data = data; |
3013 | msg.rpc_argp = &data->args, | ||
3014 | msg.rpc_resp = &data->res, | ||
3015 | task = rpc_run_task(&task_setup_data); | ||
3036 | if (IS_ERR(task)) | 3016 | if (IS_ERR(task)) |
3037 | return PTR_ERR(task); | 3017 | return PTR_ERR(task); |
3018 | if (!issync) | ||
3019 | goto out; | ||
3038 | status = nfs4_wait_for_completion_rpc_task(task); | 3020 | status = nfs4_wait_for_completion_rpc_task(task); |
3039 | if (status == 0) { | 3021 | if (status != 0) |
3040 | status = data->rpc_status; | 3022 | goto out; |
3041 | if (status == 0) | 3023 | status = data->rpc_status; |
3042 | nfs_refresh_inode(inode, &data->fattr); | 3024 | if (status != 0) |
3043 | } | 3025 | goto out; |
3026 | nfs_refresh_inode(inode, &data->fattr); | ||
3027 | out: | ||
3044 | rpc_put_task(task); | 3028 | rpc_put_task(task); |
3045 | return status; | 3029 | return status; |
3046 | } | 3030 | } |
3047 | 3031 | ||
3048 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) | 3032 | int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) |
3049 | { | 3033 | { |
3050 | struct nfs_server *server = NFS_SERVER(inode); | 3034 | struct nfs_server *server = NFS_SERVER(inode); |
3051 | struct nfs4_exception exception = { }; | 3035 | struct nfs4_exception exception = { }; |
3052 | int err; | 3036 | int err; |
3053 | do { | 3037 | do { |
3054 | err = _nfs4_proc_delegreturn(inode, cred, stateid); | 3038 | err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); |
3055 | switch (err) { | 3039 | switch (err) { |
3056 | case -NFS4ERR_STALE_STATEID: | 3040 | case -NFS4ERR_STALE_STATEID: |
3057 | case -NFS4ERR_EXPIRED: | 3041 | case -NFS4ERR_EXPIRED: |
@@ -3219,12 +3203,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
3219 | static void nfs4_locku_prepare(struct rpc_task *task, void *data) | 3203 | static void nfs4_locku_prepare(struct rpc_task *task, void *data) |
3220 | { | 3204 | { |
3221 | struct nfs4_unlockdata *calldata = data; | 3205 | struct nfs4_unlockdata *calldata = data; |
3222 | struct rpc_message msg = { | ||
3223 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], | ||
3224 | .rpc_argp = &calldata->arg, | ||
3225 | .rpc_resp = &calldata->res, | ||
3226 | .rpc_cred = calldata->lsp->ls_state->owner->so_cred, | ||
3227 | }; | ||
3228 | 3206 | ||
3229 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) | 3207 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) |
3230 | return; | 3208 | return; |
@@ -3234,7 +3212,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) | |||
3234 | return; | 3212 | return; |
3235 | } | 3213 | } |
3236 | calldata->timestamp = jiffies; | 3214 | calldata->timestamp = jiffies; |
3237 | rpc_call_setup(task, &msg, 0); | 3215 | rpc_call_start(task); |
3238 | } | 3216 | } |
3239 | 3217 | ||
3240 | static const struct rpc_call_ops nfs4_locku_ops = { | 3218 | static const struct rpc_call_ops nfs4_locku_ops = { |
@@ -3249,6 +3227,16 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, | |||
3249 | struct nfs_seqid *seqid) | 3227 | struct nfs_seqid *seqid) |
3250 | { | 3228 | { |
3251 | struct nfs4_unlockdata *data; | 3229 | struct nfs4_unlockdata *data; |
3230 | struct rpc_message msg = { | ||
3231 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], | ||
3232 | .rpc_cred = ctx->cred, | ||
3233 | }; | ||
3234 | struct rpc_task_setup task_setup_data = { | ||
3235 | .rpc_client = NFS_CLIENT(lsp->ls_state->inode), | ||
3236 | .rpc_message = &msg, | ||
3237 | .callback_ops = &nfs4_locku_ops, | ||
3238 | .flags = RPC_TASK_ASYNC, | ||
3239 | }; | ||
3252 | 3240 | ||
3253 | /* Ensure this is an unlock - when canceling a lock, the | 3241 | /* Ensure this is an unlock - when canceling a lock, the |
3254 | * canceled lock is passed in, and it won't be an unlock. | 3242 | * canceled lock is passed in, and it won't be an unlock. |
@@ -3261,7 +3249,10 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, | |||
3261 | return ERR_PTR(-ENOMEM); | 3249 | return ERR_PTR(-ENOMEM); |
3262 | } | 3250 | } |
3263 | 3251 | ||
3264 | return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); | 3252 | msg.rpc_argp = &data->arg, |
3253 | msg.rpc_resp = &data->res, | ||
3254 | task_setup_data.callback_data = data; | ||
3255 | return rpc_run_task(&task_setup_data); | ||
3265 | } | 3256 | } |
3266 | 3257 | ||
3267 | static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) | 3258 | static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) |
@@ -3320,9 +3311,12 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
3320 | 3311 | ||
3321 | p->arg.fh = NFS_FH(inode); | 3312 | p->arg.fh = NFS_FH(inode); |
3322 | p->arg.fl = &p->fl; | 3313 | p->arg.fl = &p->fl; |
3314 | p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); | ||
3315 | if (p->arg.open_seqid == NULL) | ||
3316 | goto out_free; | ||
3323 | p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); | 3317 | p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); |
3324 | if (p->arg.lock_seqid == NULL) | 3318 | if (p->arg.lock_seqid == NULL) |
3325 | goto out_free; | 3319 | goto out_free_seqid; |
3326 | p->arg.lock_stateid = &lsp->ls_stateid; | 3320 | p->arg.lock_stateid = &lsp->ls_stateid; |
3327 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; | 3321 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; |
3328 | p->arg.lock_owner.id = lsp->ls_id.id; | 3322 | p->arg.lock_owner.id = lsp->ls_id.id; |
@@ -3331,6 +3325,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
3331 | p->ctx = get_nfs_open_context(ctx); | 3325 | p->ctx = get_nfs_open_context(ctx); |
3332 | memcpy(&p->fl, fl, sizeof(p->fl)); | 3326 | memcpy(&p->fl, fl, sizeof(p->fl)); |
3333 | return p; | 3327 | return p; |
3328 | out_free_seqid: | ||
3329 | nfs_free_seqid(p->arg.open_seqid); | ||
3334 | out_free: | 3330 | out_free: |
3335 | kfree(p); | 3331 | kfree(p); |
3336 | return NULL; | 3332 | return NULL; |
@@ -3340,31 +3336,20 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) | |||
3340 | { | 3336 | { |
3341 | struct nfs4_lockdata *data = calldata; | 3337 | struct nfs4_lockdata *data = calldata; |
3342 | struct nfs4_state *state = data->lsp->ls_state; | 3338 | struct nfs4_state *state = data->lsp->ls_state; |
3343 | struct nfs4_state_owner *sp = state->owner; | ||
3344 | struct rpc_message msg = { | ||
3345 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], | ||
3346 | .rpc_argp = &data->arg, | ||
3347 | .rpc_resp = &data->res, | ||
3348 | .rpc_cred = sp->so_cred, | ||
3349 | }; | ||
3350 | 3339 | ||
3340 | dprintk("%s: begin!\n", __FUNCTION__); | ||
3351 | if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) | 3341 | if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) |
3352 | return; | 3342 | return; |
3353 | dprintk("%s: begin!\n", __FUNCTION__); | ||
3354 | /* Do we need to do an open_to_lock_owner? */ | 3343 | /* Do we need to do an open_to_lock_owner? */ |
3355 | if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { | 3344 | if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { |
3356 | data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid); | 3345 | if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) |
3357 | if (data->arg.open_seqid == NULL) { | 3346 | return; |
3358 | data->rpc_status = -ENOMEM; | ||
3359 | task->tk_action = NULL; | ||
3360 | goto out; | ||
3361 | } | ||
3362 | data->arg.open_stateid = &state->stateid; | 3347 | data->arg.open_stateid = &state->stateid; |
3363 | data->arg.new_lock_owner = 1; | 3348 | data->arg.new_lock_owner = 1; |
3364 | } | 3349 | } else |
3350 | data->arg.new_lock_owner = 0; | ||
3365 | data->timestamp = jiffies; | 3351 | data->timestamp = jiffies; |
3366 | rpc_call_setup(task, &msg, 0); | 3352 | rpc_call_start(task); |
3367 | out: | ||
3368 | dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); | 3353 | dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); |
3369 | } | 3354 | } |
3370 | 3355 | ||
@@ -3400,8 +3385,7 @@ static void nfs4_lock_release(void *calldata) | |||
3400 | struct nfs4_lockdata *data = calldata; | 3385 | struct nfs4_lockdata *data = calldata; |
3401 | 3386 | ||
3402 | dprintk("%s: begin!\n", __FUNCTION__); | 3387 | dprintk("%s: begin!\n", __FUNCTION__); |
3403 | if (data->arg.open_seqid != NULL) | 3388 | nfs_free_seqid(data->arg.open_seqid); |
3404 | nfs_free_seqid(data->arg.open_seqid); | ||
3405 | if (data->cancelled != 0) { | 3389 | if (data->cancelled != 0) { |
3406 | struct rpc_task *task; | 3390 | struct rpc_task *task; |
3407 | task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, | 3391 | task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, |
@@ -3427,6 +3411,16 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
3427 | { | 3411 | { |
3428 | struct nfs4_lockdata *data; | 3412 | struct nfs4_lockdata *data; |
3429 | struct rpc_task *task; | 3413 | struct rpc_task *task; |
3414 | struct rpc_message msg = { | ||
3415 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], | ||
3416 | .rpc_cred = state->owner->so_cred, | ||
3417 | }; | ||
3418 | struct rpc_task_setup task_setup_data = { | ||
3419 | .rpc_client = NFS_CLIENT(state->inode), | ||
3420 | .rpc_message = &msg, | ||
3421 | .callback_ops = &nfs4_lock_ops, | ||
3422 | .flags = RPC_TASK_ASYNC, | ||
3423 | }; | ||
3430 | int ret; | 3424 | int ret; |
3431 | 3425 | ||
3432 | dprintk("%s: begin!\n", __FUNCTION__); | 3426 | dprintk("%s: begin!\n", __FUNCTION__); |
@@ -3438,8 +3432,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
3438 | data->arg.block = 1; | 3432 | data->arg.block = 1; |
3439 | if (reclaim != 0) | 3433 | if (reclaim != 0) |
3440 | data->arg.reclaim = 1; | 3434 | data->arg.reclaim = 1; |
3441 | task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, | 3435 | msg.rpc_argp = &data->arg, |
3442 | &nfs4_lock_ops, data); | 3436 | msg.rpc_resp = &data->res, |
3437 | task_setup_data.callback_data = data; | ||
3438 | task = rpc_run_task(&task_setup_data); | ||
3443 | if (IS_ERR(task)) | 3439 | if (IS_ERR(task)) |
3444 | return PTR_ERR(task); | 3440 | return PTR_ERR(task); |
3445 | ret = nfs4_wait_for_completion_rpc_task(task); | 3441 | ret = nfs4_wait_for_completion_rpc_task(task); |
@@ -3612,10 +3608,6 @@ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, | |||
3612 | if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) | 3608 | if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) |
3613 | return -EOPNOTSUPP; | 3609 | return -EOPNOTSUPP; |
3614 | 3610 | ||
3615 | if (!S_ISREG(inode->i_mode) && | ||
3616 | (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) | ||
3617 | return -EPERM; | ||
3618 | |||
3619 | return nfs4_proc_set_acl(inode, buf, buflen); | 3611 | return nfs4_proc_set_acl(inode, buf, buflen); |
3620 | } | 3612 | } |
3621 | 3613 | ||
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 3ea352d82eba..5e2e4af1a0e6 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -133,9 +133,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server) | |||
133 | void | 133 | void |
134 | nfs4_kill_renewd(struct nfs_client *clp) | 134 | nfs4_kill_renewd(struct nfs_client *clp) |
135 | { | 135 | { |
136 | down_read(&clp->cl_sem); | ||
137 | cancel_delayed_work_sync(&clp->cl_renewd); | 136 | cancel_delayed_work_sync(&clp->cl_renewd); |
138 | up_read(&clp->cl_sem); | ||
139 | } | 137 | } |
140 | 138 | ||
141 | /* | 139 | /* |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 23a9a36556bf..f9c7432471dc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -509,7 +509,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
509 | lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); | 509 | lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); |
510 | if (lsp == NULL) | 510 | if (lsp == NULL) |
511 | return NULL; | 511 | return NULL; |
512 | lsp->ls_seqid.sequence = &state->owner->so_sequence; | 512 | rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); |
513 | spin_lock_init(&lsp->ls_sequence.lock); | ||
514 | INIT_LIST_HEAD(&lsp->ls_sequence.list); | ||
515 | lsp->ls_seqid.sequence = &lsp->ls_sequence; | ||
513 | atomic_set(&lsp->ls_count, 1); | 516 | atomic_set(&lsp->ls_count, 1); |
514 | lsp->ls_owner = fl_owner; | 517 | lsp->ls_owner = fl_owner; |
515 | spin_lock(&clp->cl_lock); | 518 | spin_lock(&clp->cl_lock); |
@@ -641,27 +644,26 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f | |||
641 | 644 | ||
642 | struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) | 645 | struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) |
643 | { | 646 | { |
644 | struct rpc_sequence *sequence = counter->sequence; | ||
645 | struct nfs_seqid *new; | 647 | struct nfs_seqid *new; |
646 | 648 | ||
647 | new = kmalloc(sizeof(*new), GFP_KERNEL); | 649 | new = kmalloc(sizeof(*new), GFP_KERNEL); |
648 | if (new != NULL) { | 650 | if (new != NULL) { |
649 | new->sequence = counter; | 651 | new->sequence = counter; |
650 | spin_lock(&sequence->lock); | 652 | INIT_LIST_HEAD(&new->list); |
651 | list_add_tail(&new->list, &sequence->list); | ||
652 | spin_unlock(&sequence->lock); | ||
653 | } | 653 | } |
654 | return new; | 654 | return new; |
655 | } | 655 | } |
656 | 656 | ||
657 | void nfs_free_seqid(struct nfs_seqid *seqid) | 657 | void nfs_free_seqid(struct nfs_seqid *seqid) |
658 | { | 658 | { |
659 | struct rpc_sequence *sequence = seqid->sequence->sequence; | 659 | if (!list_empty(&seqid->list)) { |
660 | struct rpc_sequence *sequence = seqid->sequence->sequence; | ||
660 | 661 | ||
661 | spin_lock(&sequence->lock); | 662 | spin_lock(&sequence->lock); |
662 | list_del(&seqid->list); | 663 | list_del(&seqid->list); |
663 | spin_unlock(&sequence->lock); | 664 | spin_unlock(&sequence->lock); |
664 | rpc_wake_up(&sequence->wait); | 665 | rpc_wake_up(&sequence->wait); |
666 | } | ||
665 | kfree(seqid); | 667 | kfree(seqid); |
666 | } | 668 | } |
667 | 669 | ||
@@ -672,6 +674,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) | |||
672 | */ | 674 | */ |
673 | static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) | 675 | static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) |
674 | { | 676 | { |
677 | BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); | ||
675 | switch (status) { | 678 | switch (status) { |
676 | case 0: | 679 | case 0: |
677 | break; | 680 | break; |
@@ -723,15 +726,15 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) | |||
723 | struct rpc_sequence *sequence = seqid->sequence->sequence; | 726 | struct rpc_sequence *sequence = seqid->sequence->sequence; |
724 | int status = 0; | 727 | int status = 0; |
725 | 728 | ||
726 | if (sequence->list.next == &seqid->list) | ||
727 | goto out; | ||
728 | spin_lock(&sequence->lock); | 729 | spin_lock(&sequence->lock); |
729 | if (sequence->list.next != &seqid->list) { | 730 | if (list_empty(&seqid->list)) |
730 | rpc_sleep_on(&sequence->wait, task, NULL, NULL); | 731 | list_add_tail(&seqid->list, &sequence->list); |
731 | status = -EAGAIN; | 732 | if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) |
732 | } | 733 | goto unlock; |
734 | rpc_sleep_on(&sequence->wait, task, NULL, NULL); | ||
735 | status = -EAGAIN; | ||
736 | unlock: | ||
733 | spin_unlock(&sequence->lock); | 737 | spin_unlock(&sequence->lock); |
734 | out: | ||
735 | return status; | 738 | return status; |
736 | } | 739 | } |
737 | 740 | ||
@@ -755,8 +758,9 @@ static void nfs4_recover_state(struct nfs_client *clp) | |||
755 | 758 | ||
756 | __module_get(THIS_MODULE); | 759 | __module_get(THIS_MODULE); |
757 | atomic_inc(&clp->cl_count); | 760 | atomic_inc(&clp->cl_count); |
758 | task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", | 761 | task = kthread_run(reclaimer, clp, "%s-reclaim", |
759 | NIPQUAD(clp->cl_addr.sin_addr)); | 762 | rpc_peeraddr2str(clp->cl_rpcclient, |
763 | RPC_DISPLAY_ADDR)); | ||
760 | if (!IS_ERR(task)) | 764 | if (!IS_ERR(task)) |
761 | return; | 765 | return; |
762 | nfs4_clear_recover_bit(clp); | 766 | nfs4_clear_recover_bit(clp); |
@@ -967,8 +971,8 @@ out: | |||
967 | module_put_and_exit(0); | 971 | module_put_and_exit(0); |
968 | return 0; | 972 | return 0; |
969 | out_error: | 973 | out_error: |
970 | printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", | 974 | printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s" |
971 | NIPQUAD(clp->cl_addr.sin_addr), -status); | 975 | " with error %d\n", clp->cl_hostname, -status); |
972 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | 976 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); |
973 | goto out; | 977 | goto out; |
974 | } | 978 | } |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 51dd3804866f..db1ed9c46ede 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -116,10 +116,12 @@ static int nfs4_stat_to_errno(int); | |||
116 | #define decode_renew_maxsz (op_decode_hdr_maxsz) | 116 | #define decode_renew_maxsz (op_decode_hdr_maxsz) |
117 | #define encode_setclientid_maxsz \ | 117 | #define encode_setclientid_maxsz \ |
118 | (op_encode_hdr_maxsz + \ | 118 | (op_encode_hdr_maxsz + \ |
119 | 4 /*server->ip_addr*/ + \ | 119 | XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ |
120 | 1 /*Netid*/ + \ | 120 | XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ |
121 | 6 /*uaddr*/ + \ | 121 | 1 /* sc_prog */ + \ |
122 | 6 + (NFS4_VERIFIER_SIZE >> 2)) | 122 | XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ |
123 | XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ | ||
124 | 1) /* sc_cb_ident */ | ||
123 | #define decode_setclientid_maxsz \ | 125 | #define decode_setclientid_maxsz \ |
124 | (op_decode_hdr_maxsz + \ | 126 | (op_decode_hdr_maxsz + \ |
125 | 2 + \ | 127 | 2 + \ |
@@ -2515,14 +2517,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin | |||
2515 | 2517 | ||
2516 | static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) | 2518 | static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) |
2517 | { | 2519 | { |
2518 | int n; | 2520 | u32 n; |
2519 | __be32 *p; | 2521 | __be32 *p; |
2520 | int status = 0; | 2522 | int status = 0; |
2521 | 2523 | ||
2522 | READ_BUF(4); | 2524 | READ_BUF(4); |
2523 | READ32(n); | 2525 | READ32(n); |
2524 | if (n < 0) | ||
2525 | goto out_eio; | ||
2526 | if (n == 0) | 2526 | if (n == 0) |
2527 | goto root_path; | 2527 | goto root_path; |
2528 | dprintk("path "); | 2528 | dprintk("path "); |
@@ -2579,13 +2579,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
2579 | goto out_eio; | 2579 | goto out_eio; |
2580 | res->nlocations = 0; | 2580 | res->nlocations = 0; |
2581 | while (res->nlocations < n) { | 2581 | while (res->nlocations < n) { |
2582 | int m; | 2582 | u32 m; |
2583 | struct nfs4_fs_location *loc = &res->locations[res->nlocations]; | 2583 | struct nfs4_fs_location *loc = &res->locations[res->nlocations]; |
2584 | 2584 | ||
2585 | READ_BUF(4); | 2585 | READ_BUF(4); |
2586 | READ32(m); | 2586 | READ32(m); |
2587 | if (m <= 0) | ||
2588 | goto out_eio; | ||
2589 | 2587 | ||
2590 | loc->nservers = 0; | 2588 | loc->nservers = 0; |
2591 | dprintk("%s: servers ", __FUNCTION__); | 2589 | dprintk("%s: servers ", __FUNCTION__); |
@@ -2598,8 +2596,12 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st | |||
2598 | if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) | 2596 | if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) |
2599 | loc->nservers++; | 2597 | loc->nservers++; |
2600 | else { | 2598 | else { |
2601 | int i; | 2599 | unsigned int i; |
2602 | dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); | 2600 | dprintk("%s: using first %u of %u servers " |
2601 | "returned for location %u\n", | ||
2602 | __FUNCTION__, | ||
2603 | NFS4_FS_LOCATION_MAXSERVERS, | ||
2604 | m, res->nlocations); | ||
2603 | for (i = loc->nservers; i < m; i++) { | 2605 | for (i = loc->nservers; i < m; i++) { |
2604 | unsigned int len; | 2606 | unsigned int len; |
2605 | char *data; | 2607 | char *data; |
@@ -3476,10 +3478,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3476 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 3478 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
3477 | struct page *page = *rcvbuf->pages; | 3479 | struct page *page = *rcvbuf->pages; |
3478 | struct kvec *iov = rcvbuf->head; | 3480 | struct kvec *iov = rcvbuf->head; |
3479 | unsigned int nr, pglen = rcvbuf->page_len; | 3481 | size_t hdrlen; |
3482 | u32 recvd, pglen = rcvbuf->page_len; | ||
3480 | __be32 *end, *entry, *p, *kaddr; | 3483 | __be32 *end, *entry, *p, *kaddr; |
3481 | uint32_t len, attrlen, xlen; | 3484 | unsigned int nr; |
3482 | int hdrlen, recvd, status; | 3485 | int status; |
3483 | 3486 | ||
3484 | status = decode_op_hdr(xdr, OP_READDIR); | 3487 | status = decode_op_hdr(xdr, OP_READDIR); |
3485 | if (status) | 3488 | if (status) |
@@ -3503,6 +3506,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3503 | end = p + ((pglen + readdir->pgbase) >> 2); | 3506 | end = p + ((pglen + readdir->pgbase) >> 2); |
3504 | entry = p; | 3507 | entry = p; |
3505 | for (nr = 0; *p++; nr++) { | 3508 | for (nr = 0; *p++; nr++) { |
3509 | u32 len, attrlen, xlen; | ||
3506 | if (end - p < 3) | 3510 | if (end - p < 3) |
3507 | goto short_pkt; | 3511 | goto short_pkt; |
3508 | dprintk("cookie = %Lu, ", *((unsigned long long *)p)); | 3512 | dprintk("cookie = %Lu, ", *((unsigned long long *)p)); |
@@ -3551,7 +3555,8 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
3551 | { | 3555 | { |
3552 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 3556 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
3553 | struct kvec *iov = rcvbuf->head; | 3557 | struct kvec *iov = rcvbuf->head; |
3554 | int hdrlen, len, recvd; | 3558 | size_t hdrlen; |
3559 | u32 len, recvd; | ||
3555 | __be32 *p; | 3560 | __be32 *p; |
3556 | char *kaddr; | 3561 | char *kaddr; |
3557 | int status; | 3562 | int status; |
@@ -3646,7 +3651,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
3646 | if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) | 3651 | if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) |
3647 | return -EIO; | 3652 | return -EIO; |
3648 | if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { | 3653 | if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { |
3649 | int hdrlen, recvd; | 3654 | size_t hdrlen; |
3655 | u32 recvd; | ||
3650 | 3656 | ||
3651 | /* We ignore &savep and don't do consistency checks on | 3657 | /* We ignore &savep and don't do consistency checks on |
3652 | * the attr length. Let userspace figure it out.... */ | 3658 | * the attr length. Let userspace figure it out.... */ |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2dff469f04fe..7f079209d70a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -58,7 +58,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
58 | struct page *page, | 58 | struct page *page, |
59 | unsigned int offset, unsigned int count) | 59 | unsigned int offset, unsigned int count) |
60 | { | 60 | { |
61 | struct nfs_server *server = NFS_SERVER(inode); | ||
62 | struct nfs_page *req; | 61 | struct nfs_page *req; |
63 | 62 | ||
64 | for (;;) { | 63 | for (;;) { |
@@ -111,13 +110,14 @@ void nfs_unlock_request(struct nfs_page *req) | |||
111 | * nfs_set_page_tag_locked - Tag a request as locked | 110 | * nfs_set_page_tag_locked - Tag a request as locked |
112 | * @req: | 111 | * @req: |
113 | */ | 112 | */ |
114 | static int nfs_set_page_tag_locked(struct nfs_page *req) | 113 | int nfs_set_page_tag_locked(struct nfs_page *req) |
115 | { | 114 | { |
116 | struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); | 115 | struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); |
117 | 116 | ||
118 | if (!nfs_lock_request(req)) | 117 | if (!nfs_lock_request_dontget(req)) |
119 | return 0; | 118 | return 0; |
120 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | 119 | if (req->wb_page != NULL) |
120 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | ||
121 | return 1; | 121 | return 1; |
122 | } | 122 | } |
123 | 123 | ||
@@ -132,9 +132,10 @@ void nfs_clear_page_tag_locked(struct nfs_page *req) | |||
132 | if (req->wb_page != NULL) { | 132 | if (req->wb_page != NULL) { |
133 | spin_lock(&inode->i_lock); | 133 | spin_lock(&inode->i_lock); |
134 | radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | 134 | radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); |
135 | nfs_unlock_request(req); | ||
135 | spin_unlock(&inode->i_lock); | 136 | spin_unlock(&inode->i_lock); |
136 | } | 137 | } else |
137 | nfs_unlock_request(req); | 138 | nfs_unlock_request(req); |
138 | } | 139 | } |
139 | 140 | ||
140 | /** | 141 | /** |
@@ -413,6 +414,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, | |||
413 | goto out; | 414 | goto out; |
414 | idx_start = req->wb_index + 1; | 415 | idx_start = req->wb_index + 1; |
415 | if (nfs_set_page_tag_locked(req)) { | 416 | if (nfs_set_page_tag_locked(req)) { |
417 | kref_get(&req->wb_kref); | ||
416 | nfs_list_remove_request(req); | 418 | nfs_list_remove_request(req); |
417 | radix_tree_tag_clear(&nfsi->nfs_page_tree, | 419 | radix_tree_tag_clear(&nfsi->nfs_page_tree, |
418 | req->wb_index, tag); | 420 | req->wb_index, tag); |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4f80d88e9fee..5ccf7faee19c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -565,16 +565,9 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
565 | return 0; | 565 | return 0; |
566 | } | 566 | } |
567 | 567 | ||
568 | static void nfs_proc_read_setup(struct nfs_read_data *data) | 568 | static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) |
569 | { | 569 | { |
570 | struct rpc_message msg = { | 570 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; |
571 | .rpc_proc = &nfs_procedures[NFSPROC_READ], | ||
572 | .rpc_argp = &data->args, | ||
573 | .rpc_resp = &data->res, | ||
574 | .rpc_cred = data->cred, | ||
575 | }; | ||
576 | |||
577 | rpc_call_setup(&data->task, &msg, 0); | ||
578 | } | 571 | } |
579 | 572 | ||
580 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 573 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -584,24 +577,15 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
584 | return 0; | 577 | return 0; |
585 | } | 578 | } |
586 | 579 | ||
587 | static void nfs_proc_write_setup(struct nfs_write_data *data, int how) | 580 | static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) |
588 | { | 581 | { |
589 | struct rpc_message msg = { | ||
590 | .rpc_proc = &nfs_procedures[NFSPROC_WRITE], | ||
591 | .rpc_argp = &data->args, | ||
592 | .rpc_resp = &data->res, | ||
593 | .rpc_cred = data->cred, | ||
594 | }; | ||
595 | |||
596 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ | 582 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ |
597 | data->args.stable = NFS_FILE_SYNC; | 583 | data->args.stable = NFS_FILE_SYNC; |
598 | 584 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; | |
599 | /* Finalize the task. */ | ||
600 | rpc_call_setup(&data->task, &msg, 0); | ||
601 | } | 585 | } |
602 | 586 | ||
603 | static void | 587 | static void |
604 | nfs_proc_commit_setup(struct nfs_write_data *data, int how) | 588 | nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
605 | { | 589 | { |
606 | BUG(); | 590 | BUG(); |
607 | } | 591 | } |
@@ -609,7 +593,9 @@ nfs_proc_commit_setup(struct nfs_write_data *data, int how) | |||
609 | static int | 593 | static int |
610 | nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) | 594 | nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) |
611 | { | 595 | { |
612 | return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl); | 596 | struct inode *inode = filp->f_path.dentry->d_inode; |
597 | |||
598 | return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); | ||
613 | } | 599 | } |
614 | 600 | ||
615 | 601 | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3dcaa6a73261..8fd6dfbe1bc3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -160,12 +160,26 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
160 | const struct rpc_call_ops *call_ops, | 160 | const struct rpc_call_ops *call_ops, |
161 | unsigned int count, unsigned int offset) | 161 | unsigned int count, unsigned int offset) |
162 | { | 162 | { |
163 | struct inode *inode; | 163 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
164 | int flags; | 164 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; |
165 | struct rpc_task *task; | ||
166 | struct rpc_message msg = { | ||
167 | .rpc_argp = &data->args, | ||
168 | .rpc_resp = &data->res, | ||
169 | .rpc_cred = req->wb_context->cred, | ||
170 | }; | ||
171 | struct rpc_task_setup task_setup_data = { | ||
172 | .task = &data->task, | ||
173 | .rpc_client = NFS_CLIENT(inode), | ||
174 | .rpc_message = &msg, | ||
175 | .callback_ops = call_ops, | ||
176 | .callback_data = data, | ||
177 | .flags = RPC_TASK_ASYNC | swap_flags, | ||
178 | }; | ||
165 | 179 | ||
166 | data->req = req; | 180 | data->req = req; |
167 | data->inode = inode = req->wb_context->path.dentry->d_inode; | 181 | data->inode = inode; |
168 | data->cred = req->wb_context->cred; | 182 | data->cred = msg.rpc_cred; |
169 | 183 | ||
170 | data->args.fh = NFS_FH(inode); | 184 | data->args.fh = NFS_FH(inode); |
171 | data->args.offset = req_offset(req) + offset; | 185 | data->args.offset = req_offset(req) + offset; |
@@ -180,11 +194,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
180 | nfs_fattr_init(&data->fattr); | 194 | nfs_fattr_init(&data->fattr); |
181 | 195 | ||
182 | /* Set up the initial task struct. */ | 196 | /* Set up the initial task struct. */ |
183 | flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); | 197 | NFS_PROTO(inode)->read_setup(data, &msg); |
184 | rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); | ||
185 | NFS_PROTO(inode)->read_setup(data); | ||
186 | |||
187 | data->task.tk_cookie = (unsigned long)inode; | ||
188 | 198 | ||
189 | dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", | 199 | dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", |
190 | data->task.tk_pid, | 200 | data->task.tk_pid, |
@@ -192,6 +202,10 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
192 | (long long)NFS_FILEID(inode), | 202 | (long long)NFS_FILEID(inode), |
193 | count, | 203 | count, |
194 | (unsigned long long)data->args.offset); | 204 | (unsigned long long)data->args.offset); |
205 | |||
206 | task = rpc_run_task(&task_setup_data); | ||
207 | if (!IS_ERR(task)) | ||
208 | rpc_put_task(task); | ||
195 | } | 209 | } |
196 | 210 | ||
197 | static void | 211 | static void |
@@ -208,14 +222,6 @@ nfs_async_read_error(struct list_head *head) | |||
208 | } | 222 | } |
209 | 223 | ||
210 | /* | 224 | /* |
211 | * Start an async read operation | ||
212 | */ | ||
213 | static void nfs_execute_read(struct nfs_read_data *data) | ||
214 | { | ||
215 | rpc_execute(&data->task); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Generate multiple requests to fill a single page. | 225 | * Generate multiple requests to fill a single page. |
220 | * | 226 | * |
221 | * We optimize to reduce the number of read operations on the wire. If we | 227 | * We optimize to reduce the number of read operations on the wire. If we |
@@ -269,7 +275,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne | |||
269 | rsize, offset); | 275 | rsize, offset); |
270 | offset += rsize; | 276 | offset += rsize; |
271 | nbytes -= rsize; | 277 | nbytes -= rsize; |
272 | nfs_execute_read(data); | ||
273 | } while (nbytes != 0); | 278 | } while (nbytes != 0); |
274 | 279 | ||
275 | return 0; | 280 | return 0; |
@@ -307,8 +312,6 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned | |||
307 | req = nfs_list_entry(data->pages.next); | 312 | req = nfs_list_entry(data->pages.next); |
308 | 313 | ||
309 | nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); | 314 | nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); |
310 | |||
311 | nfs_execute_read(data); | ||
312 | return 0; | 315 | return 0; |
313 | out_bad: | 316 | out_bad: |
314 | nfs_async_read_error(head); | 317 | nfs_async_read_error(head); |
@@ -333,7 +336,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | |||
333 | nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); | 336 | nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); |
334 | 337 | ||
335 | if (task->tk_status == -ESTALE) { | 338 | if (task->tk_status == -ESTALE) { |
336 | set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); | 339 | set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); |
337 | nfs_mark_for_revalidate(data->inode); | 340 | nfs_mark_for_revalidate(data->inode); |
338 | } | 341 | } |
339 | return 0; | 342 | return 0; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5b6339f70a4c..7f4505f6ac6f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -45,6 +45,8 @@ | |||
45 | #include <linux/nfs_idmap.h> | 45 | #include <linux/nfs_idmap.h> |
46 | #include <linux/vfs.h> | 46 | #include <linux/vfs.h> |
47 | #include <linux/inet.h> | 47 | #include <linux/inet.h> |
48 | #include <linux/in6.h> | ||
49 | #include <net/ipv6.h> | ||
48 | #include <linux/nfs_xdr.h> | 50 | #include <linux/nfs_xdr.h> |
49 | #include <linux/magic.h> | 51 | #include <linux/magic.h> |
50 | #include <linux/parser.h> | 52 | #include <linux/parser.h> |
@@ -83,11 +85,11 @@ enum { | |||
83 | Opt_actimeo, | 85 | Opt_actimeo, |
84 | Opt_namelen, | 86 | Opt_namelen, |
85 | Opt_mountport, | 87 | Opt_mountport, |
86 | Opt_mountprog, Opt_mountvers, | 88 | Opt_mountvers, |
87 | Opt_nfsprog, Opt_nfsvers, | 89 | Opt_nfsvers, |
88 | 90 | ||
89 | /* Mount options that take string arguments */ | 91 | /* Mount options that take string arguments */ |
90 | Opt_sec, Opt_proto, Opt_mountproto, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
91 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
92 | 94 | ||
93 | /* Mount options that are ignored */ | 95 | /* Mount options that are ignored */ |
@@ -137,9 +139,7 @@ static match_table_t nfs_mount_option_tokens = { | |||
137 | { Opt_userspace, "retry=%u" }, | 139 | { Opt_userspace, "retry=%u" }, |
138 | { Opt_namelen, "namlen=%u" }, | 140 | { Opt_namelen, "namlen=%u" }, |
139 | { Opt_mountport, "mountport=%u" }, | 141 | { Opt_mountport, "mountport=%u" }, |
140 | { Opt_mountprog, "mountprog=%u" }, | ||
141 | { Opt_mountvers, "mountvers=%u" }, | 142 | { Opt_mountvers, "mountvers=%u" }, |
142 | { Opt_nfsprog, "nfsprog=%u" }, | ||
143 | { Opt_nfsvers, "nfsvers=%u" }, | 143 | { Opt_nfsvers, "nfsvers=%u" }, |
144 | { Opt_nfsvers, "vers=%u" }, | 144 | { Opt_nfsvers, "vers=%u" }, |
145 | 145 | ||
@@ -148,7 +148,7 @@ static match_table_t nfs_mount_option_tokens = { | |||
148 | { Opt_mountproto, "mountproto=%s" }, | 148 | { Opt_mountproto, "mountproto=%s" }, |
149 | { Opt_addr, "addr=%s" }, | 149 | { Opt_addr, "addr=%s" }, |
150 | { Opt_clientaddr, "clientaddr=%s" }, | 150 | { Opt_clientaddr, "clientaddr=%s" }, |
151 | { Opt_userspace, "mounthost=%s" }, | 151 | { Opt_mounthost, "mounthost=%s" }, |
152 | { Opt_mountaddr, "mountaddr=%s" }, | 152 | { Opt_mountaddr, "mountaddr=%s" }, |
153 | 153 | ||
154 | { Opt_err, NULL } | 154 | { Opt_err, NULL } |
@@ -202,6 +202,7 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru | |||
202 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, | 202 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, |
203 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 203 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
204 | static void nfs_kill_super(struct super_block *); | 204 | static void nfs_kill_super(struct super_block *); |
205 | static void nfs_put_super(struct super_block *); | ||
205 | 206 | ||
206 | static struct file_system_type nfs_fs_type = { | 207 | static struct file_system_type nfs_fs_type = { |
207 | .owner = THIS_MODULE, | 208 | .owner = THIS_MODULE, |
@@ -223,6 +224,7 @@ static const struct super_operations nfs_sops = { | |||
223 | .alloc_inode = nfs_alloc_inode, | 224 | .alloc_inode = nfs_alloc_inode, |
224 | .destroy_inode = nfs_destroy_inode, | 225 | .destroy_inode = nfs_destroy_inode, |
225 | .write_inode = nfs_write_inode, | 226 | .write_inode = nfs_write_inode, |
227 | .put_super = nfs_put_super, | ||
226 | .statfs = nfs_statfs, | 228 | .statfs = nfs_statfs, |
227 | .clear_inode = nfs_clear_inode, | 229 | .clear_inode = nfs_clear_inode, |
228 | .umount_begin = nfs_umount_begin, | 230 | .umount_begin = nfs_umount_begin, |
@@ -325,6 +327,28 @@ void __exit unregister_nfs_fs(void) | |||
325 | unregister_filesystem(&nfs_fs_type); | 327 | unregister_filesystem(&nfs_fs_type); |
326 | } | 328 | } |
327 | 329 | ||
330 | void nfs_sb_active(struct nfs_server *server) | ||
331 | { | ||
332 | atomic_inc(&server->active); | ||
333 | } | ||
334 | |||
335 | void nfs_sb_deactive(struct nfs_server *server) | ||
336 | { | ||
337 | if (atomic_dec_and_test(&server->active)) | ||
338 | wake_up(&server->active_wq); | ||
339 | } | ||
340 | |||
341 | static void nfs_put_super(struct super_block *sb) | ||
342 | { | ||
343 | struct nfs_server *server = NFS_SB(sb); | ||
344 | /* | ||
345 | * Make sure there are no outstanding ops to this server. | ||
346 | * If so, wait for them to finish before allowing the | ||
347 | * unmount to continue. | ||
348 | */ | ||
349 | wait_event(server->active_wq, atomic_read(&server->active) == 0); | ||
350 | } | ||
351 | |||
328 | /* | 352 | /* |
329 | * Deliver file system statistics to userspace | 353 | * Deliver file system statistics to userspace |
330 | */ | 354 | */ |
@@ -454,8 +478,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
454 | } | 478 | } |
455 | seq_printf(m, ",proto=%s", | 479 | seq_printf(m, ",proto=%s", |
456 | rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); | 480 | rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); |
457 | seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); | 481 | seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); |
458 | seq_printf(m, ",retrans=%u", clp->retrans_count); | 482 | seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); |
459 | seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); | 483 | seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); |
460 | } | 484 | } |
461 | 485 | ||
@@ -468,8 +492,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
468 | 492 | ||
469 | nfs_show_mount_options(m, nfss, 0); | 493 | nfs_show_mount_options(m, nfss, 0); |
470 | 494 | ||
471 | seq_printf(m, ",addr="NIPQUAD_FMT, | 495 | seq_printf(m, ",addr=%s", |
472 | NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); | 496 | rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, |
497 | RPC_DISPLAY_ADDR)); | ||
473 | 498 | ||
474 | return 0; | 499 | return 0; |
475 | } | 500 | } |
@@ -506,7 +531,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
506 | seq_printf(m, ",namelen=%d", nfss->namelen); | 531 | seq_printf(m, ",namelen=%d", nfss->namelen); |
507 | 532 | ||
508 | #ifdef CONFIG_NFS_V4 | 533 | #ifdef CONFIG_NFS_V4 |
509 | if (nfss->nfs_client->cl_nfsversion == 4) { | 534 | if (nfss->nfs_client->rpc_ops->version == 4) { |
510 | seq_printf(m, "\n\tnfsv4:\t"); | 535 | seq_printf(m, "\n\tnfsv4:\t"); |
511 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | 536 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); |
512 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | 537 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); |
@@ -574,16 +599,40 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) | |||
574 | } | 599 | } |
575 | 600 | ||
576 | /* | 601 | /* |
577 | * Sanity-check a server address provided by the mount command | 602 | * Set the port number in an address. Be agnostic about the address family. |
603 | */ | ||
604 | static void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
605 | { | ||
606 | switch (sap->sa_family) { | ||
607 | case AF_INET: { | ||
608 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
609 | ap->sin_port = htons(port); | ||
610 | break; | ||
611 | } | ||
612 | case AF_INET6: { | ||
613 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
614 | ap->sin6_port = htons(port); | ||
615 | break; | ||
616 | } | ||
617 | } | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * Sanity-check a server address provided by the mount command. | ||
622 | * | ||
623 | * Address family must be initialized, and address must not be | ||
624 | * the ANY address for that family. | ||
578 | */ | 625 | */ |
579 | static int nfs_verify_server_address(struct sockaddr *addr) | 626 | static int nfs_verify_server_address(struct sockaddr *addr) |
580 | { | 627 | { |
581 | switch (addr->sa_family) { | 628 | switch (addr->sa_family) { |
582 | case AF_INET: { | 629 | case AF_INET: { |
583 | struct sockaddr_in *sa = (struct sockaddr_in *) addr; | 630 | struct sockaddr_in *sa = (struct sockaddr_in *)addr; |
584 | if (sa->sin_addr.s_addr != INADDR_ANY) | 631 | return sa->sin_addr.s_addr != INADDR_ANY; |
585 | return 1; | 632 | } |
586 | break; | 633 | case AF_INET6: { |
634 | struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; | ||
635 | return !ipv6_addr_any(sa); | ||
587 | } | 636 | } |
588 | } | 637 | } |
589 | 638 | ||
@@ -591,6 +640,40 @@ static int nfs_verify_server_address(struct sockaddr *addr) | |||
591 | } | 640 | } |
592 | 641 | ||
593 | /* | 642 | /* |
643 | * Parse string addresses passed in via a mount option, | ||
644 | * and construct a sockaddr based on the result. | ||
645 | * | ||
646 | * If address parsing fails, set the sockaddr's address | ||
647 | * family to AF_UNSPEC to force nfs_verify_server_address() | ||
648 | * to punt the mount. | ||
649 | */ | ||
650 | static void nfs_parse_server_address(char *value, | ||
651 | struct sockaddr *sap, | ||
652 | size_t *len) | ||
653 | { | ||
654 | if (strchr(value, ':')) { | ||
655 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
656 | u8 *addr = (u8 *)&ap->sin6_addr.in6_u; | ||
657 | |||
658 | ap->sin6_family = AF_INET6; | ||
659 | *len = sizeof(*ap); | ||
660 | if (in6_pton(value, -1, addr, '\0', NULL)) | ||
661 | return; | ||
662 | } else { | ||
663 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
664 | u8 *addr = (u8 *)&ap->sin_addr.s_addr; | ||
665 | |||
666 | ap->sin_family = AF_INET; | ||
667 | *len = sizeof(*ap); | ||
668 | if (in4_pton(value, -1, addr, '\0', NULL)) | ||
669 | return; | ||
670 | } | ||
671 | |||
672 | sap->sa_family = AF_UNSPEC; | ||
673 | *len = 0; | ||
674 | } | ||
675 | |||
676 | /* | ||
594 | * Error-check and convert a string of mount options from user space into | 677 | * Error-check and convert a string of mount options from user space into |
595 | * a data structure | 678 | * a data structure |
596 | */ | 679 | */ |
@@ -598,6 +681,7 @@ static int nfs_parse_mount_options(char *raw, | |||
598 | struct nfs_parsed_mount_data *mnt) | 681 | struct nfs_parsed_mount_data *mnt) |
599 | { | 682 | { |
600 | char *p, *string; | 683 | char *p, *string; |
684 | unsigned short port = 0; | ||
601 | 685 | ||
602 | if (!raw) { | 686 | if (!raw) { |
603 | dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); | 687 | dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); |
@@ -697,7 +781,7 @@ static int nfs_parse_mount_options(char *raw, | |||
697 | return 0; | 781 | return 0; |
698 | if (option < 0 || option > 65535) | 782 | if (option < 0 || option > 65535) |
699 | return 0; | 783 | return 0; |
700 | mnt->nfs_server.address.sin_port = htons(option); | 784 | port = option; |
701 | break; | 785 | break; |
702 | case Opt_rsize: | 786 | case Opt_rsize: |
703 | if (match_int(args, &mnt->rsize)) | 787 | if (match_int(args, &mnt->rsize)) |
@@ -759,13 +843,6 @@ static int nfs_parse_mount_options(char *raw, | |||
759 | return 0; | 843 | return 0; |
760 | mnt->mount_server.port = option; | 844 | mnt->mount_server.port = option; |
761 | break; | 845 | break; |
762 | case Opt_mountprog: | ||
763 | if (match_int(args, &option)) | ||
764 | return 0; | ||
765 | if (option < 0) | ||
766 | return 0; | ||
767 | mnt->mount_server.program = option; | ||
768 | break; | ||
769 | case Opt_mountvers: | 846 | case Opt_mountvers: |
770 | if (match_int(args, &option)) | 847 | if (match_int(args, &option)) |
771 | return 0; | 848 | return 0; |
@@ -773,13 +850,6 @@ static int nfs_parse_mount_options(char *raw, | |||
773 | return 0; | 850 | return 0; |
774 | mnt->mount_server.version = option; | 851 | mnt->mount_server.version = option; |
775 | break; | 852 | break; |
776 | case Opt_nfsprog: | ||
777 | if (match_int(args, &option)) | ||
778 | return 0; | ||
779 | if (option < 0) | ||
780 | return 0; | ||
781 | mnt->nfs_server.program = option; | ||
782 | break; | ||
783 | case Opt_nfsvers: | 853 | case Opt_nfsvers: |
784 | if (match_int(args, &option)) | 854 | if (match_int(args, &option)) |
785 | return 0; | 855 | return 0; |
@@ -923,24 +993,32 @@ static int nfs_parse_mount_options(char *raw, | |||
923 | string = match_strdup(args); | 993 | string = match_strdup(args); |
924 | if (string == NULL) | 994 | if (string == NULL) |
925 | goto out_nomem; | 995 | goto out_nomem; |
926 | mnt->nfs_server.address.sin_family = AF_INET; | 996 | nfs_parse_server_address(string, (struct sockaddr *) |
927 | mnt->nfs_server.address.sin_addr.s_addr = | 997 | &mnt->nfs_server.address, |
928 | in_aton(string); | 998 | &mnt->nfs_server.addrlen); |
929 | kfree(string); | 999 | kfree(string); |
930 | break; | 1000 | break; |
931 | case Opt_clientaddr: | 1001 | case Opt_clientaddr: |
932 | string = match_strdup(args); | 1002 | string = match_strdup(args); |
933 | if (string == NULL) | 1003 | if (string == NULL) |
934 | goto out_nomem; | 1004 | goto out_nomem; |
1005 | kfree(mnt->client_address); | ||
935 | mnt->client_address = string; | 1006 | mnt->client_address = string; |
936 | break; | 1007 | break; |
1008 | case Opt_mounthost: | ||
1009 | string = match_strdup(args); | ||
1010 | if (string == NULL) | ||
1011 | goto out_nomem; | ||
1012 | kfree(mnt->mount_server.hostname); | ||
1013 | mnt->mount_server.hostname = string; | ||
1014 | break; | ||
937 | case Opt_mountaddr: | 1015 | case Opt_mountaddr: |
938 | string = match_strdup(args); | 1016 | string = match_strdup(args); |
939 | if (string == NULL) | 1017 | if (string == NULL) |
940 | goto out_nomem; | 1018 | goto out_nomem; |
941 | mnt->mount_server.address.sin_family = AF_INET; | 1019 | nfs_parse_server_address(string, (struct sockaddr *) |
942 | mnt->mount_server.address.sin_addr.s_addr = | 1020 | &mnt->mount_server.address, |
943 | in_aton(string); | 1021 | &mnt->mount_server.addrlen); |
944 | kfree(string); | 1022 | kfree(string); |
945 | break; | 1023 | break; |
946 | 1024 | ||
@@ -953,6 +1031,8 @@ static int nfs_parse_mount_options(char *raw, | |||
953 | } | 1031 | } |
954 | } | 1032 | } |
955 | 1033 | ||
1034 | nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port); | ||
1035 | |||
956 | return 1; | 1036 | return 1; |
957 | 1037 | ||
958 | out_nomem: | 1038 | out_nomem: |
@@ -983,7 +1063,8 @@ out_unknown: | |||
983 | static int nfs_try_mount(struct nfs_parsed_mount_data *args, | 1063 | static int nfs_try_mount(struct nfs_parsed_mount_data *args, |
984 | struct nfs_fh *root_fh) | 1064 | struct nfs_fh *root_fh) |
985 | { | 1065 | { |
986 | struct sockaddr_in sin; | 1066 | struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address; |
1067 | char *hostname; | ||
987 | int status; | 1068 | int status; |
988 | 1069 | ||
989 | if (args->mount_server.version == 0) { | 1070 | if (args->mount_server.version == 0) { |
@@ -993,25 +1074,32 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
993 | args->mount_server.version = NFS_MNT_VERSION; | 1074 | args->mount_server.version = NFS_MNT_VERSION; |
994 | } | 1075 | } |
995 | 1076 | ||
1077 | if (args->mount_server.hostname) | ||
1078 | hostname = args->mount_server.hostname; | ||
1079 | else | ||
1080 | hostname = args->nfs_server.hostname; | ||
1081 | |||
996 | /* | 1082 | /* |
997 | * Construct the mount server's address. | 1083 | * Construct the mount server's address. |
998 | */ | 1084 | */ |
999 | if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY) | 1085 | if (args->mount_server.address.ss_family == AF_UNSPEC) { |
1000 | sin = args->mount_server.address; | 1086 | memcpy(sap, &args->nfs_server.address, |
1001 | else | 1087 | args->nfs_server.addrlen); |
1002 | sin = args->nfs_server.address; | 1088 | args->mount_server.addrlen = args->nfs_server.addrlen; |
1089 | } | ||
1090 | |||
1003 | /* | 1091 | /* |
1004 | * autobind will be used if mount_server.port == 0 | 1092 | * autobind will be used if mount_server.port == 0 |
1005 | */ | 1093 | */ |
1006 | sin.sin_port = htons(args->mount_server.port); | 1094 | nfs_set_port(sap, args->mount_server.port); |
1007 | 1095 | ||
1008 | /* | 1096 | /* |
1009 | * Now ask the mount server to map our export path | 1097 | * Now ask the mount server to map our export path |
1010 | * to a file handle. | 1098 | * to a file handle. |
1011 | */ | 1099 | */ |
1012 | status = nfs_mount((struct sockaddr *) &sin, | 1100 | status = nfs_mount(sap, |
1013 | sizeof(sin), | 1101 | args->mount_server.addrlen, |
1014 | args->nfs_server.hostname, | 1102 | hostname, |
1015 | args->nfs_server.export_path, | 1103 | args->nfs_server.export_path, |
1016 | args->mount_server.version, | 1104 | args->mount_server.version, |
1017 | args->mount_server.protocol, | 1105 | args->mount_server.protocol, |
@@ -1019,8 +1107,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1019 | if (status == 0) | 1107 | if (status == 0) |
1020 | return 0; | 1108 | return 0; |
1021 | 1109 | ||
1022 | dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT | 1110 | dfprintk(MOUNT, "NFS: unable to mount server %s, error %d", |
1023 | ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status); | 1111 | hostname, status); |
1024 | return status; | 1112 | return status; |
1025 | } | 1113 | } |
1026 | 1114 | ||
@@ -1039,9 +1127,6 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1039 | * | 1127 | * |
1040 | * + breaking back: trying proto=udp after proto=tcp, v2 after v3, | 1128 | * + breaking back: trying proto=udp after proto=tcp, v2 after v3, |
1041 | * mountproto=tcp after mountproto=udp, and so on | 1129 | * mountproto=tcp after mountproto=udp, and so on |
1042 | * | ||
1043 | * XXX: as far as I can tell, changing the NFS program number is not | ||
1044 | * supported in the NFS client. | ||
1045 | */ | 1130 | */ |
1046 | static int nfs_validate_mount_data(void *options, | 1131 | static int nfs_validate_mount_data(void *options, |
1047 | struct nfs_parsed_mount_data *args, | 1132 | struct nfs_parsed_mount_data *args, |
@@ -1065,9 +1150,7 @@ static int nfs_validate_mount_data(void *options, | |||
1065 | args->acdirmin = 30; | 1150 | args->acdirmin = 30; |
1066 | args->acdirmax = 60; | 1151 | args->acdirmax = 60; |
1067 | args->mount_server.protocol = XPRT_TRANSPORT_UDP; | 1152 | args->mount_server.protocol = XPRT_TRANSPORT_UDP; |
1068 | args->mount_server.program = NFS_MNT_PROGRAM; | ||
1069 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | 1153 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; |
1070 | args->nfs_server.program = NFS_PROGRAM; | ||
1071 | 1154 | ||
1072 | switch (data->version) { | 1155 | switch (data->version) { |
1073 | case 1: | 1156 | case 1: |
@@ -1098,9 +1181,6 @@ static int nfs_validate_mount_data(void *options, | |||
1098 | memset(mntfh->data + mntfh->size, 0, | 1181 | memset(mntfh->data + mntfh->size, 0, |
1099 | sizeof(mntfh->data) - mntfh->size); | 1182 | sizeof(mntfh->data) - mntfh->size); |
1100 | 1183 | ||
1101 | if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) | ||
1102 | goto out_no_address; | ||
1103 | |||
1104 | /* | 1184 | /* |
1105 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | 1185 | * Translate to nfs_parsed_mount_data, which nfs_fill_super |
1106 | * can deal with. | 1186 | * can deal with. |
@@ -1115,7 +1195,14 @@ static int nfs_validate_mount_data(void *options, | |||
1115 | args->acregmax = data->acregmax; | 1195 | args->acregmax = data->acregmax; |
1116 | args->acdirmin = data->acdirmin; | 1196 | args->acdirmin = data->acdirmin; |
1117 | args->acdirmax = data->acdirmax; | 1197 | args->acdirmax = data->acdirmax; |
1118 | args->nfs_server.address = data->addr; | 1198 | |
1199 | memcpy(&args->nfs_server.address, &data->addr, | ||
1200 | sizeof(data->addr)); | ||
1201 | args->nfs_server.addrlen = sizeof(data->addr); | ||
1202 | if (!nfs_verify_server_address((struct sockaddr *) | ||
1203 | &args->nfs_server.address)) | ||
1204 | goto out_no_address; | ||
1205 | |||
1119 | if (!(data->flags & NFS_MOUNT_TCP)) | 1206 | if (!(data->flags & NFS_MOUNT_TCP)) |
1120 | args->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 1207 | args->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
1121 | /* N.B. caller will free nfs_server.hostname in all cases */ | 1208 | /* N.B. caller will free nfs_server.hostname in all cases */ |
@@ -1318,15 +1405,50 @@ static int nfs_set_super(struct super_block *s, void *data) | |||
1318 | return ret; | 1405 | return ret; |
1319 | } | 1406 | } |
1320 | 1407 | ||
1408 | static int nfs_compare_super_address(struct nfs_server *server1, | ||
1409 | struct nfs_server *server2) | ||
1410 | { | ||
1411 | struct sockaddr *sap1, *sap2; | ||
1412 | |||
1413 | sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr; | ||
1414 | sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr; | ||
1415 | |||
1416 | if (sap1->sa_family != sap2->sa_family) | ||
1417 | return 0; | ||
1418 | |||
1419 | switch (sap1->sa_family) { | ||
1420 | case AF_INET: { | ||
1421 | struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1; | ||
1422 | struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2; | ||
1423 | if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr) | ||
1424 | return 0; | ||
1425 | if (sin1->sin_port != sin2->sin_port) | ||
1426 | return 0; | ||
1427 | break; | ||
1428 | } | ||
1429 | case AF_INET6: { | ||
1430 | struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1; | ||
1431 | struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2; | ||
1432 | if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) | ||
1433 | return 0; | ||
1434 | if (sin1->sin6_port != sin2->sin6_port) | ||
1435 | return 0; | ||
1436 | break; | ||
1437 | } | ||
1438 | default: | ||
1439 | return 0; | ||
1440 | } | ||
1441 | |||
1442 | return 1; | ||
1443 | } | ||
1444 | |||
1321 | static int nfs_compare_super(struct super_block *sb, void *data) | 1445 | static int nfs_compare_super(struct super_block *sb, void *data) |
1322 | { | 1446 | { |
1323 | struct nfs_sb_mountdata *sb_mntdata = data; | 1447 | struct nfs_sb_mountdata *sb_mntdata = data; |
1324 | struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); | 1448 | struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); |
1325 | int mntflags = sb_mntdata->mntflags; | 1449 | int mntflags = sb_mntdata->mntflags; |
1326 | 1450 | ||
1327 | if (memcmp(&old->nfs_client->cl_addr, | 1451 | if (!nfs_compare_super_address(old, server)) |
1328 | &server->nfs_client->cl_addr, | ||
1329 | sizeof(old->nfs_client->cl_addr)) != 0) | ||
1330 | return 0; | 1452 | return 0; |
1331 | /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ | 1453 | /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ |
1332 | if (old->flags & NFS_MOUNT_UNSHARED) | 1454 | if (old->flags & NFS_MOUNT_UNSHARED) |
@@ -1396,6 +1518,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
1396 | 1518 | ||
1397 | out: | 1519 | out: |
1398 | kfree(data.nfs_server.hostname); | 1520 | kfree(data.nfs_server.hostname); |
1521 | kfree(data.mount_server.hostname); | ||
1399 | return error; | 1522 | return error; |
1400 | 1523 | ||
1401 | out_err_nosb: | 1524 | out_err_nosb: |
@@ -1471,7 +1594,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
1471 | error = PTR_ERR(mntroot); | 1594 | error = PTR_ERR(mntroot); |
1472 | goto error_splat_super; | 1595 | goto error_splat_super; |
1473 | } | 1596 | } |
1474 | if (mntroot->d_inode->i_op != &nfs_dir_inode_operations) { | 1597 | if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { |
1475 | dput(mntroot); | 1598 | dput(mntroot); |
1476 | error = -ESTALE; | 1599 | error = -ESTALE; |
1477 | goto error_splat_super; | 1600 | goto error_splat_super; |
@@ -1524,12 +1647,35 @@ static void nfs4_fill_super(struct super_block *sb) | |||
1524 | } | 1647 | } |
1525 | 1648 | ||
1526 | /* | 1649 | /* |
1650 | * If the user didn't specify a port, set the port number to | ||
1651 | * the NFS version 4 default port. | ||
1652 | */ | ||
1653 | static void nfs4_default_port(struct sockaddr *sap) | ||
1654 | { | ||
1655 | switch (sap->sa_family) { | ||
1656 | case AF_INET: { | ||
1657 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
1658 | if (ap->sin_port == 0) | ||
1659 | ap->sin_port = htons(NFS_PORT); | ||
1660 | break; | ||
1661 | } | ||
1662 | case AF_INET6: { | ||
1663 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
1664 | if (ap->sin6_port == 0) | ||
1665 | ap->sin6_port = htons(NFS_PORT); | ||
1666 | break; | ||
1667 | } | ||
1668 | } | ||
1669 | } | ||
1670 | |||
1671 | /* | ||
1527 | * Validate NFSv4 mount options | 1672 | * Validate NFSv4 mount options |
1528 | */ | 1673 | */ |
1529 | static int nfs4_validate_mount_data(void *options, | 1674 | static int nfs4_validate_mount_data(void *options, |
1530 | struct nfs_parsed_mount_data *args, | 1675 | struct nfs_parsed_mount_data *args, |
1531 | const char *dev_name) | 1676 | const char *dev_name) |
1532 | { | 1677 | { |
1678 | struct sockaddr_in *ap; | ||
1533 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; | 1679 | struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; |
1534 | char *c; | 1680 | char *c; |
1535 | 1681 | ||
@@ -1550,18 +1696,21 @@ static int nfs4_validate_mount_data(void *options, | |||
1550 | 1696 | ||
1551 | switch (data->version) { | 1697 | switch (data->version) { |
1552 | case 1: | 1698 | case 1: |
1553 | if (data->host_addrlen != sizeof(args->nfs_server.address)) | 1699 | ap = (struct sockaddr_in *)&args->nfs_server.address; |
1700 | if (data->host_addrlen > sizeof(args->nfs_server.address)) | ||
1701 | goto out_no_address; | ||
1702 | if (data->host_addrlen == 0) | ||
1554 | goto out_no_address; | 1703 | goto out_no_address; |
1555 | if (copy_from_user(&args->nfs_server.address, | 1704 | args->nfs_server.addrlen = data->host_addrlen; |
1556 | data->host_addr, | 1705 | if (copy_from_user(ap, data->host_addr, data->host_addrlen)) |
1557 | sizeof(args->nfs_server.address))) | ||
1558 | return -EFAULT; | 1706 | return -EFAULT; |
1559 | if (args->nfs_server.address.sin_port == 0) | ||
1560 | args->nfs_server.address.sin_port = htons(NFS_PORT); | ||
1561 | if (!nfs_verify_server_address((struct sockaddr *) | 1707 | if (!nfs_verify_server_address((struct sockaddr *) |
1562 | &args->nfs_server.address)) | 1708 | &args->nfs_server.address)) |
1563 | goto out_no_address; | 1709 | goto out_no_address; |
1564 | 1710 | ||
1711 | nfs4_default_port((struct sockaddr *) | ||
1712 | &args->nfs_server.address); | ||
1713 | |||
1565 | switch (data->auth_flavourlen) { | 1714 | switch (data->auth_flavourlen) { |
1566 | case 0: | 1715 | case 0: |
1567 | args->auth_flavors[0] = RPC_AUTH_UNIX; | 1716 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
@@ -1619,6 +1768,9 @@ static int nfs4_validate_mount_data(void *options, | |||
1619 | &args->nfs_server.address)) | 1768 | &args->nfs_server.address)) |
1620 | return -EINVAL; | 1769 | return -EINVAL; |
1621 | 1770 | ||
1771 | nfs4_default_port((struct sockaddr *) | ||
1772 | &args->nfs_server.address); | ||
1773 | |||
1622 | switch (args->auth_flavor_len) { | 1774 | switch (args->auth_flavor_len) { |
1623 | case 0: | 1775 | case 0: |
1624 | args->auth_flavors[0] = RPC_AUTH_UNIX; | 1776 | args->auth_flavors[0] = RPC_AUTH_UNIX; |
@@ -1639,21 +1791,16 @@ static int nfs4_validate_mount_data(void *options, | |||
1639 | len = c - dev_name; | 1791 | len = c - dev_name; |
1640 | if (len > NFS4_MAXNAMLEN) | 1792 | if (len > NFS4_MAXNAMLEN) |
1641 | return -ENAMETOOLONG; | 1793 | return -ENAMETOOLONG; |
1642 | args->nfs_server.hostname = kzalloc(len, GFP_KERNEL); | 1794 | /* N.B. caller will free nfs_server.hostname in all cases */ |
1643 | if (args->nfs_server.hostname == NULL) | 1795 | args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); |
1644 | return -ENOMEM; | ||
1645 | strncpy(args->nfs_server.hostname, dev_name, len - 1); | ||
1646 | 1796 | ||
1647 | c++; /* step over the ':' */ | 1797 | c++; /* step over the ':' */ |
1648 | len = strlen(c); | 1798 | len = strlen(c); |
1649 | if (len > NFS4_MAXPATHLEN) | 1799 | if (len > NFS4_MAXPATHLEN) |
1650 | return -ENAMETOOLONG; | 1800 | return -ENAMETOOLONG; |
1651 | args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL); | 1801 | args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); |
1652 | if (args->nfs_server.export_path == NULL) | ||
1653 | return -ENOMEM; | ||
1654 | strncpy(args->nfs_server.export_path, c, len); | ||
1655 | 1802 | ||
1656 | dprintk("MNTPATH: %s\n", args->nfs_server.export_path); | 1803 | dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); |
1657 | 1804 | ||
1658 | if (args->client_address == NULL) | 1805 | if (args->client_address == NULL) |
1659 | goto out_no_client_address; | 1806 | goto out_no_client_address; |
@@ -1822,6 +1969,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
1822 | error = PTR_ERR(mntroot); | 1969 | error = PTR_ERR(mntroot); |
1823 | goto error_splat_super; | 1970 | goto error_splat_super; |
1824 | } | 1971 | } |
1972 | if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { | ||
1973 | dput(mntroot); | ||
1974 | error = -ESTALE; | ||
1975 | goto error_splat_super; | ||
1976 | } | ||
1825 | 1977 | ||
1826 | s->s_flags |= MS_ACTIVE; | 1978 | s->s_flags |= MS_ACTIVE; |
1827 | mnt->mnt_sb = s; | 1979 | mnt->mnt_sb = s; |
@@ -1896,6 +2048,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, | |||
1896 | error = PTR_ERR(mntroot); | 2048 | error = PTR_ERR(mntroot); |
1897 | goto error_splat_super; | 2049 | goto error_splat_super; |
1898 | } | 2050 | } |
2051 | if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { | ||
2052 | dput(mntroot); | ||
2053 | error = -ESTALE; | ||
2054 | goto error_splat_super; | ||
2055 | } | ||
1899 | 2056 | ||
1900 | s->s_flags |= MS_ACTIVE; | 2057 | s->s_flags |= MS_ACTIVE; |
1901 | mnt->mnt_sb = s; | 2058 | mnt->mnt_sb = s; |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 233ad38161f9..757415363422 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/wait.h> | 15 | #include <linux/wait.h> |
16 | 16 | ||
17 | #include "internal.h" | ||
18 | |||
17 | struct nfs_unlinkdata { | 19 | struct nfs_unlinkdata { |
18 | struct hlist_node list; | 20 | struct hlist_node list; |
19 | struct nfs_removeargs args; | 21 | struct nfs_removeargs args; |
@@ -69,24 +71,6 @@ static void nfs_dec_sillycount(struct inode *dir) | |||
69 | } | 71 | } |
70 | 72 | ||
71 | /** | 73 | /** |
72 | * nfs_async_unlink_init - Initialize the RPC info | ||
73 | * task: rpc_task of the sillydelete | ||
74 | */ | ||
75 | static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) | ||
76 | { | ||
77 | struct nfs_unlinkdata *data = calldata; | ||
78 | struct inode *dir = data->dir; | ||
79 | struct rpc_message msg = { | ||
80 | .rpc_argp = &data->args, | ||
81 | .rpc_resp = &data->res, | ||
82 | .rpc_cred = data->cred, | ||
83 | }; | ||
84 | |||
85 | NFS_PROTO(dir)->unlink_setup(&msg, dir); | ||
86 | rpc_call_setup(task, &msg, 0); | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * nfs_async_unlink_done - Sillydelete post-processing | 74 | * nfs_async_unlink_done - Sillydelete post-processing |
91 | * @task: rpc_task of the sillydelete | 75 | * @task: rpc_task of the sillydelete |
92 | * | 76 | * |
@@ -113,32 +97,45 @@ static void nfs_async_unlink_release(void *calldata) | |||
113 | struct nfs_unlinkdata *data = calldata; | 97 | struct nfs_unlinkdata *data = calldata; |
114 | 98 | ||
115 | nfs_dec_sillycount(data->dir); | 99 | nfs_dec_sillycount(data->dir); |
100 | nfs_sb_deactive(NFS_SERVER(data->dir)); | ||
116 | nfs_free_unlinkdata(data); | 101 | nfs_free_unlinkdata(data); |
117 | } | 102 | } |
118 | 103 | ||
119 | static const struct rpc_call_ops nfs_unlink_ops = { | 104 | static const struct rpc_call_ops nfs_unlink_ops = { |
120 | .rpc_call_prepare = nfs_async_unlink_init, | ||
121 | .rpc_call_done = nfs_async_unlink_done, | 105 | .rpc_call_done = nfs_async_unlink_done, |
122 | .rpc_release = nfs_async_unlink_release, | 106 | .rpc_release = nfs_async_unlink_release, |
123 | }; | 107 | }; |
124 | 108 | ||
125 | static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) | 109 | static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) |
126 | { | 110 | { |
111 | struct rpc_message msg = { | ||
112 | .rpc_argp = &data->args, | ||
113 | .rpc_resp = &data->res, | ||
114 | .rpc_cred = data->cred, | ||
115 | }; | ||
116 | struct rpc_task_setup task_setup_data = { | ||
117 | .rpc_message = &msg, | ||
118 | .callback_ops = &nfs_unlink_ops, | ||
119 | .callback_data = data, | ||
120 | .flags = RPC_TASK_ASYNC, | ||
121 | }; | ||
127 | struct rpc_task *task; | 122 | struct rpc_task *task; |
128 | struct dentry *alias; | 123 | struct dentry *alias; |
129 | 124 | ||
130 | alias = d_lookup(parent, &data->args.name); | 125 | alias = d_lookup(parent, &data->args.name); |
131 | if (alias != NULL) { | 126 | if (alias != NULL) { |
132 | int ret = 0; | 127 | int ret = 0; |
128 | |||
133 | /* | 129 | /* |
134 | * Hey, we raced with lookup... See if we need to transfer | 130 | * Hey, we raced with lookup... See if we need to transfer |
135 | * the sillyrename information to the aliased dentry. | 131 | * the sillyrename information to the aliased dentry. |
136 | */ | 132 | */ |
137 | nfs_free_dname(data); | 133 | nfs_free_dname(data); |
138 | spin_lock(&alias->d_lock); | 134 | spin_lock(&alias->d_lock); |
139 | if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) { | 135 | if (alias->d_inode != NULL && |
136 | !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { | ||
140 | alias->d_fsdata = data; | 137 | alias->d_fsdata = data; |
141 | alias->d_flags ^= DCACHE_NFSFS_RENAMED; | 138 | alias->d_flags |= DCACHE_NFSFS_RENAMED; |
142 | ret = 1; | 139 | ret = 1; |
143 | } | 140 | } |
144 | spin_unlock(&alias->d_lock); | 141 | spin_unlock(&alias->d_lock); |
@@ -151,10 +148,14 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
151 | nfs_dec_sillycount(dir); | 148 | nfs_dec_sillycount(dir); |
152 | return 0; | 149 | return 0; |
153 | } | 150 | } |
151 | nfs_sb_active(NFS_SERVER(dir)); | ||
154 | data->args.fh = NFS_FH(dir); | 152 | data->args.fh = NFS_FH(dir); |
155 | nfs_fattr_init(&data->res.dir_attr); | 153 | nfs_fattr_init(&data->res.dir_attr); |
156 | 154 | ||
157 | task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data); | 155 | NFS_PROTO(dir)->unlink_setup(&msg, dir); |
156 | |||
157 | task_setup_data.rpc_client = NFS_CLIENT(dir); | ||
158 | task = rpc_run_task(&task_setup_data); | ||
158 | if (!IS_ERR(task)) | 159 | if (!IS_ERR(task)) |
159 | rpc_put_task(task); | 160 | rpc_put_task(task); |
160 | return 1; | 161 | return 1; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 60e3e870ada4..522efff3e2c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -196,7 +196,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
196 | } | 196 | } |
197 | /* Update file length */ | 197 | /* Update file length */ |
198 | nfs_grow_file(page, offset, count); | 198 | nfs_grow_file(page, offset, count); |
199 | nfs_unlock_request(req); | 199 | nfs_clear_page_tag_locked(req); |
200 | return 0; | 200 | return 0; |
201 | } | 201 | } |
202 | 202 | ||
@@ -252,7 +252,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
252 | struct page *page) | 252 | struct page *page) |
253 | { | 253 | { |
254 | struct inode *inode = page->mapping->host; | 254 | struct inode *inode = page->mapping->host; |
255 | struct nfs_inode *nfsi = NFS_I(inode); | ||
256 | struct nfs_page *req; | 255 | struct nfs_page *req; |
257 | int ret; | 256 | int ret; |
258 | 257 | ||
@@ -263,10 +262,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
263 | spin_unlock(&inode->i_lock); | 262 | spin_unlock(&inode->i_lock); |
264 | return 0; | 263 | return 0; |
265 | } | 264 | } |
266 | if (nfs_lock_request_dontget(req)) | 265 | if (nfs_set_page_tag_locked(req)) |
267 | break; | 266 | break; |
268 | /* Note: If we hold the page lock, as is the case in nfs_writepage, | 267 | /* Note: If we hold the page lock, as is the case in nfs_writepage, |
269 | * then the call to nfs_lock_request_dontget() will always | 268 | * then the call to nfs_set_page_tag_locked() will always |
270 | * succeed provided that someone hasn't already marked the | 269 | * succeed provided that someone hasn't already marked the |
271 | * request as dirty (in which case we don't care). | 270 | * request as dirty (in which case we don't care). |
272 | */ | 271 | */ |
@@ -280,7 +279,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
280 | if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 279 | if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { |
281 | /* This request is marked for commit */ | 280 | /* This request is marked for commit */ |
282 | spin_unlock(&inode->i_lock); | 281 | spin_unlock(&inode->i_lock); |
283 | nfs_unlock_request(req); | 282 | nfs_clear_page_tag_locked(req); |
284 | nfs_pageio_complete(pgio); | 283 | nfs_pageio_complete(pgio); |
285 | return 0; | 284 | return 0; |
286 | } | 285 | } |
@@ -288,8 +287,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | |||
288 | spin_unlock(&inode->i_lock); | 287 | spin_unlock(&inode->i_lock); |
289 | BUG(); | 288 | BUG(); |
290 | } | 289 | } |
291 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, | ||
292 | NFS_PAGE_TAG_LOCKED); | ||
293 | spin_unlock(&inode->i_lock); | 290 | spin_unlock(&inode->i_lock); |
294 | nfs_pageio_add_request(pgio, req); | 291 | nfs_pageio_add_request(pgio, req); |
295 | return 0; | 292 | return 0; |
@@ -381,6 +378,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
381 | set_page_private(req->wb_page, (unsigned long)req); | 378 | set_page_private(req->wb_page, (unsigned long)req); |
382 | nfsi->npages++; | 379 | nfsi->npages++; |
383 | kref_get(&req->wb_kref); | 380 | kref_get(&req->wb_kref); |
381 | radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | ||
384 | return 0; | 382 | return 0; |
385 | } | 383 | } |
386 | 384 | ||
@@ -596,7 +594,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | |||
596 | spin_lock(&inode->i_lock); | 594 | spin_lock(&inode->i_lock); |
597 | req = nfs_page_find_request_locked(page); | 595 | req = nfs_page_find_request_locked(page); |
598 | if (req) { | 596 | if (req) { |
599 | if (!nfs_lock_request_dontget(req)) { | 597 | if (!nfs_set_page_tag_locked(req)) { |
600 | int error; | 598 | int error; |
601 | 599 | ||
602 | spin_unlock(&inode->i_lock); | 600 | spin_unlock(&inode->i_lock); |
@@ -646,7 +644,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | |||
646 | || req->wb_page != page | 644 | || req->wb_page != page |
647 | || !nfs_dirty_request(req) | 645 | || !nfs_dirty_request(req) |
648 | || offset > rqend || end < req->wb_offset) { | 646 | || offset > rqend || end < req->wb_offset) { |
649 | nfs_unlock_request(req); | 647 | nfs_clear_page_tag_locked(req); |
650 | return ERR_PTR(-EBUSY); | 648 | return ERR_PTR(-EBUSY); |
651 | } | 649 | } |
652 | 650 | ||
@@ -755,7 +753,7 @@ static void nfs_writepage_release(struct nfs_page *req) | |||
755 | nfs_clear_page_tag_locked(req); | 753 | nfs_clear_page_tag_locked(req); |
756 | } | 754 | } |
757 | 755 | ||
758 | static inline int flush_task_priority(int how) | 756 | static int flush_task_priority(int how) |
759 | { | 757 | { |
760 | switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { | 758 | switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { |
761 | case FLUSH_HIGHPRI: | 759 | case FLUSH_HIGHPRI: |
@@ -775,15 +773,31 @@ static void nfs_write_rpcsetup(struct nfs_page *req, | |||
775 | unsigned int count, unsigned int offset, | 773 | unsigned int count, unsigned int offset, |
776 | int how) | 774 | int how) |
777 | { | 775 | { |
778 | struct inode *inode; | 776 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
779 | int flags; | 777 | int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; |
778 | int priority = flush_task_priority(how); | ||
779 | struct rpc_task *task; | ||
780 | struct rpc_message msg = { | ||
781 | .rpc_argp = &data->args, | ||
782 | .rpc_resp = &data->res, | ||
783 | .rpc_cred = req->wb_context->cred, | ||
784 | }; | ||
785 | struct rpc_task_setup task_setup_data = { | ||
786 | .rpc_client = NFS_CLIENT(inode), | ||
787 | .task = &data->task, | ||
788 | .rpc_message = &msg, | ||
789 | .callback_ops = call_ops, | ||
790 | .callback_data = data, | ||
791 | .flags = flags, | ||
792 | .priority = priority, | ||
793 | }; | ||
780 | 794 | ||
781 | /* Set up the RPC argument and reply structs | 795 | /* Set up the RPC argument and reply structs |
782 | * NB: take care not to mess about with data->commit et al. */ | 796 | * NB: take care not to mess about with data->commit et al. */ |
783 | 797 | ||
784 | data->req = req; | 798 | data->req = req; |
785 | data->inode = inode = req->wb_context->path.dentry->d_inode; | 799 | data->inode = inode = req->wb_context->path.dentry->d_inode; |
786 | data->cred = req->wb_context->cred; | 800 | data->cred = msg.rpc_cred; |
787 | 801 | ||
788 | data->args.fh = NFS_FH(inode); | 802 | data->args.fh = NFS_FH(inode); |
789 | data->args.offset = req_offset(req) + offset; | 803 | data->args.offset = req_offset(req) + offset; |
@@ -791,6 +805,12 @@ static void nfs_write_rpcsetup(struct nfs_page *req, | |||
791 | data->args.pages = data->pagevec; | 805 | data->args.pages = data->pagevec; |
792 | data->args.count = count; | 806 | data->args.count = count; |
793 | data->args.context = req->wb_context; | 807 | data->args.context = req->wb_context; |
808 | data->args.stable = NFS_UNSTABLE; | ||
809 | if (how & FLUSH_STABLE) { | ||
810 | data->args.stable = NFS_DATA_SYNC; | ||
811 | if (!NFS_I(inode)->ncommit) | ||
812 | data->args.stable = NFS_FILE_SYNC; | ||
813 | } | ||
794 | 814 | ||
795 | data->res.fattr = &data->fattr; | 815 | data->res.fattr = &data->fattr; |
796 | data->res.count = count; | 816 | data->res.count = count; |
@@ -798,12 +818,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, | |||
798 | nfs_fattr_init(&data->fattr); | 818 | nfs_fattr_init(&data->fattr); |
799 | 819 | ||
800 | /* Set up the initial task struct. */ | 820 | /* Set up the initial task struct. */ |
801 | flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; | 821 | NFS_PROTO(inode)->write_setup(data, &msg); |
802 | rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); | ||
803 | NFS_PROTO(inode)->write_setup(data, how); | ||
804 | |||
805 | data->task.tk_priority = flush_task_priority(how); | ||
806 | data->task.tk_cookie = (unsigned long)inode; | ||
807 | 822 | ||
808 | dprintk("NFS: %5u initiated write call " | 823 | dprintk("NFS: %5u initiated write call " |
809 | "(req %s/%Ld, %u bytes @ offset %Lu)\n", | 824 | "(req %s/%Ld, %u bytes @ offset %Lu)\n", |
@@ -812,11 +827,10 @@ static void nfs_write_rpcsetup(struct nfs_page *req, | |||
812 | (long long)NFS_FILEID(inode), | 827 | (long long)NFS_FILEID(inode), |
813 | count, | 828 | count, |
814 | (unsigned long long)data->args.offset); | 829 | (unsigned long long)data->args.offset); |
815 | } | ||
816 | 830 | ||
817 | static void nfs_execute_write(struct nfs_write_data *data) | 831 | task = rpc_run_task(&task_setup_data); |
818 | { | 832 | if (!IS_ERR(task)) |
819 | rpc_execute(&data->task); | 833 | rpc_put_task(task); |
820 | } | 834 | } |
821 | 835 | ||
822 | /* | 836 | /* |
@@ -863,7 +877,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned | |||
863 | wsize, offset, how); | 877 | wsize, offset, how); |
864 | offset += wsize; | 878 | offset += wsize; |
865 | nbytes -= wsize; | 879 | nbytes -= wsize; |
866 | nfs_execute_write(data); | ||
867 | } while (nbytes != 0); | 880 | } while (nbytes != 0); |
868 | 881 | ||
869 | return 0; | 882 | return 0; |
@@ -911,7 +924,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i | |||
911 | /* Set up the argument struct */ | 924 | /* Set up the argument struct */ |
912 | nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); | 925 | nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); |
913 | 926 | ||
914 | nfs_execute_write(data); | ||
915 | return 0; | 927 | return 0; |
916 | out_bad: | 928 | out_bad: |
917 | while (!list_empty(head)) { | 929 | while (!list_empty(head)) { |
@@ -927,7 +939,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i | |||
927 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 939 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
928 | struct inode *inode, int ioflags) | 940 | struct inode *inode, int ioflags) |
929 | { | 941 | { |
930 | int wsize = NFS_SERVER(inode)->wsize; | 942 | size_t wsize = NFS_SERVER(inode)->wsize; |
931 | 943 | ||
932 | if (wsize < PAGE_CACHE_SIZE) | 944 | if (wsize < PAGE_CACHE_SIZE) |
933 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); | 945 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); |
@@ -1141,19 +1153,33 @@ static void nfs_commit_rpcsetup(struct list_head *head, | |||
1141 | struct nfs_write_data *data, | 1153 | struct nfs_write_data *data, |
1142 | int how) | 1154 | int how) |
1143 | { | 1155 | { |
1144 | struct nfs_page *first; | 1156 | struct nfs_page *first = nfs_list_entry(head->next); |
1145 | struct inode *inode; | 1157 | struct inode *inode = first->wb_context->path.dentry->d_inode; |
1146 | int flags; | 1158 | int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; |
1159 | int priority = flush_task_priority(how); | ||
1160 | struct rpc_task *task; | ||
1161 | struct rpc_message msg = { | ||
1162 | .rpc_argp = &data->args, | ||
1163 | .rpc_resp = &data->res, | ||
1164 | .rpc_cred = first->wb_context->cred, | ||
1165 | }; | ||
1166 | struct rpc_task_setup task_setup_data = { | ||
1167 | .task = &data->task, | ||
1168 | .rpc_client = NFS_CLIENT(inode), | ||
1169 | .rpc_message = &msg, | ||
1170 | .callback_ops = &nfs_commit_ops, | ||
1171 | .callback_data = data, | ||
1172 | .flags = flags, | ||
1173 | .priority = priority, | ||
1174 | }; | ||
1147 | 1175 | ||
1148 | /* Set up the RPC argument and reply structs | 1176 | /* Set up the RPC argument and reply structs |
1149 | * NB: take care not to mess about with data->commit et al. */ | 1177 | * NB: take care not to mess about with data->commit et al. */ |
1150 | 1178 | ||
1151 | list_splice_init(head, &data->pages); | 1179 | list_splice_init(head, &data->pages); |
1152 | first = nfs_list_entry(data->pages.next); | ||
1153 | inode = first->wb_context->path.dentry->d_inode; | ||
1154 | 1180 | ||
1155 | data->inode = inode; | 1181 | data->inode = inode; |
1156 | data->cred = first->wb_context->cred; | 1182 | data->cred = msg.rpc_cred; |
1157 | 1183 | ||
1158 | data->args.fh = NFS_FH(data->inode); | 1184 | data->args.fh = NFS_FH(data->inode); |
1159 | /* Note: we always request a commit of the entire inode */ | 1185 | /* Note: we always request a commit of the entire inode */ |
@@ -1165,14 +1191,13 @@ static void nfs_commit_rpcsetup(struct list_head *head, | |||
1165 | nfs_fattr_init(&data->fattr); | 1191 | nfs_fattr_init(&data->fattr); |
1166 | 1192 | ||
1167 | /* Set up the initial task struct. */ | 1193 | /* Set up the initial task struct. */ |
1168 | flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; | 1194 | NFS_PROTO(inode)->commit_setup(data, &msg); |
1169 | rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); | ||
1170 | NFS_PROTO(inode)->commit_setup(data, how); | ||
1171 | 1195 | ||
1172 | data->task.tk_priority = flush_task_priority(how); | ||
1173 | data->task.tk_cookie = (unsigned long)inode; | ||
1174 | |||
1175 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 1196 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); |
1197 | |||
1198 | task = rpc_run_task(&task_setup_data); | ||
1199 | if (!IS_ERR(task)) | ||
1200 | rpc_put_task(task); | ||
1176 | } | 1201 | } |
1177 | 1202 | ||
1178 | /* | 1203 | /* |
@@ -1192,7 +1217,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1192 | /* Set up the argument struct */ | 1217 | /* Set up the argument struct */ |
1193 | nfs_commit_rpcsetup(head, data, how); | 1218 | nfs_commit_rpcsetup(head, data, how); |
1194 | 1219 | ||
1195 | nfs_execute_write(data); | ||
1196 | return 0; | 1220 | return 0; |
1197 | out_bad: | 1221 | out_bad: |
1198 | while (!list_empty(head)) { | 1222 | while (!list_empty(head)) { |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2d116d2298f8..f917fd25858a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -388,8 +388,11 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
388 | * Round the length of the data which was specified up to | 388 | * Round the length of the data which was specified up to |
389 | * the next multiple of XDR units and then compare that | 389 | * the next multiple of XDR units and then compare that |
390 | * against the length which was actually received. | 390 | * against the length which was actually received. |
391 | * Note that when RPCSEC/GSS (for example) is used, the | ||
392 | * data buffer can be padded so dlen might be larger | ||
393 | * than required. It must never be smaller. | ||
391 | */ | 394 | */ |
392 | if (dlen != XDR_QUADLEN(len)*4) | 395 | if (dlen < XDR_QUADLEN(len)*4) |
393 | return 0; | 396 | return 0; |
394 | 397 | ||
395 | if (args->count > max_blocksize) { | 398 | if (args->count > max_blocksize) { |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 986f9b32083c..b86e3658a0af 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
@@ -313,8 +313,11 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
313 | * Round the length of the data which was specified up to | 313 | * Round the length of the data which was specified up to |
314 | * the next multiple of XDR units and then compare that | 314 | * the next multiple of XDR units and then compare that |
315 | * against the length which was actually received. | 315 | * against the length which was actually received. |
316 | * Note that when RPCSEC/GSS (for example) is used, the | ||
317 | * data buffer can be padded so dlen might be larger | ||
318 | * than required. It must never be smaller. | ||
316 | */ | 319 | */ |
317 | if (dlen != XDR_QUADLEN(len)*4) | 320 | if (dlen < XDR_QUADLEN(len)*4) |
318 | return 0; | 321 | return 0; |
319 | 322 | ||
320 | rqstp->rq_vec[0].iov_base = (void*)p; | 323 | rqstp->rq_vec[0].iov_base = (void*)p; |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 9fb8132f19b0..4d4ce48bb42c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -19,16 +19,17 @@ ocfs2-objs := \ | |||
19 | ioctl.o \ | 19 | ioctl.o \ |
20 | journal.o \ | 20 | journal.o \ |
21 | localalloc.o \ | 21 | localalloc.o \ |
22 | locks.o \ | ||
22 | mmap.o \ | 23 | mmap.o \ |
23 | namei.o \ | 24 | namei.o \ |
25 | resize.o \ | ||
24 | slot_map.o \ | 26 | slot_map.o \ |
25 | suballoc.o \ | 27 | suballoc.o \ |
26 | super.o \ | 28 | super.o \ |
27 | symlink.o \ | 29 | symlink.o \ |
28 | sysfile.o \ | 30 | sysfile.o \ |
29 | uptodate.o \ | 31 | uptodate.o \ |
30 | ver.o \ | 32 | ver.o |
31 | vote.o | ||
32 | 33 | ||
33 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 34 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
34 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 35 | obj-$(CONFIG_OCFS2_FS) += dlm/ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index ce62c152823d..e6df06ac6405 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -2389,6 +2389,18 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2389 | goto out; | 2389 | goto out; |
2390 | } | 2390 | } |
2391 | 2391 | ||
2392 | /* | ||
2393 | * Caller might still want to make changes to the | ||
2394 | * tree root, so re-add it to the journal here. | ||
2395 | */ | ||
2396 | ret = ocfs2_journal_access(handle, inode, | ||
2397 | path_root_bh(left_path), | ||
2398 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2399 | if (ret) { | ||
2400 | mlog_errno(ret); | ||
2401 | goto out; | ||
2402 | } | ||
2403 | |||
2392 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | 2404 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, |
2393 | right_path, subtree_root, | 2405 | right_path, subtree_root, |
2394 | dealloc, &deleted); | 2406 | dealloc, &deleted); |
@@ -3289,16 +3301,6 @@ static int ocfs2_insert_path(struct inode *inode, | |||
3289 | int ret, subtree_index; | 3301 | int ret, subtree_index; |
3290 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); | 3302 | struct buffer_head *leaf_bh = path_leaf_bh(right_path); |
3291 | 3303 | ||
3292 | /* | ||
3293 | * Pass both paths to the journal. The majority of inserts | ||
3294 | * will be touching all components anyway. | ||
3295 | */ | ||
3296 | ret = ocfs2_journal_access_path(inode, handle, right_path); | ||
3297 | if (ret < 0) { | ||
3298 | mlog_errno(ret); | ||
3299 | goto out; | ||
3300 | } | ||
3301 | |||
3302 | if (left_path) { | 3304 | if (left_path) { |
3303 | int credits = handle->h_buffer_credits; | 3305 | int credits = handle->h_buffer_credits; |
3304 | 3306 | ||
@@ -3323,6 +3325,16 @@ static int ocfs2_insert_path(struct inode *inode, | |||
3323 | } | 3325 | } |
3324 | } | 3326 | } |
3325 | 3327 | ||
3328 | /* | ||
3329 | * Pass both paths to the journal. The majority of inserts | ||
3330 | * will be touching all components anyway. | ||
3331 | */ | ||
3332 | ret = ocfs2_journal_access_path(inode, handle, right_path); | ||
3333 | if (ret < 0) { | ||
3334 | mlog_errno(ret); | ||
3335 | goto out; | ||
3336 | } | ||
3337 | |||
3326 | if (insert->ins_split != SPLIT_NONE) { | 3338 | if (insert->ins_split != SPLIT_NONE) { |
3327 | /* | 3339 | /* |
3328 | * We could call ocfs2_insert_at_leaf() for some types | 3340 | * We could call ocfs2_insert_at_leaf() for some types |
@@ -3331,6 +3343,17 @@ static int ocfs2_insert_path(struct inode *inode, | |||
3331 | */ | 3343 | */ |
3332 | ocfs2_split_record(inode, left_path, right_path, | 3344 | ocfs2_split_record(inode, left_path, right_path, |
3333 | insert_rec, insert->ins_split); | 3345 | insert_rec, insert->ins_split); |
3346 | |||
3347 | /* | ||
3348 | * Split might have modified either leaf and we don't | ||
3349 | * have a guarantee that the later edge insert will | ||
3350 | * dirty this for us. | ||
3351 | */ | ||
3352 | if (left_path) | ||
3353 | ret = ocfs2_journal_dirty(handle, | ||
3354 | path_leaf_bh(left_path)); | ||
3355 | if (ret) | ||
3356 | mlog_errno(ret); | ||
3334 | } else | 3357 | } else |
3335 | ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), | 3358 | ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), |
3336 | insert, inode); | 3359 | insert, inode); |
@@ -3430,6 +3453,17 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
3430 | mlog_errno(ret); | 3453 | mlog_errno(ret); |
3431 | goto out; | 3454 | goto out; |
3432 | } | 3455 | } |
3456 | |||
3457 | /* | ||
3458 | * ocfs2_rotate_tree_right() might have extended the | ||
3459 | * transaction without re-journaling our tree root. | ||
3460 | */ | ||
3461 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
3462 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3463 | if (ret) { | ||
3464 | mlog_errno(ret); | ||
3465 | goto out; | ||
3466 | } | ||
3433 | } else if (type->ins_appending == APPEND_TAIL | 3467 | } else if (type->ins_appending == APPEND_TAIL |
3434 | && type->ins_contig != CONTIG_LEFT) { | 3468 | && type->ins_contig != CONTIG_LEFT) { |
3435 | ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, | 3469 | ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, |
@@ -3941,7 +3975,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
3941 | { | 3975 | { |
3942 | int ret = 0; | 3976 | int ret = 0; |
3943 | struct ocfs2_extent_list *el = path_leaf_el(path); | 3977 | struct ocfs2_extent_list *el = path_leaf_el(path); |
3944 | struct buffer_head *eb_bh, *last_eb_bh = NULL; | 3978 | struct buffer_head *last_eb_bh = NULL; |
3945 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | 3979 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; |
3946 | struct ocfs2_merge_ctxt ctxt; | 3980 | struct ocfs2_merge_ctxt ctxt; |
3947 | struct ocfs2_extent_list *rightmost_el; | 3981 | struct ocfs2_extent_list *rightmost_el; |
@@ -3960,14 +3994,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
3960 | goto out; | 3994 | goto out; |
3961 | } | 3995 | } |
3962 | 3996 | ||
3963 | eb_bh = path_leaf_bh(path); | ||
3964 | ret = ocfs2_journal_access(handle, inode, eb_bh, | ||
3965 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3966 | if (ret) { | ||
3967 | mlog_errno(ret); | ||
3968 | goto out; | ||
3969 | } | ||
3970 | |||
3971 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | 3997 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, |
3972 | split_index, | 3998 | split_index, |
3973 | split_rec); | 3999 | split_rec); |
@@ -4029,8 +4055,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
4029 | mlog_errno(ret); | 4055 | mlog_errno(ret); |
4030 | } | 4056 | } |
4031 | 4057 | ||
4032 | ocfs2_journal_dirty(handle, eb_bh); | ||
4033 | |||
4034 | out: | 4058 | out: |
4035 | brelse(last_eb_bh); | 4059 | brelse(last_eb_bh); |
4036 | return ret; | 4060 | return ret; |
@@ -4707,7 +4731,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
4707 | 4731 | ||
4708 | mutex_lock(&data_alloc_inode->i_mutex); | 4732 | mutex_lock(&data_alloc_inode->i_mutex); |
4709 | 4733 | ||
4710 | status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); | 4734 | status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1); |
4711 | if (status < 0) { | 4735 | if (status < 0) { |
4712 | mlog_errno(status); | 4736 | mlog_errno(status); |
4713 | goto out_mutex; | 4737 | goto out_mutex; |
@@ -4729,7 +4753,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | |||
4729 | 4753 | ||
4730 | out_unlock: | 4754 | out_unlock: |
4731 | brelse(data_alloc_bh); | 4755 | brelse(data_alloc_bh); |
4732 | ocfs2_meta_unlock(data_alloc_inode, 1); | 4756 | ocfs2_inode_unlock(data_alloc_inode, 1); |
4733 | 4757 | ||
4734 | out_mutex: | 4758 | out_mutex: |
4735 | mutex_unlock(&data_alloc_inode->i_mutex); | 4759 | mutex_unlock(&data_alloc_inode->i_mutex); |
@@ -5053,7 +5077,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb, | |||
5053 | 5077 | ||
5054 | mutex_lock(&inode->i_mutex); | 5078 | mutex_lock(&inode->i_mutex); |
5055 | 5079 | ||
5056 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 5080 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
5057 | if (ret) { | 5081 | if (ret) { |
5058 | mlog_errno(ret); | 5082 | mlog_errno(ret); |
5059 | goto out_mutex; | 5083 | goto out_mutex; |
@@ -5094,7 +5118,7 @@ out_journal: | |||
5094 | ocfs2_commit_trans(osb, handle); | 5118 | ocfs2_commit_trans(osb, handle); |
5095 | 5119 | ||
5096 | out_unlock: | 5120 | out_unlock: |
5097 | ocfs2_meta_unlock(inode, 1); | 5121 | ocfs2_inode_unlock(inode, 1); |
5098 | brelse(di_bh); | 5122 | brelse(di_bh); |
5099 | out_mutex: | 5123 | out_mutex: |
5100 | mutex_unlock(&inode->i_mutex); | 5124 | mutex_unlock(&inode->i_mutex); |
@@ -6093,8 +6117,6 @@ start: | |||
6093 | mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", | 6117 | mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", |
6094 | clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); | 6118 | clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); |
6095 | 6119 | ||
6096 | BUG_ON(clusters_to_del == 0); | ||
6097 | |||
6098 | mutex_lock(&tl_inode->i_mutex); | 6120 | mutex_lock(&tl_inode->i_mutex); |
6099 | tl_sem = 1; | 6121 | tl_sem = 1; |
6100 | /* ocfs2_truncate_log_needs_flush guarantees us at least one | 6122 | /* ocfs2_truncate_log_needs_flush guarantees us at least one |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 56f7790cad46..bc7b4cbbe8ec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
29 | #include <linux/mpage.h> | ||
29 | 30 | ||
30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 31 | #define MLOG_MASK_PREFIX ML_FILE_IO |
31 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
139 | { | 140 | { |
140 | int err = 0; | 141 | int err = 0; |
141 | unsigned int ext_flags; | 142 | unsigned int ext_flags; |
142 | u64 p_blkno, past_eof; | 143 | u64 max_blocks = bh_result->b_size >> inode->i_blkbits; |
144 | u64 p_blkno, count, past_eof; | ||
143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
144 | 146 | ||
145 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 147 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
155 | goto bail; | 157 | goto bail; |
156 | } | 158 | } |
157 | 159 | ||
158 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 160 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, |
159 | &ext_flags); | 161 | &ext_flags); |
160 | if (err) { | 162 | if (err) { |
161 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 163 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " |
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
164 | goto bail; | 166 | goto bail; |
165 | } | 167 | } |
166 | 168 | ||
169 | if (max_blocks < count) | ||
170 | count = max_blocks; | ||
171 | |||
167 | /* | 172 | /* |
168 | * ocfs2 never allocates in this function - the only time we | 173 | * ocfs2 never allocates in this function - the only time we |
169 | * need to use BH_New is when we're extending i_size on a file | 174 | * need to use BH_New is when we're extending i_size on a file |
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
178 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 183 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
179 | map_bh(bh_result, inode->i_sb, p_blkno); | 184 | map_bh(bh_result, inode->i_sb, p_blkno); |
180 | 185 | ||
186 | bh_result->b_size = count << inode->i_blkbits; | ||
187 | |||
181 | if (!ocfs2_sparse_alloc(osb)) { | 188 | if (!ocfs2_sparse_alloc(osb)) { |
182 | if (p_blkno == 0) { | 189 | if (p_blkno == 0) { |
183 | err = -EIO; | 190 | err = -EIO; |
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
210 | struct buffer_head *di_bh) | 217 | struct buffer_head *di_bh) |
211 | { | 218 | { |
212 | void *kaddr; | 219 | void *kaddr; |
213 | unsigned int size; | 220 | loff_t size; |
214 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 221 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
215 | 222 | ||
216 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 223 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { |
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
224 | if (size > PAGE_CACHE_SIZE || | 231 | if (size > PAGE_CACHE_SIZE || |
225 | size > ocfs2_max_inline_data(inode->i_sb)) { | 232 | size > ocfs2_max_inline_data(inode->i_sb)) { |
226 | ocfs2_error(inode->i_sb, | 233 | ocfs2_error(inode->i_sb, |
227 | "Inode %llu has with inline data has bad size: %u", | 234 | "Inode %llu has with inline data has bad size: %Lu", |
228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, size); | 235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
236 | (unsigned long long)size); | ||
229 | return -EROFS; | 237 | return -EROFS; |
230 | } | 238 | } |
231 | 239 | ||
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
275 | 283 | ||
276 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 284 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); |
277 | 285 | ||
278 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 286 | ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); |
279 | if (ret != 0) { | 287 | if (ret != 0) { |
280 | if (ret == AOP_TRUNCATED_PAGE) | 288 | if (ret == AOP_TRUNCATED_PAGE) |
281 | unlock = 0; | 289 | unlock = 0; |
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
285 | 293 | ||
286 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 294 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { |
287 | ret = AOP_TRUNCATED_PAGE; | 295 | ret = AOP_TRUNCATED_PAGE; |
288 | goto out_meta_unlock; | 296 | goto out_inode_unlock; |
289 | } | 297 | } |
290 | 298 | ||
291 | /* | 299 | /* |
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
305 | goto out_alloc; | 313 | goto out_alloc; |
306 | } | 314 | } |
307 | 315 | ||
308 | ret = ocfs2_data_lock_with_page(inode, 0, page); | ||
309 | if (ret != 0) { | ||
310 | if (ret == AOP_TRUNCATED_PAGE) | ||
311 | unlock = 0; | ||
312 | mlog_errno(ret); | ||
313 | goto out_alloc; | ||
314 | } | ||
315 | |||
316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
317 | ret = ocfs2_readpage_inline(inode, page); | 317 | ret = ocfs2_readpage_inline(inode, page); |
318 | else | 318 | else |
319 | ret = block_read_full_page(page, ocfs2_get_block); | 319 | ret = block_read_full_page(page, ocfs2_get_block); |
320 | unlock = 0; | 320 | unlock = 0; |
321 | 321 | ||
322 | ocfs2_data_unlock(inode, 0); | ||
323 | out_alloc: | 322 | out_alloc: |
324 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 323 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
325 | out_meta_unlock: | 324 | out_inode_unlock: |
326 | ocfs2_meta_unlock(inode, 0); | 325 | ocfs2_inode_unlock(inode, 0); |
327 | out: | 326 | out: |
328 | if (unlock) | 327 | if (unlock) |
329 | unlock_page(page); | 328 | unlock_page(page); |
@@ -331,6 +330,62 @@ out: | |||
331 | return ret; | 330 | return ret; |
332 | } | 331 | } |
333 | 332 | ||
333 | /* | ||
334 | * This is used only for read-ahead. Failures or difficult to handle | ||
335 | * situations are safe to ignore. | ||
336 | * | ||
337 | * Right now, we don't bother with BH_Boundary - in-inode extent lists | ||
338 | * are quite large (243 extents on 4k blocks), so most inodes don't | ||
339 | * grow out to a tree. If need be, detecting boundary extents could | ||
340 | * trivially be added in a future version of ocfs2_get_block(). | ||
341 | */ | ||
342 | static int ocfs2_readpages(struct file *filp, struct address_space *mapping, | ||
343 | struct list_head *pages, unsigned nr_pages) | ||
344 | { | ||
345 | int ret, err = -EIO; | ||
346 | struct inode *inode = mapping->host; | ||
347 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
348 | loff_t start; | ||
349 | struct page *last; | ||
350 | |||
351 | /* | ||
352 | * Use the nonblocking flag for the dlm code to avoid page | ||
353 | * lock inversion, but don't bother with retrying. | ||
354 | */ | ||
355 | ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); | ||
356 | if (ret) | ||
357 | return err; | ||
358 | |||
359 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | ||
360 | ocfs2_inode_unlock(inode, 0); | ||
361 | return err; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Don't bother with inline-data. There isn't anything | ||
366 | * to read-ahead in that case anyway... | ||
367 | */ | ||
368 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
369 | goto out_unlock; | ||
370 | |||
371 | /* | ||
372 | * Check whether a remote node truncated this file - we just | ||
373 | * drop out in that case as it's not worth handling here. | ||
374 | */ | ||
375 | last = list_entry(pages->prev, struct page, lru); | ||
376 | start = (loff_t)last->index << PAGE_CACHE_SHIFT; | ||
377 | if (start >= i_size_read(inode)) | ||
378 | goto out_unlock; | ||
379 | |||
380 | err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); | ||
381 | |||
382 | out_unlock: | ||
383 | up_read(&oi->ip_alloc_sem); | ||
384 | ocfs2_inode_unlock(inode, 0); | ||
385 | |||
386 | return err; | ||
387 | } | ||
388 | |||
334 | /* Note: Because we don't support holes, our allocation has | 389 | /* Note: Because we don't support holes, our allocation has |
335 | * already happened (allocation writes zeros to the file data) | 390 | * already happened (allocation writes zeros to the file data) |
336 | * so we don't have to worry about ordered writes in | 391 | * so we don't have to worry about ordered writes in |
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
452 | * accessed concurrently from multiple nodes. | 507 | * accessed concurrently from multiple nodes. |
453 | */ | 508 | */ |
454 | if (!INODE_JOURNAL(inode)) { | 509 | if (!INODE_JOURNAL(inode)) { |
455 | err = ocfs2_meta_lock(inode, NULL, 0); | 510 | err = ocfs2_inode_lock(inode, NULL, 0); |
456 | if (err) { | 511 | if (err) { |
457 | if (err != -ENOENT) | 512 | if (err != -ENOENT) |
458 | mlog_errno(err); | 513 | mlog_errno(err); |
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
467 | 522 | ||
468 | if (!INODE_JOURNAL(inode)) { | 523 | if (!INODE_JOURNAL(inode)) { |
469 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 524 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
470 | ocfs2_meta_unlock(inode, 0); | 525 | ocfs2_inode_unlock(inode, 0); |
471 | } | 526 | } |
472 | 527 | ||
473 | if (err) { | 528 | if (err) { |
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
638 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 693 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
639 | return 0; | 694 | return 0; |
640 | 695 | ||
641 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
642 | /* | ||
643 | * We get PR data locks even for O_DIRECT. This | ||
644 | * allows concurrent O_DIRECT I/O but doesn't let | ||
645 | * O_DIRECT with extending and buffered zeroing writes | ||
646 | * race. If they did race then the buffered zeroing | ||
647 | * could be written back after the O_DIRECT I/O. It's | ||
648 | * one thing to tell people not to mix buffered and | ||
649 | * O_DIRECT writes, but expecting them to understand | ||
650 | * that file extension is also an implicit buffered | ||
651 | * write is too much. By getting the PR we force | ||
652 | * writeback of the buffered zeroing before | ||
653 | * proceeding. | ||
654 | */ | ||
655 | ret = ocfs2_data_lock(inode, 0); | ||
656 | if (ret < 0) { | ||
657 | mlog_errno(ret); | ||
658 | goto out; | ||
659 | } | ||
660 | ocfs2_data_unlock(inode, 0); | ||
661 | } | ||
662 | |||
663 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 696 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
664 | inode->i_sb->s_bdev, iov, offset, | 697 | inode->i_sb->s_bdev, iov, offset, |
665 | nr_segs, | 698 | nr_segs, |
666 | ocfs2_direct_IO_get_blocks, | 699 | ocfs2_direct_IO_get_blocks, |
667 | ocfs2_dio_end_io); | 700 | ocfs2_dio_end_io); |
668 | out: | 701 | |
669 | mlog_exit(ret); | 702 | mlog_exit(ret); |
670 | return ret; | 703 | return ret; |
671 | } | 704 | } |
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1754 | struct buffer_head *di_bh = NULL; | 1787 | struct buffer_head *di_bh = NULL; |
1755 | struct inode *inode = mapping->host; | 1788 | struct inode *inode = mapping->host; |
1756 | 1789 | ||
1757 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1790 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
1758 | if (ret) { | 1791 | if (ret) { |
1759 | mlog_errno(ret); | 1792 | mlog_errno(ret); |
1760 | return ret; | 1793 | return ret; |
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1769 | */ | 1802 | */ |
1770 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1803 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1771 | 1804 | ||
1772 | ret = ocfs2_data_lock(inode, 1); | ||
1773 | if (ret) { | ||
1774 | mlog_errno(ret); | ||
1775 | goto out_fail; | ||
1776 | } | ||
1777 | |||
1778 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1805 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, |
1779 | fsdata, di_bh, NULL); | 1806 | fsdata, di_bh, NULL); |
1780 | if (ret) { | 1807 | if (ret) { |
1781 | mlog_errno(ret); | 1808 | mlog_errno(ret); |
1782 | goto out_fail_data; | 1809 | goto out_fail; |
1783 | } | 1810 | } |
1784 | 1811 | ||
1785 | brelse(di_bh); | 1812 | brelse(di_bh); |
1786 | 1813 | ||
1787 | return 0; | 1814 | return 0; |
1788 | 1815 | ||
1789 | out_fail_data: | ||
1790 | ocfs2_data_unlock(inode, 1); | ||
1791 | out_fail: | 1816 | out_fail: |
1792 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1817 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1793 | 1818 | ||
1794 | brelse(di_bh); | 1819 | brelse(di_bh); |
1795 | ocfs2_meta_unlock(inode, 1); | 1820 | ocfs2_inode_unlock(inode, 1); |
1796 | 1821 | ||
1797 | return ret; | 1822 | return ret; |
1798 | } | 1823 | } |
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
1908 | 1933 | ||
1909 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 1934 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
1910 | 1935 | ||
1911 | ocfs2_data_unlock(inode, 1); | ||
1912 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1936 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1913 | ocfs2_meta_unlock(inode, 1); | 1937 | ocfs2_inode_unlock(inode, 1); |
1914 | 1938 | ||
1915 | return ret; | 1939 | return ret; |
1916 | } | 1940 | } |
1917 | 1941 | ||
1918 | const struct address_space_operations ocfs2_aops = { | 1942 | const struct address_space_operations ocfs2_aops = { |
1919 | .readpage = ocfs2_readpage, | 1943 | .readpage = ocfs2_readpage, |
1944 | .readpages = ocfs2_readpages, | ||
1920 | .writepage = ocfs2_writepage, | 1945 | .writepage = ocfs2_writepage, |
1921 | .write_begin = ocfs2_write_begin, | 1946 | .write_begin = ocfs2_write_begin, |
1922 | .write_end = ocfs2_write_end, | 1947 | .write_end = ocfs2_write_end, |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index c9037414f4f6..f136639f5b41 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
79 | * information for this bh as it's not marked locally | 79 | * information for this bh as it's not marked locally |
80 | * uptodate. */ | 80 | * uptodate. */ |
81 | ret = -EIO; | 81 | ret = -EIO; |
82 | brelse(bh); | 82 | put_bh(bh); |
83 | } | 83 | } |
84 | 84 | ||
85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 85 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
@@ -256,7 +256,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
256 | * for this bh as it's not marked locally | 256 | * for this bh as it's not marked locally |
257 | * uptodate. */ | 257 | * uptodate. */ |
258 | status = -EIO; | 258 | status = -EIO; |
259 | brelse(bh); | 259 | put_bh(bh); |
260 | bhs[i] = NULL; | 260 | bhs[i] = NULL; |
261 | continue; | 261 | continue; |
262 | } | 262 | } |
@@ -280,3 +280,64 @@ bail: | |||
280 | mlog_exit(status); | 280 | mlog_exit(status); |
281 | return status; | 281 | return status; |
282 | } | 282 | } |
283 | |||
284 | /* Check whether the blkno is the super block or one of the backups. */ | ||
285 | static void ocfs2_check_super_or_backup(struct super_block *sb, | ||
286 | sector_t blkno) | ||
287 | { | ||
288 | int i; | ||
289 | u64 backup_blkno; | ||
290 | |||
291 | if (blkno == OCFS2_SUPER_BLOCK_BLKNO) | ||
292 | return; | ||
293 | |||
294 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
295 | backup_blkno = ocfs2_backup_super_blkno(sb, i); | ||
296 | if (backup_blkno == blkno) | ||
297 | return; | ||
298 | } | ||
299 | |||
300 | BUG(); | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * Writing the super block and its backups doesn't need to coordinate with | ||
305 | * the journal, so we don't need to lock ip_io_mutex and the inode doesn't | ||
306 | * need to be passed into this function. | ||
307 | */ | ||
308 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
309 | struct buffer_head *bh) | ||
310 | { | ||
311 | int ret = 0; | ||
312 | |||
313 | mlog_entry_void(); | ||
314 | |||
315 | BUG_ON(buffer_jbd(bh)); | ||
316 | ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); | ||
317 | |||
318 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { | ||
319 | ret = -EROFS; | ||
320 | goto out; | ||
321 | } | ||
322 | |||
323 | lock_buffer(bh); | ||
324 | set_buffer_uptodate(bh); | ||
325 | |||
326 | /* remove from dirty list before I/O. */ | ||
327 | clear_buffer_dirty(bh); | ||
328 | |||
329 | get_bh(bh); /* for end_buffer_write_sync() */ | ||
330 | bh->b_end_io = end_buffer_write_sync; | ||
331 | submit_bh(WRITE, bh); | ||
332 | |||
333 | wait_on_buffer(bh); | ||
334 | |||
335 | if (!buffer_uptodate(bh)) { | ||
336 | ret = -EIO; | ||
337 | put_bh(bh); | ||
338 | } | ||
339 | |||
340 | out: | ||
341 | mlog_exit(ret); | ||
342 | return ret; | ||
343 | } | ||
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 6cc20930fac3..c2e78614c3e5 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -47,6 +47,8 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, | |||
47 | int flags, | 47 | int flags, |
48 | struct inode *inode); | 48 | struct inode *inode); |
49 | 49 | ||
50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | ||
51 | struct buffer_head *bh); | ||
50 | 52 | ||
51 | #define OCFS2_BH_CACHED 1 | 53 | #define OCFS2_BH_CACHED 1 |
52 | #define OCFS2_BH_READAHEAD 8 | 54 | #define OCFS2_BH_READAHEAD 8 |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 35397dd5ecdb..e511339886b3 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #define O2HB_LIVE_THRESHOLD 2 | 35 | #define O2HB_LIVE_THRESHOLD 2 |
36 | /* number of equal samples to be seen as dead */ | 36 | /* number of equal samples to be seen as dead */ |
37 | extern unsigned int o2hb_dead_threshold; | 37 | extern unsigned int o2hb_dead_threshold; |
38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 7 | 38 | #define O2HB_DEFAULT_DEAD_THRESHOLD 31 |
39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ | 39 | /* Otherwise MAX_WRITE_TIMEOUT will be zero... */ |
40 | #define O2HB_MIN_DEAD_THRESHOLD 2 | 40 | #define O2HB_MIN_DEAD_THRESHOLD 2 |
41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) | 41 | #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1)) |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index a4882c8df945..23c732f27529 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -146,7 +146,7 @@ static struct kset mlog_kset = { | |||
146 | .kobj = {.ktype = &mlog_ktype}, | 146 | .kobj = {.ktype = &mlog_ktype}, |
147 | }; | 147 | }; |
148 | 148 | ||
149 | int mlog_sys_init(struct kset *o2cb_subsys) | 149 | int mlog_sys_init(struct kset *o2cb_kset) |
150 | { | 150 | { |
151 | int i = 0; | 151 | int i = 0; |
152 | 152 | ||
@@ -157,7 +157,7 @@ int mlog_sys_init(struct kset *o2cb_subsys) | |||
157 | mlog_attr_ptrs[i] = NULL; | 157 | mlog_attr_ptrs[i] = NULL; |
158 | 158 | ||
159 | kobject_set_name(&mlog_kset.kobj, "logmask"); | 159 | kobject_set_name(&mlog_kset.kobj, "logmask"); |
160 | kobj_set_kset_s(&mlog_kset, *o2cb_subsys); | 160 | mlog_kset.kobj.kset = o2cb_kset; |
161 | return kset_register(&mlog_kset); | 161 | return kset_register(&mlog_kset); |
162 | } | 162 | } |
163 | 163 | ||
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 64f6f378fd09..0c095ce7723d 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
@@ -28,96 +28,55 @@ | |||
28 | #include <linux/module.h> | 28 | #include <linux/module.h> |
29 | #include <linux/kobject.h> | 29 | #include <linux/kobject.h> |
30 | #include <linux/sysfs.h> | 30 | #include <linux/sysfs.h> |
31 | #include <linux/fs.h> | ||
31 | 32 | ||
32 | #include "ocfs2_nodemanager.h" | 33 | #include "ocfs2_nodemanager.h" |
33 | #include "masklog.h" | 34 | #include "masklog.h" |
34 | #include "sys.h" | 35 | #include "sys.h" |
35 | 36 | ||
36 | struct o2cb_attribute { | ||
37 | struct attribute attr; | ||
38 | ssize_t (*show)(char *buf); | ||
39 | ssize_t (*store)(const char *buf, size_t count); | ||
40 | }; | ||
41 | |||
42 | #define O2CB_ATTR(_name, _mode, _show, _store) \ | ||
43 | struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store) | ||
44 | |||
45 | #define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr) | ||
46 | 37 | ||
47 | static ssize_t o2cb_interface_revision_show(char *buf) | 38 | static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr, |
39 | char *buf) | ||
48 | { | 40 | { |
49 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); | 41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); |
50 | } | 42 | } |
51 | 43 | static struct kobj_attribute attr_version = | |
52 | static O2CB_ATTR(interface_revision, S_IFREG | S_IRUGO, o2cb_interface_revision_show, NULL); | 44 | __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); |
53 | 45 | ||
54 | static struct attribute *o2cb_attrs[] = { | 46 | static struct attribute *o2cb_attrs[] = { |
55 | &o2cb_attr_interface_revision.attr, | 47 | &attr_version.attr, |
56 | NULL, | 48 | NULL, |
57 | }; | 49 | }; |
58 | 50 | ||
59 | static ssize_t | 51 | static struct attribute_group o2cb_attr_group = { |
60 | o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer); | 52 | .attrs = o2cb_attrs, |
61 | static ssize_t | ||
62 | o2cb_store(struct kobject * kobj, struct attribute * attr, | ||
63 | const char * buffer, size_t count); | ||
64 | static struct sysfs_ops o2cb_sysfs_ops = { | ||
65 | .show = o2cb_show, | ||
66 | .store = o2cb_store, | ||
67 | }; | 53 | }; |
68 | 54 | ||
69 | static struct kobj_type o2cb_subsys_type = { | 55 | static struct kset *o2cb_kset; |
70 | .default_attrs = o2cb_attrs, | ||
71 | .sysfs_ops = &o2cb_sysfs_ops, | ||
72 | }; | ||
73 | |||
74 | /* gives us o2cb_subsys */ | ||
75 | static decl_subsys(o2cb, NULL, NULL); | ||
76 | |||
77 | static ssize_t | ||
78 | o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer) | ||
79 | { | ||
80 | struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); | ||
81 | struct kset *sbs = to_kset(kobj); | ||
82 | |||
83 | BUG_ON(sbs != &o2cb_subsys); | ||
84 | |||
85 | if (o2cb_attr->show) | ||
86 | return o2cb_attr->show(buffer); | ||
87 | return -EIO; | ||
88 | } | ||
89 | |||
90 | static ssize_t | ||
91 | o2cb_store(struct kobject * kobj, struct attribute * attr, | ||
92 | const char * buffer, size_t count) | ||
93 | { | ||
94 | struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr); | ||
95 | struct kset *sbs = to_kset(kobj); | ||
96 | |||
97 | BUG_ON(sbs != &o2cb_subsys); | ||
98 | |||
99 | if (o2cb_attr->store) | ||
100 | return o2cb_attr->store(buffer, count); | ||
101 | return -EIO; | ||
102 | } | ||
103 | 56 | ||
104 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
105 | { | 58 | { |
106 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
107 | subsystem_unregister(&o2cb_subsys); | 60 | kset_unregister(o2cb_kset); |
108 | } | 61 | } |
109 | 62 | ||
110 | int o2cb_sys_init(void) | 63 | int o2cb_sys_init(void) |
111 | { | 64 | { |
112 | int ret; | 65 | int ret; |
113 | 66 | ||
114 | o2cb_subsys.kobj.ktype = &o2cb_subsys_type; | 67 | o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); |
115 | ret = subsystem_register(&o2cb_subsys); | 68 | if (!o2cb_kset) |
69 | return -ENOMEM; | ||
70 | |||
71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | ||
116 | if (ret) | 72 | if (ret) |
117 | return ret; | 73 | goto error; |
118 | 74 | ||
119 | ret = mlog_sys_init(&o2cb_subsys); | 75 | ret = mlog_sys_init(o2cb_kset); |
120 | if (ret) | 76 | if (ret) |
121 | subsystem_unregister(&o2cb_subsys); | 77 | goto error; |
78 | return 0; | ||
79 | error: | ||
80 | kset_unregister(o2cb_kset); | ||
122 | return ret; | 81 | return ret; |
123 | } | 82 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index da880fc215f0..f36f66aab3dd 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -60,8 +60,8 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data, | |||
60 | /* same as hb delay, we're waiting for another node to recognize our hb */ | 60 | /* same as hb delay, we're waiting for another node to recognize our hb */ |
61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 | 61 | #define O2NET_RECONNECT_DELAY_MS_DEFAULT 2000 |
62 | 62 | ||
63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 5000 | 63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 |
64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 10000 | 64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 |
65 | 65 | ||
66 | 66 | ||
67 | /* TODO: figure this out.... */ | 67 | /* TODO: figure this out.... */ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 9606111fe89d..b2e832aca567 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -38,6 +38,12 @@ | |||
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * New in version 10: | ||
42 | * - Meta/data locks combined | ||
43 | * | ||
44 | * New in version 9: | ||
45 | * - All votes removed | ||
46 | * | ||
41 | * New in version 8: | 47 | * New in version 8: |
42 | * - Replace delete inode votes with a cluster lock | 48 | * - Replace delete inode votes with a cluster lock |
43 | * | 49 | * |
@@ -60,7 +66,7 @@ | |||
60 | * - full 64 bit i_size in the metadata lock lvbs | 66 | * - full 64 bit i_size in the metadata lock lvbs |
61 | * - introduction of "rw" lock and pushing meta/data locking down | 67 | * - introduction of "rw" lock and pushing meta/data locking down |
62 | */ | 68 | */ |
63 | #define O2NET_PROTOCOL_VERSION 8ULL | 69 | #define O2NET_PROTOCOL_VERSION 10ULL |
64 | struct o2net_handshake { | 70 | struct o2net_handshake { |
65 | __be64 protocol_version; | 71 | __be64 protocol_version; |
66 | __be64 connector_id; | 72 | __be64 connector_id; |
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c index 7286c48bb30d..a56eee6abad3 100644 --- a/fs/ocfs2/cluster/ver.c +++ b/fs/ocfs2/cluster/ver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "ver.h" | 29 | #include "ver.h" |
30 | 30 | ||
31 | #define CLUSTER_BUILD_VERSION "1.3.3" | 31 | #define CLUSTER_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 9923278ea6d4..b1cc7c381e88 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry, | |||
128 | /* | 128 | /* |
129 | * Walk the inode alias list, and find a dentry which has a given | 129 | * Walk the inode alias list, and find a dentry which has a given |
130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it |
131 | * is looking for a dentry_lock reference. The vote thread is looking | 131 | * is looking for a dentry_lock reference. The downconvert thread is |
132 | * to unhash aliases, so we allow it to skip any that already have | 132 | * looking to unhash aliases, so we allow it to skip any that already |
133 | * that property. | 133 | * have that property. |
134 | */ | 134 | */ |
135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, |
136 | u64 parent_blkno, | 136 | u64 parent_blkno, |
@@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
266 | dl->dl_count = 0; | 266 | dl->dl_count = 0; |
267 | /* | 267 | /* |
268 | * Does this have to happen below, for all attaches, in case | 268 | * Does this have to happen below, for all attaches, in case |
269 | * the struct inode gets blown away by votes? | 269 | * the struct inode gets blown away by the downconvert thread? |
270 | */ | 270 | */ |
271 | dl->dl_inode = igrab(inode); | 271 | dl->dl_inode = igrab(inode); |
272 | dl->dl_parent_blkno = parent_blkno; | 272 | dl->dl_parent_blkno = parent_blkno; |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 63b28fdceb4a..6b0107f21344 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -846,14 +846,14 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
846 | mlog_entry("dirino=%llu\n", | 846 | mlog_entry("dirino=%llu\n", |
847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 847 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
848 | 848 | ||
849 | error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 849 | error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
850 | if (lock_level && error >= 0) { | 850 | if (lock_level && error >= 0) { |
851 | /* We release EX lock which used to update atime | 851 | /* We release EX lock which used to update atime |
852 | * and get PR lock again to reduce contention | 852 | * and get PR lock again to reduce contention |
853 | * on commonly accessed directories. */ | 853 | * on commonly accessed directories. */ |
854 | ocfs2_meta_unlock(inode, 1); | 854 | ocfs2_inode_unlock(inode, 1); |
855 | lock_level = 0; | 855 | lock_level = 0; |
856 | error = ocfs2_meta_lock(inode, NULL, 0); | 856 | error = ocfs2_inode_lock(inode, NULL, 0); |
857 | } | 857 | } |
858 | if (error < 0) { | 858 | if (error < 0) { |
859 | if (error != -ENOENT) | 859 | if (error != -ENOENT) |
@@ -865,7 +865,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 865 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, |
866 | dirent, filldir, NULL); | 866 | dirent, filldir, NULL); |
867 | 867 | ||
868 | ocfs2_meta_unlock(inode, lock_level); | 868 | ocfs2_inode_unlock(inode, lock_level); |
869 | 869 | ||
870 | bail_nolock: | 870 | bail_nolock: |
871 | mlog_exit(error); | 871 | mlog_exit(error); |
diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c index d2be3ad841f9..a733b3321f83 100644 --- a/fs/ocfs2/dlm/dlmfsver.c +++ b/fs/ocfs2/dlm/dlmfsver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "dlmfsver.h" | 29 | #include "dlmfsver.h" |
30 | 30 | ||
31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 2fde7bf91434..91f747b8a538 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -2270,6 +2270,12 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
2270 | } | 2270 | } |
2271 | } | 2271 | } |
2272 | 2272 | ||
2273 | /* Clean up join state on node death. */ | ||
2274 | if (dlm->joining_node == idx) { | ||
2275 | mlog(0, "Clearing join state for node %u\n", idx); | ||
2276 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
2277 | } | ||
2278 | |||
2273 | /* check to see if the node is already considered dead */ | 2279 | /* check to see if the node is already considered dead */ |
2274 | if (!test_bit(idx, dlm->live_nodes_map)) { | 2280 | if (!test_bit(idx, dlm->live_nodes_map)) { |
2275 | mlog(0, "for domain %s, node %d is already dead. " | 2281 | mlog(0, "for domain %s, node %d is already dead. " |
@@ -2288,12 +2294,6 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
2288 | 2294 | ||
2289 | clear_bit(idx, dlm->live_nodes_map); | 2295 | clear_bit(idx, dlm->live_nodes_map); |
2290 | 2296 | ||
2291 | /* Clean up join state on node death. */ | ||
2292 | if (dlm->joining_node == idx) { | ||
2293 | mlog(0, "Clearing join state for node %u\n", idx); | ||
2294 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | ||
2295 | } | ||
2296 | |||
2297 | /* make sure local cleanup occurs before the heartbeat events */ | 2297 | /* make sure local cleanup occurs before the heartbeat events */ |
2298 | if (!test_bit(idx, dlm->recovery_map)) | 2298 | if (!test_bit(idx, dlm->recovery_map)) |
2299 | dlm_do_local_recovery_cleanup(dlm, idx); | 2299 | dlm_do_local_recovery_cleanup(dlm, idx); |
@@ -2321,6 +2321,13 @@ void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) | |||
2321 | if (!dlm_grab(dlm)) | 2321 | if (!dlm_grab(dlm)) |
2322 | return; | 2322 | return; |
2323 | 2323 | ||
2324 | /* | ||
2325 | * This will notify any dlm users that a node in our domain | ||
2326 | * went away without notifying us first. | ||
2327 | */ | ||
2328 | if (test_bit(idx, dlm->domain_map)) | ||
2329 | dlm_fire_domain_eviction_callbacks(dlm, idx); | ||
2330 | |||
2324 | spin_lock(&dlm->spinlock); | 2331 | spin_lock(&dlm->spinlock); |
2325 | __dlm_hb_node_down(dlm, idx); | 2332 | __dlm_hb_node_down(dlm, idx); |
2326 | spin_unlock(&dlm->spinlock); | 2333 | spin_unlock(&dlm->spinlock); |
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c index 7ef2653f8f41..dfc0da4d158d 100644 --- a/fs/ocfs2/dlm/dlmver.c +++ b/fs/ocfs2/dlm/dlmver.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | #include "dlmver.h" | 29 | #include "dlmver.h" |
30 | 30 | ||
31 | #define DLM_BUILD_VERSION "1.3.3" | 31 | #define DLM_BUILD_VERSION "1.5.0" |
32 | 32 | ||
33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION | 33 | #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION |
34 | 34 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4e97dcceaf8f..3867244fb144 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include "slot_map.h" | 55 | #include "slot_map.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "uptodate.h" | 57 | #include "uptodate.h" |
58 | #include "vote.h" | ||
59 | 58 | ||
60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" |
61 | 60 | ||
@@ -69,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
69 | 68 | ||
70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. |
@@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops { | |||
153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Optionally called in the downconvert (or "vote") thread | 156 | * Optionally called in the downconvert thread after a |
157 | * after a successful downconvert. The lockres will not be | 157 | * successful downconvert. The lockres will not be referenced |
158 | * referenced after this callback is called, so it is safe to | 158 | * after this callback is called, so it is safe to free |
159 | * free memory, etc. | 159 | * memory, etc. |
160 | * | 160 | * |
161 | * The exact semantics of when this is called are controlled | 161 | * The exact semantics of when this is called are controlled |
162 | * by ->downconvert_worker() | 162 | * by ->downconvert_worker() |
@@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | |||
225 | .flags = 0, | 225 | .flags = 0, |
226 | }; | 226 | }; |
227 | 227 | ||
228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { |
229 | .get_osb = ocfs2_get_inode_osb, | 229 | .get_osb = ocfs2_get_inode_osb, |
230 | .check_downconvert = ocfs2_check_meta_downconvert, | 230 | .check_downconvert = ocfs2_check_meta_downconvert, |
231 | .set_lvb = ocfs2_set_meta_lvb, | 231 | .set_lvb = ocfs2_set_meta_lvb, |
232 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | ||
233 | }; | ||
234 | |||
235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | ||
236 | .get_osb = ocfs2_get_inode_osb, | ||
237 | .downconvert_worker = ocfs2_data_convert_worker, | 232 | .downconvert_worker = ocfs2_data_convert_worker, |
238 | .flags = 0, | 233 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
239 | }; | 234 | }; |
240 | 235 | ||
241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 236 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
@@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
258 | .flags = 0, | 253 | .flags = 0, |
259 | }; | 254 | }; |
260 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
257 | .get_osb = ocfs2_get_file_osb, | ||
258 | .flags = 0, | ||
259 | }; | ||
260 | |||
261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
262 | { | 262 | { |
263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | ||
265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 264 | lockres->l_type == OCFS2_LOCK_TYPE_RW || |
266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 265 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
267 | } | 266 | } |
@@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ |
311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ |
312 | } while (0) | 311 | } while (0) |
313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); |
314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
315 | static int ocfs2_meta_lock_update(struct inode *inode, | 314 | struct ocfs2_lock_res *lockres); |
315 | static int ocfs2_inode_lock_update(struct inode *inode, | ||
316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
318 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); |
319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
320 | int new_level); | ||
321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
322 | struct ocfs2_lock_res *lockres, | ||
323 | int new_level, | ||
324 | int lvb); | ||
325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
326 | struct ocfs2_lock_res *lockres); | ||
327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
328 | struct ocfs2_lock_res *lockres); | ||
329 | |||
319 | 330 | ||
320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
321 | u64 blkno, | 332 | u64 blkno, |
@@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
402 | ops = &ocfs2_inode_rw_lops; | 413 | ops = &ocfs2_inode_rw_lops; |
403 | break; | 414 | break; |
404 | case OCFS2_LOCK_TYPE_META: | 415 | case OCFS2_LOCK_TYPE_META: |
405 | ops = &ocfs2_inode_meta_lops; | 416 | ops = &ocfs2_inode_inode_lops; |
406 | break; | ||
407 | case OCFS2_LOCK_TYPE_DATA: | ||
408 | ops = &ocfs2_inode_data_lops; | ||
409 | break; | 417 | break; |
410 | case OCFS2_LOCK_TYPE_OPEN: | 418 | case OCFS2_LOCK_TYPE_OPEN: |
411 | ops = &ocfs2_inode_open_lops; | 419 | ops = &ocfs2_inode_open_lops; |
@@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
428 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); |
429 | } | 437 | } |
430 | 438 | ||
439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
440 | { | ||
441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
442 | |||
443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
444 | } | ||
445 | |||
431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
432 | { | 447 | { |
433 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; |
@@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
508 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); |
509 | } | 524 | } |
510 | 525 | ||
526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
527 | struct ocfs2_file_private *fp) | ||
528 | { | ||
529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
531 | |||
532 | ocfs2_lock_res_init_once(lockres); | ||
533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
534 | inode->i_generation, lockres->l_name); | ||
535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
537 | fp); | ||
538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
539 | } | ||
540 | |||
511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
512 | { | 542 | { |
513 | mlog_entry_void(); | 543 | mlog_entry_void(); |
@@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
724 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, |
725 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); |
726 | 756 | ||
757 | /* | ||
758 | * We can skip the bast for locks which don't enable caching - | ||
759 | * they'll be dropped at the earliest possible time anyway. | ||
760 | */ | ||
761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
762 | return; | ||
763 | |||
727 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); |
728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
729 | if (needs_downconvert) | 766 | if (needs_downconvert) |
@@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
732 | 769 | ||
733 | wake_up(&lockres->l_event); | 770 | wake_up(&lockres->l_event); |
734 | 771 | ||
735 | ocfs2_kick_vote_thread(osb); | 772 | ocfs2_wake_downconvert_thread(osb); |
736 | } | 773 | } |
737 | 774 | ||
738 | static void ocfs2_locking_ast(void *opaque) | 775 | static void ocfs2_locking_ast(void *opaque) |
@@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
935 | 972 | ||
936 | } | 973 | } |
937 | 974 | ||
975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
976 | struct ocfs2_lock_res *lockres) | ||
977 | { | ||
978 | int ret; | ||
979 | |||
980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
981 | if (ret) | ||
982 | lockres_remove_mask_waiter(lockres, mw); | ||
983 | else | ||
984 | ret = mw->mw_status; | ||
985 | /* Re-arm the completion in case we want to wait on it again */ | ||
986 | INIT_COMPLETION(mw->mw_complete); | ||
987 | return ret; | ||
988 | } | ||
989 | |||
938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
939 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, |
940 | int level, | 992 | int level, |
@@ -1089,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1089 | mlog_entry_void(); | 1141 | mlog_entry_void(); |
1090 | spin_lock_irqsave(&lockres->l_lock, flags); | 1142 | spin_lock_irqsave(&lockres->l_lock, flags); |
1091 | ocfs2_dec_holders(lockres, level); | 1143 | ocfs2_dec_holders(lockres, level); |
1092 | ocfs2_vote_on_unlock(osb, lockres); | 1144 | ocfs2_downconvert_on_unlock(osb, lockres); |
1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1145 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1094 | mlog_exit_void(); | 1146 | mlog_exit_void(); |
1095 | } | 1147 | } |
@@ -1147,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1147 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1199 | * We don't want to use LKM_LOCAL on a meta data lock as they |
1148 | * don't use a generation in their lock names. | 1200 | * don't use a generation in their lock names. |
1149 | */ | 1201 | */ |
1150 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 1202 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
1151 | if (ret) { | ||
1152 | mlog_errno(ret); | ||
1153 | goto bail; | ||
1154 | } | ||
1155 | |||
1156 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | ||
1157 | if (ret) { | 1203 | if (ret) { |
1158 | mlog_errno(ret); | 1204 | mlog_errno(ret); |
1159 | goto bail; | 1205 | goto bail; |
@@ -1311,76 +1357,221 @@ out: | |||
1311 | mlog_exit_void(); | 1357 | mlog_exit_void(); |
1312 | } | 1358 | } |
1313 | 1359 | ||
1314 | int ocfs2_data_lock_full(struct inode *inode, | 1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, |
1315 | int write, | 1361 | int level) |
1316 | int arg_flags) | ||
1317 | { | 1362 | { |
1318 | int status = 0, level; | 1363 | int ret; |
1319 | struct ocfs2_lock_res *lockres; | 1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1320 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | unsigned long flags; |
1366 | struct ocfs2_mask_waiter mw; | ||
1321 | 1367 | ||
1322 | BUG_ON(!inode); | 1368 | ocfs2_init_mask_waiter(&mw); |
1323 | 1369 | ||
1324 | mlog_entry_void(); | 1370 | retry_cancel: |
1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
1374 | if (ret) { | ||
1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
1377 | if (ret < 0) { | ||
1378 | mlog_errno(ret); | ||
1379 | goto out; | ||
1380 | } | ||
1381 | goto retry_cancel; | ||
1382 | } | ||
1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1325 | 1385 | ||
1326 | mlog(0, "inode %llu take %s DATA lock\n", | 1386 | ocfs2_wait_for_mask(&mw); |
1327 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1387 | goto retry_cancel; |
1328 | write ? "EXMODE" : "PRMODE"); | 1388 | } |
1329 | 1389 | ||
1330 | /* We'll allow faking a readonly data lock for | 1390 | ret = -ERESTARTSYS; |
1331 | * rodevices. */ | 1391 | /* |
1332 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 1392 | * We may still have gotten the lock, in which case there's no |
1333 | if (write) { | 1393 | * point to restarting the syscall. |
1334 | status = -EROFS; | 1394 | */ |
1335 | mlog_errno(status); | 1395 | if (lockres->l_level == level) |
1396 | ret = 0; | ||
1397 | |||
1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
1400 | |||
1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1402 | |||
1403 | out: | ||
1404 | return ret; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
1409 | * flock() calls. The locking approach this requires is sufficiently | ||
1410 | * different from all other cluster lock types that we implement a | ||
1411 | * separate path to the "low-level" dlm calls. In particular: | ||
1412 | * | ||
1413 | * - No optimization of lock levels is done - we take at exactly | ||
1414 | * what's been requested. | ||
1415 | * | ||
1416 | * - No lock caching is employed. We immediately downconvert to | ||
1417 | * no-lock at unlock time. This also means flock locks never go on | ||
1418 | * the blocking list. | ||
1419 | * | ||
1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
1421 | * sure to allow cancellation of a misbehaving application's flock() | ||
1422 | * request. | ||
1423 | * | ||
1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
1425 | * can simplify the code by requiring the caller to guarantee | ||
1426 | * serialization of dlmglue flock calls. | ||
1427 | */ | ||
1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
1429 | { | ||
1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
1432 | unsigned long flags; | ||
1433 | struct ocfs2_file_private *fp = file->private_data; | ||
1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1436 | struct ocfs2_mask_waiter mw; | ||
1437 | |||
1438 | ocfs2_init_mask_waiter(&mw); | ||
1439 | |||
1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
1441 | (lockres->l_level > LKM_NLMODE)) { | ||
1442 | mlog(ML_ERROR, | ||
1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
1445 | lockres->l_level); | ||
1446 | return -EINVAL; | ||
1447 | } | ||
1448 | |||
1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1453 | |||
1454 | /* | ||
1455 | * Get the lock at NLMODE to start - that way we | ||
1456 | * can cancel the upconvert request if need be. | ||
1457 | */ | ||
1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
1459 | if (ret < 0) { | ||
1460 | mlog_errno(ret); | ||
1461 | goto out; | ||
1336 | } | 1462 | } |
1337 | goto out; | 1463 | |
1464 | ret = ocfs2_wait_for_mask(&mw); | ||
1465 | if (ret) { | ||
1466 | mlog_errno(ret); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1338 | } | 1470 | } |
1339 | 1471 | ||
1340 | if (ocfs2_mount_local(osb)) | 1472 | lockres->l_action = OCFS2_AST_CONVERT; |
1341 | goto out; | 1473 | lkm_flags |= LKM_CONVERT; |
1474 | lockres->l_requested = level; | ||
1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
1342 | 1476 | ||
1343 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1344 | 1479 | ||
1345 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, |
1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
1483 | if (ret != DLM_NORMAL) { | ||
1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
1485 | ret = -EAGAIN; | ||
1486 | else { | ||
1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1488 | ret = -EINVAL; | ||
1489 | } | ||
1346 | 1490 | ||
1347 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 1491 | ocfs2_recover_from_dlm_error(lockres, 1); |
1348 | 0, arg_flags); | 1492 | lockres_remove_mask_waiter(lockres, &mw); |
1349 | if (status < 0 && status != -EAGAIN) | 1493 | goto out; |
1350 | mlog_errno(status); | 1494 | } |
1495 | |||
1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
1497 | if (ret == -ERESTARTSYS) { | ||
1498 | /* | ||
1499 | * Userspace can cause deadlock itself with | ||
1500 | * flock(). Current behavior locally is to allow the | ||
1501 | * deadlock, but abort the system call if a signal is | ||
1502 | * received. We follow this example, otherwise a | ||
1503 | * poorly written program could sit in kernel until | ||
1504 | * reboot. | ||
1505 | * | ||
1506 | * Handling this is a bit more complicated for Ocfs2 | ||
1507 | * though. We can't exit this function with an | ||
1508 | * outstanding lock request, so a cancel convert is | ||
1509 | * required. We intentionally overwrite 'ret' - if the | ||
1510 | * cancel fails and the lock was granted, it's easier | ||
1511 | * to just bubble success back up to the user. | ||
1512 | */ | ||
1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
1514 | } | ||
1351 | 1515 | ||
1352 | out: | 1516 | out: |
1353 | mlog_exit(status); | 1517 | |
1354 | return status; | 1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", |
1519 | lockres->l_name, ex, trylock, ret); | ||
1520 | return ret; | ||
1355 | } | 1521 | } |
1356 | 1522 | ||
1357 | /* see ocfs2_meta_lock_with_page() */ | 1523 | void ocfs2_file_unlock(struct file *file) |
1358 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
1359 | int write, | ||
1360 | struct page *page) | ||
1361 | { | 1524 | { |
1362 | int ret; | 1525 | int ret; |
1526 | unsigned long flags; | ||
1527 | struct ocfs2_file_private *fp = file->private_data; | ||
1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1530 | struct ocfs2_mask_waiter mw; | ||
1363 | 1531 | ||
1364 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 1532 | ocfs2_init_mask_waiter(&mw); |
1365 | if (ret == -EAGAIN) { | 1533 | |
1366 | unlock_page(page); | 1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
1367 | if (ocfs2_data_lock(inode, write) == 0) | 1535 | return; |
1368 | ocfs2_data_unlock(inode, write); | 1536 | |
1369 | ret = AOP_TRUNCATED_PAGE; | 1537 | if (lockres->l_level == LKM_NLMODE) |
1538 | return; | ||
1539 | |||
1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
1542 | lockres->l_action); | ||
1543 | |||
1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1545 | /* | ||
1546 | * Fake a blocking ast for the downconvert code. | ||
1547 | */ | ||
1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
1549 | lockres->l_blocking = LKM_EXMODE; | ||
1550 | |||
1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1554 | |||
1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
1556 | if (ret) { | ||
1557 | mlog_errno(ret); | ||
1558 | return; | ||
1370 | } | 1559 | } |
1371 | 1560 | ||
1372 | return ret; | 1561 | ret = ocfs2_wait_for_mask(&mw); |
1562 | if (ret) | ||
1563 | mlog_errno(ret); | ||
1373 | } | 1564 | } |
1374 | 1565 | ||
1375 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1376 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) |
1377 | { | 1568 | { |
1378 | int kick = 0; | 1569 | int kick = 0; |
1379 | 1570 | ||
1380 | mlog_entry_void(); | 1571 | mlog_entry_void(); |
1381 | 1572 | ||
1382 | /* If we know that another node is waiting on our lock, kick | 1573 | /* If we know that another node is waiting on our lock, kick |
1383 | * the vote thread * pre-emptively when we reach a release | 1574 | * the downconvert thread * pre-emptively when we reach a release |
1384 | * condition. */ | 1575 | * condition. */ |
1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1576 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1386 | switch(lockres->l_blocking) { | 1577 | switch(lockres->l_blocking) { |
@@ -1398,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
1398 | } | 1589 | } |
1399 | 1590 | ||
1400 | if (kick) | 1591 | if (kick) |
1401 | ocfs2_kick_vote_thread(osb); | 1592 | ocfs2_wake_downconvert_thread(osb); |
1402 | |||
1403 | mlog_exit_void(); | ||
1404 | } | ||
1405 | |||
1406 | void ocfs2_data_unlock(struct inode *inode, | ||
1407 | int write) | ||
1408 | { | ||
1409 | int level = write ? LKM_EXMODE : LKM_PRMODE; | ||
1410 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | ||
1411 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1412 | |||
1413 | mlog_entry_void(); | ||
1414 | |||
1415 | mlog(0, "inode %llu drop %s DATA lock\n", | ||
1416 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1417 | write ? "EXMODE" : "PRMODE"); | ||
1418 | |||
1419 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | ||
1420 | !ocfs2_mount_local(osb)) | ||
1421 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | ||
1422 | 1593 | ||
1423 | mlog_exit_void(); | 1594 | mlog_exit_void(); |
1424 | } | 1595 | } |
@@ -1442,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec) | |||
1442 | 1613 | ||
1443 | /* Call this with the lockres locked. I am reasonably sure we don't | 1614 | /* Call this with the lockres locked. I am reasonably sure we don't |
1444 | * need ip_lock in this function as anyone who would be changing those | 1615 | * need ip_lock in this function as anyone who would be changing those |
1445 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 1616 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ |
1446 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 1617 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) |
1447 | { | 1618 | { |
1448 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1619 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1449 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1620 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1450 | struct ocfs2_meta_lvb *lvb; | 1621 | struct ocfs2_meta_lvb *lvb; |
1451 | 1622 | ||
1452 | mlog_entry_void(); | 1623 | mlog_entry_void(); |
@@ -1496,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
1496 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 1667 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
1497 | { | 1668 | { |
1498 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1669 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1499 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1670 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1500 | struct ocfs2_meta_lvb *lvb; | 1671 | struct ocfs2_meta_lvb *lvb; |
1501 | 1672 | ||
1502 | mlog_entry_void(); | 1673 | mlog_entry_void(); |
@@ -1604,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre | |||
1604 | } | 1775 | } |
1605 | 1776 | ||
1606 | /* may or may not return a bh if it went to disk. */ | 1777 | /* may or may not return a bh if it went to disk. */ |
1607 | static int ocfs2_meta_lock_update(struct inode *inode, | 1778 | static int ocfs2_inode_lock_update(struct inode *inode, |
1608 | struct buffer_head **bh) | 1779 | struct buffer_head **bh) |
1609 | { | 1780 | { |
1610 | int status = 0; | 1781 | int status = 0; |
1611 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1782 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1612 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1783 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1613 | struct ocfs2_dinode *fe; | 1784 | struct ocfs2_dinode *fe; |
1614 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1785 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1615 | 1786 | ||
@@ -1721,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
1721 | * returns < 0 error if the callback will never be called, otherwise | 1892 | * returns < 0 error if the callback will never be called, otherwise |
1722 | * the result of the lock will be communicated via the callback. | 1893 | * the result of the lock will be communicated via the callback. |
1723 | */ | 1894 | */ |
1724 | int ocfs2_meta_lock_full(struct inode *inode, | 1895 | int ocfs2_inode_lock_full(struct inode *inode, |
1725 | struct buffer_head **ret_bh, | 1896 | struct buffer_head **ret_bh, |
1726 | int ex, | 1897 | int ex, |
1727 | int arg_flags) | 1898 | int arg_flags) |
@@ -1756,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1756 | wait_event(osb->recovery_event, | 1927 | wait_event(osb->recovery_event, |
1757 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1928 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1758 | 1929 | ||
1759 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 1930 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1760 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1931 | level = ex ? LKM_EXMODE : LKM_PRMODE; |
1761 | dlm_flags = 0; | 1932 | dlm_flags = 0; |
1762 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 1933 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
@@ -1795,11 +1966,11 @@ local: | |||
1795 | } | 1966 | } |
1796 | 1967 | ||
1797 | /* This is fun. The caller may want a bh back, or it may | 1968 | /* This is fun. The caller may want a bh back, or it may |
1798 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1969 | * not. ocfs2_inode_lock_update definitely wants one in, but |
1799 | * may or may not read one, depending on what's in the | 1970 | * may or may not read one, depending on what's in the |
1800 | * LVB. The result of all of this is that we've *only* gone to | 1971 | * LVB. The result of all of this is that we've *only* gone to |
1801 | * disk if we have to, so the complexity is worthwhile. */ | 1972 | * disk if we have to, so the complexity is worthwhile. */ |
1802 | status = ocfs2_meta_lock_update(inode, &local_bh); | 1973 | status = ocfs2_inode_lock_update(inode, &local_bh); |
1803 | if (status < 0) { | 1974 | if (status < 0) { |
1804 | if (status != -ENOENT) | 1975 | if (status != -ENOENT) |
1805 | mlog_errno(status); | 1976 | mlog_errno(status); |
@@ -1821,7 +1992,7 @@ bail: | |||
1821 | *ret_bh = NULL; | 1992 | *ret_bh = NULL; |
1822 | } | 1993 | } |
1823 | if (acquired) | 1994 | if (acquired) |
1824 | ocfs2_meta_unlock(inode, ex); | 1995 | ocfs2_inode_unlock(inode, ex); |
1825 | } | 1996 | } |
1826 | 1997 | ||
1827 | if (local_bh) | 1998 | if (local_bh) |
@@ -1832,19 +2003,20 @@ bail: | |||
1832 | } | 2003 | } |
1833 | 2004 | ||
1834 | /* | 2005 | /* |
1835 | * This is working around a lock inversion between tasks acquiring DLM locks | 2006 | * This is working around a lock inversion between tasks acquiring DLM |
1836 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 2007 | * locks while holding a page lock and the downconvert thread which |
1837 | * while acquiring page locks. | 2008 | * blocks dlm lock acquiry while acquiring page locks. |
1838 | * | 2009 | * |
1839 | * ** These _with_page variantes are only intended to be called from aop | 2010 | * ** These _with_page variantes are only intended to be called from aop |
1840 | * methods that hold page locks and return a very specific *positive* error | 2011 | * methods that hold page locks and return a very specific *positive* error |
1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2012 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
1842 | * | 2013 | * |
1843 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 2014 | * The DLM is called such that it returns -EAGAIN if it would have |
1844 | * waiting for the vote thread. In that case we unlock our page so the vote | 2015 | * blocked waiting for the downconvert thread. In that case we unlock |
1845 | * thread can make progress. Once we've done this we have to return | 2016 | * our page so the downconvert thread can make progress. Once we've |
1846 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 2017 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
1847 | * into the VFS who will then immediately retry the aop call. | 2018 | * that called us can bubble that back up into the VFS who will then |
2019 | * immediately retry the aop call. | ||
1848 | * | 2020 | * |
1849 | * We do a blocking lock and immediate unlock before returning, though, so that | 2021 | * We do a blocking lock and immediate unlock before returning, though, so that |
1850 | * the lock has a great chance of being cached on this node by the time the VFS | 2022 | * the lock has a great chance of being cached on this node by the time the VFS |
@@ -1852,32 +2024,32 @@ bail: | |||
1852 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2024 | * ping locks back and forth, but that's a risk we're willing to take to avoid |
1853 | * the lock inversion simply. | 2025 | * the lock inversion simply. |
1854 | */ | 2026 | */ |
1855 | int ocfs2_meta_lock_with_page(struct inode *inode, | 2027 | int ocfs2_inode_lock_with_page(struct inode *inode, |
1856 | struct buffer_head **ret_bh, | 2028 | struct buffer_head **ret_bh, |
1857 | int ex, | 2029 | int ex, |
1858 | struct page *page) | 2030 | struct page *page) |
1859 | { | 2031 | { |
1860 | int ret; | 2032 | int ret; |
1861 | 2033 | ||
1862 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2034 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
1863 | if (ret == -EAGAIN) { | 2035 | if (ret == -EAGAIN) { |
1864 | unlock_page(page); | 2036 | unlock_page(page); |
1865 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 2037 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) |
1866 | ocfs2_meta_unlock(inode, ex); | 2038 | ocfs2_inode_unlock(inode, ex); |
1867 | ret = AOP_TRUNCATED_PAGE; | 2039 | ret = AOP_TRUNCATED_PAGE; |
1868 | } | 2040 | } |
1869 | 2041 | ||
1870 | return ret; | 2042 | return ret; |
1871 | } | 2043 | } |
1872 | 2044 | ||
1873 | int ocfs2_meta_lock_atime(struct inode *inode, | 2045 | int ocfs2_inode_lock_atime(struct inode *inode, |
1874 | struct vfsmount *vfsmnt, | 2046 | struct vfsmount *vfsmnt, |
1875 | int *level) | 2047 | int *level) |
1876 | { | 2048 | { |
1877 | int ret; | 2049 | int ret; |
1878 | 2050 | ||
1879 | mlog_entry_void(); | 2051 | mlog_entry_void(); |
1880 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2052 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1881 | if (ret < 0) { | 2053 | if (ret < 0) { |
1882 | mlog_errno(ret); | 2054 | mlog_errno(ret); |
1883 | return ret; | 2055 | return ret; |
@@ -1890,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1890 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2062 | if (ocfs2_should_update_atime(inode, vfsmnt)) { |
1891 | struct buffer_head *bh = NULL; | 2063 | struct buffer_head *bh = NULL; |
1892 | 2064 | ||
1893 | ocfs2_meta_unlock(inode, 0); | 2065 | ocfs2_inode_unlock(inode, 0); |
1894 | ret = ocfs2_meta_lock(inode, &bh, 1); | 2066 | ret = ocfs2_inode_lock(inode, &bh, 1); |
1895 | if (ret < 0) { | 2067 | if (ret < 0) { |
1896 | mlog_errno(ret); | 2068 | mlog_errno(ret); |
1897 | return ret; | 2069 | return ret; |
@@ -1908,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1908 | return ret; | 2080 | return ret; |
1909 | } | 2081 | } |
1910 | 2082 | ||
1911 | void ocfs2_meta_unlock(struct inode *inode, | 2083 | void ocfs2_inode_unlock(struct inode *inode, |
1912 | int ex) | 2084 | int ex) |
1913 | { | 2085 | { |
1914 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2086 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1915 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 2087 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1916 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2088 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1917 | 2089 | ||
1918 | mlog_entry_void(); | 2090 | mlog_entry_void(); |
@@ -2320,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2320 | goto bail; | 2492 | goto bail; |
2321 | } | 2493 | } |
2322 | 2494 | ||
2323 | /* launch vote thread */ | 2495 | /* launch downconvert thread */ |
2324 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2496 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
2325 | if (IS_ERR(osb->vote_task)) { | 2497 | if (IS_ERR(osb->dc_task)) { |
2326 | status = PTR_ERR(osb->vote_task); | 2498 | status = PTR_ERR(osb->dc_task); |
2327 | osb->vote_task = NULL; | 2499 | osb->dc_task = NULL; |
2328 | mlog_errno(status); | 2500 | mlog_errno(status); |
2329 | goto bail; | 2501 | goto bail; |
2330 | } | 2502 | } |
@@ -2353,8 +2525,8 @@ local: | |||
2353 | bail: | 2525 | bail: |
2354 | if (status < 0) { | 2526 | if (status < 0) { |
2355 | ocfs2_dlm_shutdown_debug(osb); | 2527 | ocfs2_dlm_shutdown_debug(osb); |
2356 | if (osb->vote_task) | 2528 | if (osb->dc_task) |
2357 | kthread_stop(osb->vote_task); | 2529 | kthread_stop(osb->dc_task); |
2358 | } | 2530 | } |
2359 | 2531 | ||
2360 | mlog_exit(status); | 2532 | mlog_exit(status); |
@@ -2369,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2369 | 2541 | ||
2370 | ocfs2_drop_osb_locks(osb); | 2542 | ocfs2_drop_osb_locks(osb); |
2371 | 2543 | ||
2372 | if (osb->vote_task) { | 2544 | if (osb->dc_task) { |
2373 | kthread_stop(osb->vote_task); | 2545 | kthread_stop(osb->dc_task); |
2374 | osb->vote_task = NULL; | 2546 | osb->dc_task = NULL; |
2375 | } | 2547 | } |
2376 | 2548 | ||
2377 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2549 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
@@ -2527,7 +2699,7 @@ out: | |||
2527 | 2699 | ||
2528 | /* Mark the lockres as being dropped. It will no longer be | 2700 | /* Mark the lockres as being dropped. It will no longer be |
2529 | * queued if blocking, but we still may have to wait on it | 2701 | * queued if blocking, but we still may have to wait on it |
2530 | * being dequeued from the vote thread before we can consider | 2702 | * being dequeued from the downconvert thread before we can consider |
2531 | * it safe to drop. | 2703 | * it safe to drop. |
2532 | * | 2704 | * |
2533 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2705 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
@@ -2590,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2590 | status = err; | 2762 | status = err; |
2591 | 2763 | ||
2592 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2764 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2593 | &OCFS2_I(inode)->ip_data_lockres); | 2765 | &OCFS2_I(inode)->ip_inode_lockres); |
2594 | if (err < 0) | ||
2595 | mlog_errno(err); | ||
2596 | if (err < 0 && !status) | ||
2597 | status = err; | ||
2598 | |||
2599 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | ||
2600 | &OCFS2_I(inode)->ip_meta_lockres); | ||
2601 | if (err < 0) | 2766 | if (err < 0) |
2602 | mlog_errno(err); | 2767 | mlog_errno(err); |
2603 | if (err < 0 && !status) | 2768 | if (err < 0 && !status) |
@@ -2850,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2850 | inode = ocfs2_lock_res_inode(lockres); | 3015 | inode = ocfs2_lock_res_inode(lockres); |
2851 | mapping = inode->i_mapping; | 3016 | mapping = inode->i_mapping; |
2852 | 3017 | ||
3018 | if (S_ISREG(inode->i_mode)) | ||
3019 | goto out; | ||
3020 | |||
2853 | /* | 3021 | /* |
2854 | * We need this before the filemap_fdatawrite() so that it can | 3022 | * We need this before the filemap_fdatawrite() so that it can |
2855 | * transfer the dirty bit from the PTE to the | 3023 | * transfer the dirty bit from the PTE to the |
@@ -2875,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2875 | filemap_fdatawait(mapping); | 3043 | filemap_fdatawait(mapping); |
2876 | } | 3044 | } |
2877 | 3045 | ||
3046 | out: | ||
2878 | return UNBLOCK_CONTINUE; | 3047 | return UNBLOCK_CONTINUE; |
2879 | } | 3048 | } |
2880 | 3049 | ||
@@ -2903,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
2903 | 3072 | ||
2904 | /* | 3073 | /* |
2905 | * Does the final reference drop on our dentry lock. Right now this | 3074 | * Does the final reference drop on our dentry lock. Right now this |
2906 | * happens in the vote thread, but we could choose to simplify the | 3075 | * happens in the downconvert thread, but we could choose to simplify the |
2907 | * dlmglue API and push these off to the ocfs2_wq in the future. | 3076 | * dlmglue API and push these off to the ocfs2_wq in the future. |
2908 | */ | 3077 | */ |
2909 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 3078 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
@@ -3042,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3042 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3211 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
3043 | 3212 | ||
3044 | /* Detect whether a lock has been marked as going away while | 3213 | /* Detect whether a lock has been marked as going away while |
3045 | * the vote thread was processing other things. A lock can | 3214 | * the downconvert thread was processing other things. A lock can |
3046 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3215 | * still be marked with OCFS2_LOCK_FREEING after this check, |
3047 | * but short circuiting here will still save us some | 3216 | * but short circuiting here will still save us some |
3048 | * performance. */ | 3217 | * performance. */ |
@@ -3091,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3091 | 3260 | ||
3092 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3261 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); |
3093 | 3262 | ||
3094 | spin_lock(&osb->vote_task_lock); | 3263 | spin_lock(&osb->dc_task_lock); |
3095 | if (list_empty(&lockres->l_blocked_list)) { | 3264 | if (list_empty(&lockres->l_blocked_list)) { |
3096 | list_add_tail(&lockres->l_blocked_list, | 3265 | list_add_tail(&lockres->l_blocked_list, |
3097 | &osb->blocked_lock_list); | 3266 | &osb->blocked_lock_list); |
3098 | osb->blocked_lock_count++; | 3267 | osb->blocked_lock_count++; |
3099 | } | 3268 | } |
3100 | spin_unlock(&osb->vote_task_lock); | 3269 | spin_unlock(&osb->dc_task_lock); |
3270 | |||
3271 | mlog_exit_void(); | ||
3272 | } | ||
3273 | |||
3274 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
3275 | { | ||
3276 | unsigned long processed; | ||
3277 | struct ocfs2_lock_res *lockres; | ||
3278 | |||
3279 | mlog_entry_void(); | ||
3280 | |||
3281 | spin_lock(&osb->dc_task_lock); | ||
3282 | /* grab this early so we know to try again if a state change and | ||
3283 | * wake happens part-way through our work */ | ||
3284 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
3285 | |||
3286 | processed = osb->blocked_lock_count; | ||
3287 | while (processed) { | ||
3288 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
3289 | |||
3290 | lockres = list_entry(osb->blocked_lock_list.next, | ||
3291 | struct ocfs2_lock_res, l_blocked_list); | ||
3292 | list_del_init(&lockres->l_blocked_list); | ||
3293 | osb->blocked_lock_count--; | ||
3294 | spin_unlock(&osb->dc_task_lock); | ||
3295 | |||
3296 | BUG_ON(!processed); | ||
3297 | processed--; | ||
3298 | |||
3299 | ocfs2_process_blocked_lock(osb, lockres); | ||
3300 | |||
3301 | spin_lock(&osb->dc_task_lock); | ||
3302 | } | ||
3303 | spin_unlock(&osb->dc_task_lock); | ||
3101 | 3304 | ||
3102 | mlog_exit_void(); | 3305 | mlog_exit_void(); |
3103 | } | 3306 | } |
3307 | |||
3308 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
3309 | { | ||
3310 | int empty = 0; | ||
3311 | |||
3312 | spin_lock(&osb->dc_task_lock); | ||
3313 | if (list_empty(&osb->blocked_lock_list)) | ||
3314 | empty = 1; | ||
3315 | |||
3316 | spin_unlock(&osb->dc_task_lock); | ||
3317 | return empty; | ||
3318 | } | ||
3319 | |||
3320 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
3321 | { | ||
3322 | int should_wake = 0; | ||
3323 | |||
3324 | spin_lock(&osb->dc_task_lock); | ||
3325 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
3326 | should_wake = 1; | ||
3327 | spin_unlock(&osb->dc_task_lock); | ||
3328 | |||
3329 | return should_wake; | ||
3330 | } | ||
3331 | |||
3332 | int ocfs2_downconvert_thread(void *arg) | ||
3333 | { | ||
3334 | int status = 0; | ||
3335 | struct ocfs2_super *osb = arg; | ||
3336 | |||
3337 | /* only quit once we've been asked to stop and there is no more | ||
3338 | * work available */ | ||
3339 | while (!(kthread_should_stop() && | ||
3340 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
3341 | |||
3342 | wait_event_interruptible(osb->dc_event, | ||
3343 | ocfs2_downconvert_thread_should_wake(osb) || | ||
3344 | kthread_should_stop()); | ||
3345 | |||
3346 | mlog(0, "downconvert_thread: awoken\n"); | ||
3347 | |||
3348 | ocfs2_downconvert_thread_do_work(osb); | ||
3349 | } | ||
3350 | |||
3351 | osb->dc_task = NULL; | ||
3352 | return status; | ||
3353 | } | ||
3354 | |||
3355 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
3356 | { | ||
3357 | spin_lock(&osb->dc_task_lock); | ||
3358 | /* make sure the voting thread gets a swipe at whatever changes | ||
3359 | * the caller may have made to the voting state */ | ||
3360 | osb->dc_wake_sequence++; | ||
3361 | spin_unlock(&osb->dc_task_lock); | ||
3362 | wake_up(&osb->dc_event); | ||
3363 | } | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 87a785e41205..5f17243ba501 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -49,12 +49,12 @@ struct ocfs2_meta_lvb { | |||
49 | __be32 lvb_reserved2; | 49 | __be32 lvb_reserved2; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ | 52 | /* ocfs2_inode_lock_full() 'arg_flags' flags */ |
53 | /* don't wait on recovery. */ | 53 | /* don't wait on recovery. */ |
54 | #define OCFS2_META_LOCK_RECOVERY (0x01) | 54 | #define OCFS2_META_LOCK_RECOVERY (0x01) |
55 | /* Instruct the dlm not to queue ourselves on the other node. */ | 55 | /* Instruct the dlm not to queue ourselves on the other node. */ |
56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) | 56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) |
57 | /* don't block waiting for the vote thread, instead return -EAGAIN */ | 57 | /* don't block waiting for the downconvert thread, instead return -EAGAIN */ |
58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
59 | 59 | ||
60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
@@ -66,38 +66,32 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
66 | struct inode *inode); | 66 | struct inode *inode); |
67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, | 67 | void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, |
68 | u64 parent, struct inode *inode); | 68 | u64 parent, struct inode *inode); |
69 | struct ocfs2_file_private; | ||
70 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
71 | struct ocfs2_file_private *fp); | ||
69 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 72 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); |
70 | int ocfs2_create_new_inode_locks(struct inode *inode); | 73 | int ocfs2_create_new_inode_locks(struct inode *inode); |
71 | int ocfs2_drop_inode_locks(struct inode *inode); | 74 | int ocfs2_drop_inode_locks(struct inode *inode); |
72 | int ocfs2_data_lock_full(struct inode *inode, | ||
73 | int write, | ||
74 | int arg_flags); | ||
75 | #define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0) | ||
76 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
77 | int write, | ||
78 | struct page *page); | ||
79 | void ocfs2_data_unlock(struct inode *inode, | ||
80 | int write); | ||
81 | int ocfs2_rw_lock(struct inode *inode, int write); | 75 | int ocfs2_rw_lock(struct inode *inode, int write); |
82 | void ocfs2_rw_unlock(struct inode *inode, int write); | 76 | void ocfs2_rw_unlock(struct inode *inode, int write); |
83 | int ocfs2_open_lock(struct inode *inode); | 77 | int ocfs2_open_lock(struct inode *inode); |
84 | int ocfs2_try_open_lock(struct inode *inode, int write); | 78 | int ocfs2_try_open_lock(struct inode *inode, int write); |
85 | void ocfs2_open_unlock(struct inode *inode); | 79 | void ocfs2_open_unlock(struct inode *inode); |
86 | int ocfs2_meta_lock_atime(struct inode *inode, | 80 | int ocfs2_inode_lock_atime(struct inode *inode, |
87 | struct vfsmount *vfsmnt, | 81 | struct vfsmount *vfsmnt, |
88 | int *level); | 82 | int *level); |
89 | int ocfs2_meta_lock_full(struct inode *inode, | 83 | int ocfs2_inode_lock_full(struct inode *inode, |
90 | struct buffer_head **ret_bh, | 84 | struct buffer_head **ret_bh, |
91 | int ex, | 85 | int ex, |
92 | int arg_flags); | 86 | int arg_flags); |
93 | int ocfs2_meta_lock_with_page(struct inode *inode, | 87 | int ocfs2_inode_lock_with_page(struct inode *inode, |
94 | struct buffer_head **ret_bh, | 88 | struct buffer_head **ret_bh, |
95 | int ex, | 89 | int ex, |
96 | struct page *page); | 90 | struct page *page); |
97 | /* 99% of the time we don't want to supply any additional flags -- | 91 | /* 99% of the time we don't want to supply any additional flags -- |
98 | * those are for very specific cases only. */ | 92 | * those are for very specific cases only. */ |
99 | #define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0) | 93 | #define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) |
100 | void ocfs2_meta_unlock(struct inode *inode, | 94 | void ocfs2_inode_unlock(struct inode *inode, |
101 | int ex); | 95 | int ex); |
102 | int ocfs2_super_lock(struct ocfs2_super *osb, | 96 | int ocfs2_super_lock(struct ocfs2_super *osb, |
103 | int ex); | 97 | int ex); |
@@ -107,14 +101,17 @@ int ocfs2_rename_lock(struct ocfs2_super *osb); | |||
107 | void ocfs2_rename_unlock(struct ocfs2_super *osb); | 101 | void ocfs2_rename_unlock(struct ocfs2_super *osb); |
108 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); | 102 | int ocfs2_dentry_lock(struct dentry *dentry, int ex); |
109 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); | 103 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex); |
104 | int ocfs2_file_lock(struct file *file, int ex, int trylock); | ||
105 | void ocfs2_file_unlock(struct file *file); | ||
110 | 106 | ||
111 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 107 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); |
112 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 108 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
113 | struct ocfs2_lock_res *lockres); | 109 | struct ocfs2_lock_res *lockres); |
114 | 110 | ||
115 | /* for the vote thread */ | 111 | /* for the downconvert thread */ |
116 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 112 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
117 | struct ocfs2_lock_res *lockres); | 113 | struct ocfs2_lock_res *lockres); |
114 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | ||
118 | 115 | ||
119 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 116 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
120 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 117 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
diff --git a/fs/ocfs2/endian.h b/fs/ocfs2/endian.h index ff257628af16..1942e09f6ee5 100644 --- a/fs/ocfs2/endian.h +++ b/fs/ocfs2/endian.h | |||
@@ -37,11 +37,6 @@ static inline void le64_add_cpu(__le64 *var, u64 val) | |||
37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); | 37 | *var = cpu_to_le64(le64_to_cpu(*var) + val); |
38 | } | 38 | } |
39 | 39 | ||
40 | static inline void le32_and_cpu(__le32 *var, u32 val) | ||
41 | { | ||
42 | *var = cpu_to_le32(le32_to_cpu(*var) & val); | ||
43 | } | ||
44 | |||
45 | static inline void be32_add_cpu(__be32 *var, u32 val) | 40 | static inline void be32_add_cpu(__be32 *var, u32 val) |
46 | { | 41 | { |
47 | *var = cpu_to_be32(be32_to_cpu(*var) + val); | 42 | *var = cpu_to_be32(be32_to_cpu(*var) + val); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 535bfa9568a4..67527cebf214 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
58 | return ERR_PTR(-ESTALE); | 58 | return ERR_PTR(-ESTALE); |
59 | } | 59 | } |
60 | 60 | ||
61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0); | 61 | inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); |
62 | 62 | ||
63 | if (IS_ERR(inode)) | 63 | if (IS_ERR(inode)) |
64 | return (void *)inode; | 64 | return (void *)inode; |
@@ -95,7 +95,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
95 | mlog(0, "find parent of directory %llu\n", | 95 | mlog(0, "find parent of directory %llu\n", |
96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); | 96 | (unsigned long long)OCFS2_I(dir)->ip_blkno); |
97 | 97 | ||
98 | status = ocfs2_meta_lock(dir, NULL, 0); | 98 | status = ocfs2_inode_lock(dir, NULL, 0); |
99 | if (status < 0) { | 99 | if (status < 0) { |
100 | if (status != -ENOENT) | 100 | if (status != -ENOENT) |
101 | mlog_errno(status); | 101 | mlog_errno(status); |
@@ -109,7 +109,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
109 | goto bail_unlock; | 109 | goto bail_unlock; |
110 | } | 110 | } |
111 | 111 | ||
112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 112 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); |
113 | if (IS_ERR(inode)) { | 113 | if (IS_ERR(inode)) { |
114 | mlog(ML_ERROR, "Unable to create inode %llu\n", | 114 | mlog(ML_ERROR, "Unable to create inode %llu\n", |
115 | (unsigned long long)blkno); | 115 | (unsigned long long)blkno); |
@@ -126,7 +126,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) | |||
126 | parent->d_op = &ocfs2_dentry_ops; | 126 | parent->d_op = &ocfs2_dentry_ops; |
127 | 127 | ||
128 | bail_unlock: | 128 | bail_unlock: |
129 | ocfs2_meta_unlock(dir, 0); | 129 | ocfs2_inode_unlock(dir, 0); |
130 | 130 | ||
131 | bail: | 131 | bail: |
132 | mlog_exit_ptr(parent); | 132 | mlog_exit_ptr(parent); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b75b2e1f0e42..ed5d5232e85d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include "inode.h" | 51 | #include "inode.h" |
52 | #include "ioctl.h" | 52 | #include "ioctl.h" |
53 | #include "journal.h" | 53 | #include "journal.h" |
54 | #include "locks.h" | ||
54 | #include "mmap.h" | 55 | #include "mmap.h" |
55 | #include "suballoc.h" | 56 | #include "suballoc.h" |
56 | #include "super.h" | 57 | #include "super.h" |
@@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode) | |||
63 | return sync_mapping_buffers(inode->i_mapping); | 64 | return sync_mapping_buffers(inode->i_mapping); |
64 | } | 65 | } |
65 | 66 | ||
67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | ||
68 | { | ||
69 | struct ocfs2_file_private *fp; | ||
70 | |||
71 | fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); | ||
72 | if (!fp) | ||
73 | return -ENOMEM; | ||
74 | |||
75 | fp->fp_file = file; | ||
76 | mutex_init(&fp->fp_mutex); | ||
77 | ocfs2_file_lock_res_init(&fp->fp_flock, fp); | ||
78 | file->private_data = fp; | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void ocfs2_free_file_private(struct inode *inode, struct file *file) | ||
84 | { | ||
85 | struct ocfs2_file_private *fp = file->private_data; | ||
86 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
87 | |||
88 | if (fp) { | ||
89 | ocfs2_simple_drop_lockres(osb, &fp->fp_flock); | ||
90 | ocfs2_lock_res_free(&fp->fp_flock); | ||
91 | kfree(fp); | ||
92 | file->private_data = NULL; | ||
93 | } | ||
94 | } | ||
95 | |||
66 | static int ocfs2_file_open(struct inode *inode, struct file *file) | 96 | static int ocfs2_file_open(struct inode *inode, struct file *file) |
67 | { | 97 | { |
68 | int status; | 98 | int status; |
@@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
89 | 119 | ||
90 | oi->ip_open_count++; | 120 | oi->ip_open_count++; |
91 | spin_unlock(&oi->ip_lock); | 121 | spin_unlock(&oi->ip_lock); |
92 | status = 0; | 122 | |
123 | status = ocfs2_init_file_private(inode, file); | ||
124 | if (status) { | ||
125 | /* | ||
126 | * We want to set open count back if we're failing the | ||
127 | * open. | ||
128 | */ | ||
129 | spin_lock(&oi->ip_lock); | ||
130 | oi->ip_open_count--; | ||
131 | spin_unlock(&oi->ip_lock); | ||
132 | } | ||
133 | |||
93 | leave: | 134 | leave: |
94 | mlog_exit(status); | 135 | mlog_exit(status); |
95 | return status; | 136 | return status; |
@@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) | |||
108 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; | 149 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; |
109 | spin_unlock(&oi->ip_lock); | 150 | spin_unlock(&oi->ip_lock); |
110 | 151 | ||
152 | ocfs2_free_file_private(inode, file); | ||
153 | |||
111 | mlog_exit(0); | 154 | mlog_exit(0); |
112 | 155 | ||
113 | return 0; | 156 | return 0; |
114 | } | 157 | } |
115 | 158 | ||
159 | static int ocfs2_dir_open(struct inode *inode, struct file *file) | ||
160 | { | ||
161 | return ocfs2_init_file_private(inode, file); | ||
162 | } | ||
163 | |||
164 | static int ocfs2_dir_release(struct inode *inode, struct file *file) | ||
165 | { | ||
166 | ocfs2_free_file_private(inode, file); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
116 | static int ocfs2_sync_file(struct file *file, | 170 | static int ocfs2_sync_file(struct file *file, |
117 | struct dentry *dentry, | 171 | struct dentry *dentry, |
118 | int datasync) | 172 | int datasync) |
@@ -382,18 +436,13 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
382 | 436 | ||
383 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 437 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
384 | 438 | ||
385 | /* This forces other nodes to sync and drop their pages. Do | 439 | /* |
386 | * this even if we have a truncate without allocation change - | 440 | * The inode lock forced other nodes to sync and drop their |
387 | * ocfs2 cluster sizes can be much greater than page size, so | 441 | * pages, which (correctly) happens even if we have a truncate |
388 | * we have to truncate them anyway. */ | 442 | * without allocation change - ocfs2 cluster sizes can be much |
389 | status = ocfs2_data_lock(inode, 1); | 443 | * greater than page size, so we have to truncate them |
390 | if (status < 0) { | 444 | * anyway. |
391 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 445 | */ |
392 | |||
393 | mlog_errno(status); | ||
394 | goto bail; | ||
395 | } | ||
396 | |||
397 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | 446 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); |
398 | truncate_inode_pages(inode->i_mapping, new_i_size); | 447 | truncate_inode_pages(inode->i_mapping, new_i_size); |
399 | 448 | ||
@@ -403,7 +452,7 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
403 | if (status) | 452 | if (status) |
404 | mlog_errno(status); | 453 | mlog_errno(status); |
405 | 454 | ||
406 | goto bail_unlock_data; | 455 | goto bail_unlock_sem; |
407 | } | 456 | } |
408 | 457 | ||
409 | /* alright, we're going to need to do a full blown alloc size | 458 | /* alright, we're going to need to do a full blown alloc size |
@@ -413,25 +462,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
413 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); | 462 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); |
414 | if (status < 0) { | 463 | if (status < 0) { |
415 | mlog_errno(status); | 464 | mlog_errno(status); |
416 | goto bail_unlock_data; | 465 | goto bail_unlock_sem; |
417 | } | 466 | } |
418 | 467 | ||
419 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 468 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); |
420 | if (status < 0) { | 469 | if (status < 0) { |
421 | mlog_errno(status); | 470 | mlog_errno(status); |
422 | goto bail_unlock_data; | 471 | goto bail_unlock_sem; |
423 | } | 472 | } |
424 | 473 | ||
425 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | 474 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); |
426 | if (status < 0) { | 475 | if (status < 0) { |
427 | mlog_errno(status); | 476 | mlog_errno(status); |
428 | goto bail_unlock_data; | 477 | goto bail_unlock_sem; |
429 | } | 478 | } |
430 | 479 | ||
431 | /* TODO: orphan dir cleanup here. */ | 480 | /* TODO: orphan dir cleanup here. */ |
432 | bail_unlock_data: | 481 | bail_unlock_sem: |
433 | ocfs2_data_unlock(inode, 1); | ||
434 | |||
435 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 482 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
436 | 483 | ||
437 | bail: | 484 | bail: |
@@ -579,7 +626,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
579 | 626 | ||
580 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
581 | "clusters_to_add = %u, extents_to_split = %u\n", | 628 | "clusters_to_add = %u, extents_to_split = %u\n", |
582 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), |
583 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | 630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); |
584 | 631 | ||
585 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); |
@@ -760,7 +807,7 @@ restarted_transaction: | |||
760 | le32_to_cpu(fe->i_clusters), | 807 | le32_to_cpu(fe->i_clusters), |
761 | (unsigned long long)le64_to_cpu(fe->i_size)); | 808 | (unsigned long long)le64_to_cpu(fe->i_size)); |
762 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", | 809 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", |
763 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 810 | OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); |
764 | 811 | ||
765 | leave: | 812 | leave: |
766 | if (handle) { | 813 | if (handle) { |
@@ -917,7 +964,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
917 | struct buffer_head *di_bh, | 964 | struct buffer_head *di_bh, |
918 | u64 new_i_size) | 965 | u64 new_i_size) |
919 | { | 966 | { |
920 | int ret = 0, data_locked = 0; | 967 | int ret = 0; |
921 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 968 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
922 | 969 | ||
923 | BUG_ON(!di_bh); | 970 | BUG_ON(!di_bh); |
@@ -943,20 +990,6 @@ static int ocfs2_extend_file(struct inode *inode, | |||
943 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 990 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
944 | goto out_update_size; | 991 | goto out_update_size; |
945 | 992 | ||
946 | /* | ||
947 | * protect the pages that ocfs2_zero_extend is going to be | ||
948 | * pulling into the page cache.. we do this before the | ||
949 | * metadata extend so that we don't get into the situation | ||
950 | * where we've extended the metadata but can't get the data | ||
951 | * lock to zero. | ||
952 | */ | ||
953 | ret = ocfs2_data_lock(inode, 1); | ||
954 | if (ret < 0) { | ||
955 | mlog_errno(ret); | ||
956 | goto out; | ||
957 | } | ||
958 | data_locked = 1; | ||
959 | |||
960 | /* | 993 | /* |
961 | * The alloc sem blocks people in read/write from reading our | 994 | * The alloc sem blocks people in read/write from reading our |
962 | * allocation until we're done changing it. We depend on | 995 | * allocation until we're done changing it. We depend on |
@@ -980,7 +1013,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
980 | up_write(&oi->ip_alloc_sem); | 1013 | up_write(&oi->ip_alloc_sem); |
981 | 1014 | ||
982 | mlog_errno(ret); | 1015 | mlog_errno(ret); |
983 | goto out_unlock; | 1016 | goto out; |
984 | } | 1017 | } |
985 | } | 1018 | } |
986 | 1019 | ||
@@ -991,7 +1024,7 @@ static int ocfs2_extend_file(struct inode *inode, | |||
991 | 1024 | ||
992 | if (ret < 0) { | 1025 | if (ret < 0) { |
993 | mlog_errno(ret); | 1026 | mlog_errno(ret); |
994 | goto out_unlock; | 1027 | goto out; |
995 | } | 1028 | } |
996 | 1029 | ||
997 | out_update_size: | 1030 | out_update_size: |
@@ -999,10 +1032,6 @@ out_update_size: | |||
999 | if (ret < 0) | 1032 | if (ret < 0) |
1000 | mlog_errno(ret); | 1033 | mlog_errno(ret); |
1001 | 1034 | ||
1002 | out_unlock: | ||
1003 | if (data_locked) | ||
1004 | ocfs2_data_unlock(inode, 1); | ||
1005 | |||
1006 | out: | 1035 | out: |
1007 | return ret; | 1036 | return ret; |
1008 | } | 1037 | } |
@@ -1050,7 +1079,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1050 | } | 1079 | } |
1051 | } | 1080 | } |
1052 | 1081 | ||
1053 | status = ocfs2_meta_lock(inode, &bh, 1); | 1082 | status = ocfs2_inode_lock(inode, &bh, 1); |
1054 | if (status < 0) { | 1083 | if (status < 0) { |
1055 | if (status != -ENOENT) | 1084 | if (status != -ENOENT) |
1056 | mlog_errno(status); | 1085 | mlog_errno(status); |
@@ -1102,7 +1131,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1102 | bail_commit: | 1131 | bail_commit: |
1103 | ocfs2_commit_trans(osb, handle); | 1132 | ocfs2_commit_trans(osb, handle); |
1104 | bail_unlock: | 1133 | bail_unlock: |
1105 | ocfs2_meta_unlock(inode, 1); | 1134 | ocfs2_inode_unlock(inode, 1); |
1106 | bail_unlock_rw: | 1135 | bail_unlock_rw: |
1107 | if (size_change) | 1136 | if (size_change) |
1108 | ocfs2_rw_unlock(inode, 1); | 1137 | ocfs2_rw_unlock(inode, 1); |
@@ -1149,7 +1178,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
1149 | 1178 | ||
1150 | mlog_entry_void(); | 1179 | mlog_entry_void(); |
1151 | 1180 | ||
1152 | ret = ocfs2_meta_lock(inode, NULL, 0); | 1181 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1153 | if (ret) { | 1182 | if (ret) { |
1154 | if (ret != -ENOENT) | 1183 | if (ret != -ENOENT) |
1155 | mlog_errno(ret); | 1184 | mlog_errno(ret); |
@@ -1158,7 +1187,7 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
1158 | 1187 | ||
1159 | ret = generic_permission(inode, mask, NULL); | 1188 | ret = generic_permission(inode, mask, NULL); |
1160 | 1189 | ||
1161 | ocfs2_meta_unlock(inode, 0); | 1190 | ocfs2_inode_unlock(inode, 0); |
1162 | out: | 1191 | out: |
1163 | mlog_exit(ret); | 1192 | mlog_exit(ret); |
1164 | return ret; | 1193 | return ret; |
@@ -1630,7 +1659,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1630 | goto out; | 1659 | goto out; |
1631 | } | 1660 | } |
1632 | 1661 | ||
1633 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1662 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
1634 | if (ret) { | 1663 | if (ret) { |
1635 | mlog_errno(ret); | 1664 | mlog_errno(ret); |
1636 | goto out_rw_unlock; | 1665 | goto out_rw_unlock; |
@@ -1638,7 +1667,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1638 | 1667 | ||
1639 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | 1668 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
1640 | ret = -EPERM; | 1669 | ret = -EPERM; |
1641 | goto out_meta_unlock; | 1670 | goto out_inode_unlock; |
1642 | } | 1671 | } |
1643 | 1672 | ||
1644 | switch (sr->l_whence) { | 1673 | switch (sr->l_whence) { |
@@ -1652,7 +1681,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1652 | break; | 1681 | break; |
1653 | default: | 1682 | default: |
1654 | ret = -EINVAL; | 1683 | ret = -EINVAL; |
1655 | goto out_meta_unlock; | 1684 | goto out_inode_unlock; |
1656 | } | 1685 | } |
1657 | sr->l_whence = 0; | 1686 | sr->l_whence = 0; |
1658 | 1687 | ||
@@ -1663,14 +1692,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1663 | || (sr->l_start + llen) < 0 | 1692 | || (sr->l_start + llen) < 0 |
1664 | || (sr->l_start + llen) > max_off) { | 1693 | || (sr->l_start + llen) > max_off) { |
1665 | ret = -EINVAL; | 1694 | ret = -EINVAL; |
1666 | goto out_meta_unlock; | 1695 | goto out_inode_unlock; |
1667 | } | 1696 | } |
1668 | size = sr->l_start + sr->l_len; | 1697 | size = sr->l_start + sr->l_len; |
1669 | 1698 | ||
1670 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | 1699 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { |
1671 | if (sr->l_len <= 0) { | 1700 | if (sr->l_len <= 0) { |
1672 | ret = -EINVAL; | 1701 | ret = -EINVAL; |
1673 | goto out_meta_unlock; | 1702 | goto out_inode_unlock; |
1674 | } | 1703 | } |
1675 | } | 1704 | } |
1676 | 1705 | ||
@@ -1678,7 +1707,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1678 | ret = __ocfs2_write_remove_suid(inode, di_bh); | 1707 | ret = __ocfs2_write_remove_suid(inode, di_bh); |
1679 | if (ret) { | 1708 | if (ret) { |
1680 | mlog_errno(ret); | 1709 | mlog_errno(ret); |
1681 | goto out_meta_unlock; | 1710 | goto out_inode_unlock; |
1682 | } | 1711 | } |
1683 | } | 1712 | } |
1684 | 1713 | ||
@@ -1704,7 +1733,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1704 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1733 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1705 | if (ret) { | 1734 | if (ret) { |
1706 | mlog_errno(ret); | 1735 | mlog_errno(ret); |
1707 | goto out_meta_unlock; | 1736 | goto out_inode_unlock; |
1708 | } | 1737 | } |
1709 | 1738 | ||
1710 | /* | 1739 | /* |
@@ -1714,7 +1743,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1714 | if (IS_ERR(handle)) { | 1743 | if (IS_ERR(handle)) { |
1715 | ret = PTR_ERR(handle); | 1744 | ret = PTR_ERR(handle); |
1716 | mlog_errno(ret); | 1745 | mlog_errno(ret); |
1717 | goto out_meta_unlock; | 1746 | goto out_inode_unlock; |
1718 | } | 1747 | } |
1719 | 1748 | ||
1720 | if (change_size && i_size_read(inode) < size) | 1749 | if (change_size && i_size_read(inode) < size) |
@@ -1727,9 +1756,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
1727 | 1756 | ||
1728 | ocfs2_commit_trans(osb, handle); | 1757 | ocfs2_commit_trans(osb, handle); |
1729 | 1758 | ||
1730 | out_meta_unlock: | 1759 | out_inode_unlock: |
1731 | brelse(di_bh); | 1760 | brelse(di_bh); |
1732 | ocfs2_meta_unlock(inode, 1); | 1761 | ocfs2_inode_unlock(inode, 1); |
1733 | out_rw_unlock: | 1762 | out_rw_unlock: |
1734 | ocfs2_rw_unlock(inode, 1); | 1763 | ocfs2_rw_unlock(inode, 1); |
1735 | 1764 | ||
@@ -1799,7 +1828,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1799 | * if we need to make modifications here. | 1828 | * if we need to make modifications here. |
1800 | */ | 1829 | */ |
1801 | for(;;) { | 1830 | for(;;) { |
1802 | ret = ocfs2_meta_lock(inode, NULL, meta_level); | 1831 | ret = ocfs2_inode_lock(inode, NULL, meta_level); |
1803 | if (ret < 0) { | 1832 | if (ret < 0) { |
1804 | meta_level = -1; | 1833 | meta_level = -1; |
1805 | mlog_errno(ret); | 1834 | mlog_errno(ret); |
@@ -1817,7 +1846,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1817 | * set inode->i_size at the end of a write. */ | 1846 | * set inode->i_size at the end of a write. */ |
1818 | if (should_remove_suid(dentry)) { | 1847 | if (should_remove_suid(dentry)) { |
1819 | if (meta_level == 0) { | 1848 | if (meta_level == 0) { |
1820 | ocfs2_meta_unlock(inode, meta_level); | 1849 | ocfs2_inode_unlock(inode, meta_level); |
1821 | meta_level = 1; | 1850 | meta_level = 1; |
1822 | continue; | 1851 | continue; |
1823 | } | 1852 | } |
@@ -1886,7 +1915,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1886 | *ppos = saved_pos; | 1915 | *ppos = saved_pos; |
1887 | 1916 | ||
1888 | out_unlock: | 1917 | out_unlock: |
1889 | ocfs2_meta_unlock(inode, meta_level); | 1918 | ocfs2_inode_unlock(inode, meta_level); |
1890 | 1919 | ||
1891 | out: | 1920 | out: |
1892 | return ret; | 1921 | return ret; |
@@ -2099,12 +2128,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, | |||
2099 | /* | 2128 | /* |
2100 | * See the comment in ocfs2_file_aio_read() | 2129 | * See the comment in ocfs2_file_aio_read() |
2101 | */ | 2130 | */ |
2102 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2131 | ret = ocfs2_inode_lock(inode, NULL, 0); |
2103 | if (ret < 0) { | 2132 | if (ret < 0) { |
2104 | mlog_errno(ret); | 2133 | mlog_errno(ret); |
2105 | goto bail; | 2134 | goto bail; |
2106 | } | 2135 | } |
2107 | ocfs2_meta_unlock(inode, 0); | 2136 | ocfs2_inode_unlock(inode, 0); |
2108 | 2137 | ||
2109 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | 2138 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); |
2110 | 2139 | ||
@@ -2160,12 +2189,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | |||
2160 | * like i_size. This allows the checks down below | 2189 | * like i_size. This allows the checks down below |
2161 | * generic_file_aio_read() a chance of actually working. | 2190 | * generic_file_aio_read() a chance of actually working. |
2162 | */ | 2191 | */ |
2163 | ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 2192 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
2164 | if (ret < 0) { | 2193 | if (ret < 0) { |
2165 | mlog_errno(ret); | 2194 | mlog_errno(ret); |
2166 | goto bail; | 2195 | goto bail; |
2167 | } | 2196 | } |
2168 | ocfs2_meta_unlock(inode, lock_level); | 2197 | ocfs2_inode_unlock(inode, lock_level); |
2169 | 2198 | ||
2170 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 2199 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); |
2171 | if (ret == -EINVAL) | 2200 | if (ret == -EINVAL) |
@@ -2204,6 +2233,7 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
2204 | }; | 2233 | }; |
2205 | 2234 | ||
2206 | const struct file_operations ocfs2_fops = { | 2235 | const struct file_operations ocfs2_fops = { |
2236 | .llseek = generic_file_llseek, | ||
2207 | .read = do_sync_read, | 2237 | .read = do_sync_read, |
2208 | .write = do_sync_write, | 2238 | .write = do_sync_write, |
2209 | .mmap = ocfs2_mmap, | 2239 | .mmap = ocfs2_mmap, |
@@ -2216,16 +2246,21 @@ const struct file_operations ocfs2_fops = { | |||
2216 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT |
2217 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, |
2218 | #endif | 2248 | #endif |
2249 | .flock = ocfs2_flock, | ||
2219 | .splice_read = ocfs2_file_splice_read, | 2250 | .splice_read = ocfs2_file_splice_read, |
2220 | .splice_write = ocfs2_file_splice_write, | 2251 | .splice_write = ocfs2_file_splice_write, |
2221 | }; | 2252 | }; |
2222 | 2253 | ||
2223 | const struct file_operations ocfs2_dops = { | 2254 | const struct file_operations ocfs2_dops = { |
2255 | .llseek = generic_file_llseek, | ||
2224 | .read = generic_read_dir, | 2256 | .read = generic_read_dir, |
2225 | .readdir = ocfs2_readdir, | 2257 | .readdir = ocfs2_readdir, |
2226 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, |
2259 | .release = ocfs2_dir_release, | ||
2260 | .open = ocfs2_dir_open, | ||
2227 | .ioctl = ocfs2_ioctl, | 2261 | .ioctl = ocfs2_ioctl, |
2228 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT |
2229 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, |
2230 | #endif | 2264 | #endif |
2265 | .flock = ocfs2_flock, | ||
2231 | }; | 2266 | }; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 066f14add3a8..048ddcaf5c80 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -32,6 +32,12 @@ extern const struct inode_operations ocfs2_file_iops; | |||
32 | extern const struct inode_operations ocfs2_special_file_iops; | 32 | extern const struct inode_operations ocfs2_special_file_iops; |
33 | struct ocfs2_alloc_context; | 33 | struct ocfs2_alloc_context; |
34 | 34 | ||
35 | struct ocfs2_file_private { | ||
36 | struct file *fp_file; | ||
37 | struct mutex fp_mutex; | ||
38 | struct ocfs2_lock_res fp_flock; | ||
39 | }; | ||
40 | |||
35 | enum ocfs2_alloc_restarted { | 41 | enum ocfs2_alloc_restarted { |
36 | RESTART_NONE = 0, | 42 | RESTART_NONE = 0, |
37 | RESTART_TRANS, | 43 | RESTART_TRANS, |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index c4c36171240d..c0efd9489fe8 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -30,9 +30,6 @@ | |||
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> |
32 | 32 | ||
33 | #include <cluster/heartbeat.h> | ||
34 | #include <cluster/nodemanager.h> | ||
35 | |||
36 | #include <dlm/dlmapi.h> | 33 | #include <dlm/dlmapi.h> |
37 | 34 | ||
38 | #define MLOG_MASK_PREFIX ML_SUPER | 35 | #define MLOG_MASK_PREFIX ML_SUPER |
@@ -44,13 +41,9 @@ | |||
44 | #include "heartbeat.h" | 41 | #include "heartbeat.h" |
45 | #include "inode.h" | 42 | #include "inode.h" |
46 | #include "journal.h" | 43 | #include "journal.h" |
47 | #include "vote.h" | ||
48 | 44 | ||
49 | #include "buffer_head_io.h" | 45 | #include "buffer_head_io.h" |
50 | 46 | ||
51 | #define OCFS2_HB_NODE_DOWN_PRI (0x0000002) | ||
52 | #define OCFS2_HB_NODE_UP_PRI OCFS2_HB_NODE_DOWN_PRI | ||
53 | |||
54 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 47 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, |
55 | int bit); | 48 | int bit); |
56 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
@@ -64,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | |||
64 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 57 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
65 | { | 58 | { |
66 | spin_lock_init(&osb->node_map_lock); | 59 | spin_lock_init(&osb->node_map_lock); |
67 | ocfs2_node_map_init(&osb->mounted_map); | ||
68 | ocfs2_node_map_init(&osb->recovery_map); | 60 | ocfs2_node_map_init(&osb->recovery_map); |
69 | ocfs2_node_map_init(&osb->umount_map); | ||
70 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
71 | } | 62 | } |
72 | 63 | ||
@@ -87,24 +78,7 @@ static void ocfs2_do_node_down(int node_num, | |||
87 | return; | 78 | return; |
88 | } | 79 | } |
89 | 80 | ||
90 | if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) { | ||
91 | /* If a node is in the umount map, then we've been | ||
92 | * expecting him to go down and we know ahead of time | ||
93 | * that recovery is not necessary. */ | ||
94 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
95 | return; | ||
96 | } | ||
97 | |||
98 | ocfs2_recovery_thread(osb, node_num); | 81 | ocfs2_recovery_thread(osb, node_num); |
99 | |||
100 | ocfs2_remove_node_from_vote_queues(osb, node_num); | ||
101 | } | ||
102 | |||
103 | static void ocfs2_hb_node_down_cb(struct o2nm_node *node, | ||
104 | int node_num, | ||
105 | void *data) | ||
106 | { | ||
107 | ocfs2_do_node_down(node_num, (struct ocfs2_super *) data); | ||
108 | } | 82 | } |
109 | 83 | ||
110 | /* Called from the dlm when it's about to evict a node. We may also | 84 | /* Called from the dlm when it's about to evict a node. We may also |
@@ -121,27 +95,8 @@ static void ocfs2_dlm_eviction_cb(int node_num, | |||
121 | ocfs2_do_node_down(node_num, osb); | 95 | ocfs2_do_node_down(node_num, osb); |
122 | } | 96 | } |
123 | 97 | ||
124 | static void ocfs2_hb_node_up_cb(struct o2nm_node *node, | ||
125 | int node_num, | ||
126 | void *data) | ||
127 | { | ||
128 | struct ocfs2_super *osb = data; | ||
129 | |||
130 | BUG_ON(osb->node_num == node_num); | ||
131 | |||
132 | mlog(0, "node up event for %d\n", node_num); | ||
133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
134 | } | ||
135 | |||
136 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | 98 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) |
137 | { | 99 | { |
138 | o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB, | ||
139 | ocfs2_hb_node_down_cb, osb, | ||
140 | OCFS2_HB_NODE_DOWN_PRI); | ||
141 | |||
142 | o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB, | ||
143 | ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI); | ||
144 | |||
145 | /* Not exactly a heartbeat callback, but leads to essentially | 100 | /* Not exactly a heartbeat callback, but leads to essentially |
146 | * the same path so we set it up here. */ | 101 | * the same path so we set it up here. */ |
147 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | 102 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, |
@@ -149,39 +104,6 @@ void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | |||
149 | osb); | 104 | osb); |
150 | } | 105 | } |
151 | 106 | ||
152 | /* Most functions here are just stubs for now... */ | ||
153 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) | ||
154 | { | ||
155 | int status; | ||
156 | |||
157 | if (ocfs2_mount_local(osb)) | ||
158 | return 0; | ||
159 | |||
160 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); | ||
161 | if (status < 0) { | ||
162 | mlog_errno(status); | ||
163 | goto bail; | ||
164 | } | ||
165 | |||
166 | status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); | ||
167 | if (status < 0) { | ||
168 | mlog_errno(status); | ||
169 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
170 | } | ||
171 | |||
172 | bail: | ||
173 | return status; | ||
174 | } | ||
175 | |||
176 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) | ||
177 | { | ||
178 | if (ocfs2_mount_local(osb)) | ||
179 | return; | ||
180 | |||
181 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); | ||
182 | o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); | ||
183 | } | ||
184 | |||
185 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | 107 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) |
186 | { | 108 | { |
187 | int ret; | 109 | int ret; |
@@ -341,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb, | |||
341 | 263 | ||
342 | spin_lock(&osb->node_map_lock); | 264 | spin_lock(&osb->node_map_lock); |
343 | 265 | ||
344 | __ocfs2_node_map_clear_bit(&osb->mounted_map, num); | ||
345 | |||
346 | if (!test_bit(num, osb->recovery_map.map)) { | 266 | if (!test_bit(num, osb->recovery_map.map)) { |
347 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); |
348 | set = 1; | 268 | set = 1; |
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index e8fb079122e4..56859211888a 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
@@ -29,8 +29,6 @@ | |||
29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); |
30 | 30 | ||
31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); |
32 | int ocfs2_register_hb_callbacks(struct ocfs2_super *osb); | ||
33 | void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb); | ||
34 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | 32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); |
35 | 33 | ||
36 | /* node map functions - used to keep track of mounted and in-recovery | 34 | /* node map functions - used to keep track of mounted and in-recovery |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ebb2bbe30f35..7e9e4c79aec7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include "symlink.h" | 49 | #include "symlink.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "vote.h" | ||
53 | 52 | ||
54 | #include "buffer_head_io.h" | 53 | #include "buffer_head_io.h" |
55 | 54 | ||
@@ -58,8 +57,11 @@ struct ocfs2_find_inode_args | |||
58 | u64 fi_blkno; | 57 | u64 fi_blkno; |
59 | unsigned long fi_ino; | 58 | unsigned long fi_ino; |
60 | unsigned int fi_flags; | 59 | unsigned int fi_flags; |
60 | unsigned int fi_sysfile_type; | ||
61 | }; | 61 | }; |
62 | 62 | ||
63 | static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES]; | ||
64 | |||
63 | static int ocfs2_read_locked_inode(struct inode *inode, | 65 | static int ocfs2_read_locked_inode(struct inode *inode, |
64 | struct ocfs2_find_inode_args *args); | 66 | struct ocfs2_find_inode_args *args); |
65 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); | 67 | static int ocfs2_init_locked_inode(struct inode *inode, void *opaque); |
@@ -107,7 +109,8 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) | |||
107 | oi->ip_attr |= OCFS2_DIRSYNC_FL; | 109 | oi->ip_attr |= OCFS2_DIRSYNC_FL; |
108 | } | 110 | } |
109 | 111 | ||
110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | 112 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, |
113 | int sysfile_type) | ||
111 | { | 114 | { |
112 | struct inode *inode = NULL; | 115 | struct inode *inode = NULL; |
113 | struct super_block *sb = osb->sb; | 116 | struct super_block *sb = osb->sb; |
@@ -127,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) | |||
127 | args.fi_blkno = blkno; | 130 | args.fi_blkno = blkno; |
128 | args.fi_flags = flags; | 131 | args.fi_flags = flags; |
129 | args.fi_ino = ino_from_blkno(sb, blkno); | 132 | args.fi_ino = ino_from_blkno(sb, blkno); |
133 | args.fi_sysfile_type = sysfile_type; | ||
130 | 134 | ||
131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 135 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, |
132 | ocfs2_init_locked_inode, &args); | 136 | ocfs2_init_locked_inode, &args); |
@@ -201,6 +205,9 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) | |||
201 | 205 | ||
202 | inode->i_ino = args->fi_ino; | 206 | inode->i_ino = args->fi_ino; |
203 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; | 207 | OCFS2_I(inode)->ip_blkno = args->fi_blkno; |
208 | if (args->fi_sysfile_type != 0) | ||
209 | lockdep_set_class(&inode->i_mutex, | ||
210 | &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); | ||
204 | 211 | ||
205 | mlog_exit(0); | 212 | mlog_exit(0); |
206 | return 0; | 213 | return 0; |
@@ -322,7 +329,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
322 | */ | 329 | */ |
323 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); | 330 | BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL); |
324 | 331 | ||
325 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 332 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, |
326 | OCFS2_LOCK_TYPE_META, 0, inode); | 333 | OCFS2_LOCK_TYPE_META, 0, inode); |
327 | 334 | ||
328 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | 335 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, |
@@ -333,10 +340,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
333 | OCFS2_LOCK_TYPE_RW, inode->i_generation, | 340 | OCFS2_LOCK_TYPE_RW, inode->i_generation, |
334 | inode); | 341 | inode); |
335 | 342 | ||
336 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | ||
337 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, | ||
338 | inode); | ||
339 | |||
340 | ocfs2_set_inode_flags(inode); | 343 | ocfs2_set_inode_flags(inode); |
341 | 344 | ||
342 | status = 0; | 345 | status = 0; |
@@ -414,7 +417,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
414 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | 417 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) |
415 | generation = osb->fs_generation; | 418 | generation = osb->fs_generation; |
416 | 419 | ||
417 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 420 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, |
418 | OCFS2_LOCK_TYPE_META, | 421 | OCFS2_LOCK_TYPE_META, |
419 | generation, inode); | 422 | generation, inode); |
420 | 423 | ||
@@ -429,7 +432,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
429 | mlog_errno(status); | 432 | mlog_errno(status); |
430 | return status; | 433 | return status; |
431 | } | 434 | } |
432 | status = ocfs2_meta_lock(inode, NULL, 0); | 435 | status = ocfs2_inode_lock(inode, NULL, 0); |
433 | if (status) { | 436 | if (status) { |
434 | make_bad_inode(inode); | 437 | make_bad_inode(inode); |
435 | mlog_errno(status); | 438 | mlog_errno(status); |
@@ -484,7 +487,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
484 | 487 | ||
485 | bail: | 488 | bail: |
486 | if (can_lock) | 489 | if (can_lock) |
487 | ocfs2_meta_unlock(inode, 0); | 490 | ocfs2_inode_unlock(inode, 0); |
488 | 491 | ||
489 | if (status < 0) | 492 | if (status < 0) |
490 | make_bad_inode(inode); | 493 | make_bad_inode(inode); |
@@ -586,7 +589,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
586 | } | 589 | } |
587 | 590 | ||
588 | mutex_lock(&inode_alloc_inode->i_mutex); | 591 | mutex_lock(&inode_alloc_inode->i_mutex); |
589 | status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1); | 592 | status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); |
590 | if (status < 0) { | 593 | if (status < 0) { |
591 | mutex_unlock(&inode_alloc_inode->i_mutex); | 594 | mutex_unlock(&inode_alloc_inode->i_mutex); |
592 | 595 | ||
@@ -617,7 +620,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
617 | } | 620 | } |
618 | 621 | ||
619 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); | 622 | di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); |
620 | le32_and_cpu(&di->i_flags, ~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); | 623 | di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); |
621 | 624 | ||
622 | status = ocfs2_journal_dirty(handle, di_bh); | 625 | status = ocfs2_journal_dirty(handle, di_bh); |
623 | if (status < 0) { | 626 | if (status < 0) { |
@@ -635,7 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
635 | bail_commit: | 638 | bail_commit: |
636 | ocfs2_commit_trans(osb, handle); | 639 | ocfs2_commit_trans(osb, handle); |
637 | bail_unlock: | 640 | bail_unlock: |
638 | ocfs2_meta_unlock(inode_alloc_inode, 1); | 641 | ocfs2_inode_unlock(inode_alloc_inode, 1); |
639 | mutex_unlock(&inode_alloc_inode->i_mutex); | 642 | mutex_unlock(&inode_alloc_inode->i_mutex); |
640 | brelse(inode_alloc_bh); | 643 | brelse(inode_alloc_bh); |
641 | bail: | 644 | bail: |
@@ -709,7 +712,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
709 | * delete_inode operation. We do this now to avoid races with | 712 | * delete_inode operation. We do this now to avoid races with |
710 | * recovery completion on other nodes. */ | 713 | * recovery completion on other nodes. */ |
711 | mutex_lock(&orphan_dir_inode->i_mutex); | 714 | mutex_lock(&orphan_dir_inode->i_mutex); |
712 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 715 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
713 | if (status < 0) { | 716 | if (status < 0) { |
714 | mutex_unlock(&orphan_dir_inode->i_mutex); | 717 | mutex_unlock(&orphan_dir_inode->i_mutex); |
715 | 718 | ||
@@ -718,8 +721,8 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
718 | } | 721 | } |
719 | 722 | ||
720 | /* we do this while holding the orphan dir lock because we | 723 | /* we do this while holding the orphan dir lock because we |
721 | * don't want recovery being run from another node to vote for | 724 | * don't want recovery being run from another node to try an |
722 | * an inode delete on us -- this will result in two nodes | 725 | * inode delete underneath us -- this will result in two nodes |
723 | * truncating the same file! */ | 726 | * truncating the same file! */ |
724 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); | 727 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); |
725 | if (status < 0) { | 728 | if (status < 0) { |
@@ -733,7 +736,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
733 | mlog_errno(status); | 736 | mlog_errno(status); |
734 | 737 | ||
735 | bail_unlock_dir: | 738 | bail_unlock_dir: |
736 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 739 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
737 | mutex_unlock(&orphan_dir_inode->i_mutex); | 740 | mutex_unlock(&orphan_dir_inode->i_mutex); |
738 | brelse(orphan_dir_bh); | 741 | brelse(orphan_dir_bh); |
739 | bail: | 742 | bail: |
@@ -744,7 +747,7 @@ bail: | |||
744 | } | 747 | } |
745 | 748 | ||
746 | /* There is a series of simple checks that should be done before a | 749 | /* There is a series of simple checks that should be done before a |
747 | * vote is even considered. Encapsulate those in this function. */ | 750 | * trylock is even considered. Encapsulate those in this function. */ |
748 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | 751 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) |
749 | { | 752 | { |
750 | int ret = 0; | 753 | int ret = 0; |
@@ -758,14 +761,14 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
758 | goto bail; | 761 | goto bail; |
759 | } | 762 | } |
760 | 763 | ||
761 | /* If we're coming from process_vote we can't go into our own | 764 | /* If we're coming from downconvert_thread we can't go into our own |
762 | * voting [hello, deadlock city!], so unfortunately we just | 765 | * voting [hello, deadlock city!], so unfortunately we just |
763 | * have to skip deleting this guy. That's OK though because | 766 | * have to skip deleting this guy. That's OK though because |
764 | * the node who's doing the actual deleting should handle it | 767 | * the node who's doing the actual deleting should handle it |
765 | * anyway. */ | 768 | * anyway. */ |
766 | if (current == osb->vote_task) { | 769 | if (current == osb->dc_task) { |
767 | mlog(0, "Skipping delete of %lu because we're currently " | 770 | mlog(0, "Skipping delete of %lu because we're currently " |
768 | "in process_vote\n", inode->i_ino); | 771 | "in downconvert\n", inode->i_ino); |
769 | goto bail; | 772 | goto bail; |
770 | } | 773 | } |
771 | 774 | ||
@@ -779,10 +782,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
779 | goto bail_unlock; | 782 | goto bail_unlock; |
780 | } | 783 | } |
781 | 784 | ||
782 | /* If we have voted "yes" on the wipe of this inode for | 785 | /* If we have allowed wipe of this inode for another node, it |
783 | * another node, it will be marked here so we can safely skip | 786 | * will be marked here so we can safely skip it. Recovery will |
784 | * it. Recovery will cleanup any inodes we might inadvertantly | 787 | * cleanup any inodes we might inadvertently skip here. */ |
785 | * skip here. */ | ||
786 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { | 788 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { |
787 | mlog(0, "Skipping delete of %lu because another node " | 789 | mlog(0, "Skipping delete of %lu because another node " |
788 | "has done this for us.\n", inode->i_ino); | 790 | "has done this for us.\n", inode->i_ino); |
@@ -929,13 +931,13 @@ void ocfs2_delete_inode(struct inode *inode) | |||
929 | 931 | ||
930 | /* Lock down the inode. This gives us an up to date view of | 932 | /* Lock down the inode. This gives us an up to date view of |
931 | * its metadata (for verification), and allows us to | 933 | * its metadata (for verification), and allows us to |
932 | * serialize delete_inode votes. | 934 | * serialize delete_inode on multiple nodes. |
933 | * | 935 | * |
934 | * Even though we might be doing a truncate, we don't take the | 936 | * Even though we might be doing a truncate, we don't take the |
935 | * allocation lock here as it won't be needed - nobody will | 937 | * allocation lock here as it won't be needed - nobody will |
936 | * have the file open. | 938 | * have the file open. |
937 | */ | 939 | */ |
938 | status = ocfs2_meta_lock(inode, &di_bh, 1); | 940 | status = ocfs2_inode_lock(inode, &di_bh, 1); |
939 | if (status < 0) { | 941 | if (status < 0) { |
940 | if (status != -ENOENT) | 942 | if (status != -ENOENT) |
941 | mlog_errno(status); | 943 | mlog_errno(status); |
@@ -947,15 +949,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
947 | * before we go ahead and wipe the inode. */ | 949 | * before we go ahead and wipe the inode. */ |
948 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); | 950 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); |
949 | if (!wipe || status < 0) { | 951 | if (!wipe || status < 0) { |
950 | /* Error and inode busy vote both mean we won't be | 952 | /* Error and remote inode busy both mean we won't be |
951 | * removing the inode, so they take almost the same | 953 | * removing the inode, so they take almost the same |
952 | * path. */ | 954 | * path. */ |
953 | if (status < 0) | 955 | if (status < 0) |
954 | mlog_errno(status); | 956 | mlog_errno(status); |
955 | 957 | ||
956 | /* Someone in the cluster has voted to not wipe this | 958 | /* Someone in the cluster has disallowed a wipe of |
957 | * inode, or it was never completely orphaned. Write | 959 | * this inode, or it was never completely |
958 | * out the pages and exit now. */ | 960 | * orphaned. Write out the pages and exit now. */ |
959 | ocfs2_cleanup_delete_inode(inode, 1); | 961 | ocfs2_cleanup_delete_inode(inode, 1); |
960 | goto bail_unlock_inode; | 962 | goto bail_unlock_inode; |
961 | } | 963 | } |
@@ -981,7 +983,7 @@ void ocfs2_delete_inode(struct inode *inode) | |||
981 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 983 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; |
982 | 984 | ||
983 | bail_unlock_inode: | 985 | bail_unlock_inode: |
984 | ocfs2_meta_unlock(inode, 1); | 986 | ocfs2_inode_unlock(inode, 1); |
985 | brelse(di_bh); | 987 | brelse(di_bh); |
986 | bail_unblock: | 988 | bail_unblock: |
987 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); | 989 | status = sigprocmask(SIG_SETMASK, &oldset, NULL); |
@@ -1008,15 +1010,14 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1008 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1010 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, |
1009 | "Inode=%lu\n", inode->i_ino); | 1011 | "Inode=%lu\n", inode->i_ino); |
1010 | 1012 | ||
1011 | /* For remove delete_inode vote, we hold open lock before, | 1013 | /* To prevent remote deletes we hold open lock before, now it |
1012 | * now it is time to unlock PR and EX open locks. */ | 1014 | * is time to unlock PR and EX open locks. */ |
1013 | ocfs2_open_unlock(inode); | 1015 | ocfs2_open_unlock(inode); |
1014 | 1016 | ||
1015 | /* Do these before all the other work so that we don't bounce | 1017 | /* Do these before all the other work so that we don't bounce |
1016 | * the vote thread while waiting to destroy the locks. */ | 1018 | * the downconvert thread while waiting to destroy the locks. */ |
1017 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1019 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); |
1018 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); | 1020 | ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); |
1019 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); | ||
1020 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 1021 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); |
1021 | 1022 | ||
1022 | /* We very well may get a clear_inode before all an inodes | 1023 | /* We very well may get a clear_inode before all an inodes |
@@ -1039,8 +1040,7 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1039 | mlog_errno(status); | 1040 | mlog_errno(status); |
1040 | 1041 | ||
1041 | ocfs2_lock_res_free(&oi->ip_rw_lockres); | 1042 | ocfs2_lock_res_free(&oi->ip_rw_lockres); |
1042 | ocfs2_lock_res_free(&oi->ip_meta_lockres); | 1043 | ocfs2_lock_res_free(&oi->ip_inode_lockres); |
1043 | ocfs2_lock_res_free(&oi->ip_data_lockres); | ||
1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); | 1044 | ocfs2_lock_res_free(&oi->ip_open_lockres); |
1045 | 1045 | ||
1046 | ocfs2_metadata_cache_purge(inode); | 1046 | ocfs2_metadata_cache_purge(inode); |
@@ -1184,15 +1184,15 @@ int ocfs2_inode_revalidate(struct dentry *dentry) | |||
1184 | } | 1184 | } |
1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1185 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1186 | 1186 | ||
1187 | /* Let ocfs2_meta_lock do the work of updating our struct | 1187 | /* Let ocfs2_inode_lock do the work of updating our struct |
1188 | * inode for us. */ | 1188 | * inode for us. */ |
1189 | status = ocfs2_meta_lock(inode, NULL, 0); | 1189 | status = ocfs2_inode_lock(inode, NULL, 0); |
1190 | if (status < 0) { | 1190 | if (status < 0) { |
1191 | if (status != -ENOENT) | 1191 | if (status != -ENOENT) |
1192 | mlog_errno(status); | 1192 | mlog_errno(status); |
1193 | goto bail; | 1193 | goto bail; |
1194 | } | 1194 | } |
1195 | ocfs2_meta_unlock(inode, 0); | 1195 | ocfs2_inode_unlock(inode, 0); |
1196 | bail: | 1196 | bail: |
1197 | mlog_exit(status); | 1197 | mlog_exit(status); |
1198 | 1198 | ||
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 70e881c55536..390a85596aa0 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -34,8 +34,7 @@ struct ocfs2_inode_info | |||
34 | u64 ip_blkno; | 34 | u64 ip_blkno; |
35 | 35 | ||
36 | struct ocfs2_lock_res ip_rw_lockres; | 36 | struct ocfs2_lock_res ip_rw_lockres; |
37 | struct ocfs2_lock_res ip_meta_lockres; | 37 | struct ocfs2_lock_res ip_inode_lockres; |
38 | struct ocfs2_lock_res ip_data_lockres; | ||
39 | struct ocfs2_lock_res ip_open_lockres; | 38 | struct ocfs2_lock_res ip_open_lockres; |
40 | 39 | ||
41 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ |
@@ -121,9 +120,10 @@ void ocfs2_delete_inode(struct inode *inode); | |||
121 | void ocfs2_drop_inode(struct inode *inode); | 120 | void ocfs2_drop_inode(struct inode *inode); |
122 | 121 | ||
123 | /* Flags for ocfs2_iget() */ | 122 | /* Flags for ocfs2_iget() */ |
124 | #define OCFS2_FI_FLAG_SYSFILE 0x4 | 123 | #define OCFS2_FI_FLAG_SYSFILE 0x1 |
125 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x8 | 124 | #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 |
126 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); | 125 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, |
126 | int sysfile_type); | ||
127 | int ocfs2_inode_init_private(struct inode *inode); | 127 | int ocfs2_inode_init_private(struct inode *inode); |
128 | int ocfs2_inode_revalidate(struct dentry *dentry); | 128 | int ocfs2_inode_revalidate(struct dentry *dentry); |
129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | 129 | int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 87dcece7e1b5..5177fba5162b 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -20,6 +20,7 @@ | |||
20 | 20 | ||
21 | #include "ocfs2_fs.h" | 21 | #include "ocfs2_fs.h" |
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "resize.h" | ||
23 | 24 | ||
24 | #include <linux/ext2_fs.h> | 25 | #include <linux/ext2_fs.h> |
25 | 26 | ||
@@ -27,14 +28,14 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | |||
27 | { | 28 | { |
28 | int status; | 29 | int status; |
29 | 30 | ||
30 | status = ocfs2_meta_lock(inode, NULL, 0); | 31 | status = ocfs2_inode_lock(inode, NULL, 0); |
31 | if (status < 0) { | 32 | if (status < 0) { |
32 | mlog_errno(status); | 33 | mlog_errno(status); |
33 | return status; | 34 | return status; |
34 | } | 35 | } |
35 | ocfs2_get_inode_flags(OCFS2_I(inode)); | 36 | ocfs2_get_inode_flags(OCFS2_I(inode)); |
36 | *flags = OCFS2_I(inode)->ip_attr; | 37 | *flags = OCFS2_I(inode)->ip_attr; |
37 | ocfs2_meta_unlock(inode, 0); | 38 | ocfs2_inode_unlock(inode, 0); |
38 | 39 | ||
39 | mlog_exit(status); | 40 | mlog_exit(status); |
40 | return status; | 41 | return status; |
@@ -52,7 +53,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
52 | 53 | ||
53 | mutex_lock(&inode->i_mutex); | 54 | mutex_lock(&inode->i_mutex); |
54 | 55 | ||
55 | status = ocfs2_meta_lock(inode, &bh, 1); | 56 | status = ocfs2_inode_lock(inode, &bh, 1); |
56 | if (status < 0) { | 57 | if (status < 0) { |
57 | mlog_errno(status); | 58 | mlog_errno(status); |
58 | goto bail; | 59 | goto bail; |
@@ -100,7 +101,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
100 | 101 | ||
101 | ocfs2_commit_trans(osb, handle); | 102 | ocfs2_commit_trans(osb, handle); |
102 | bail_unlock: | 103 | bail_unlock: |
103 | ocfs2_meta_unlock(inode, 1); | 104 | ocfs2_inode_unlock(inode, 1); |
104 | bail: | 105 | bail: |
105 | mutex_unlock(&inode->i_mutex); | 106 | mutex_unlock(&inode->i_mutex); |
106 | 107 | ||
@@ -115,8 +116,10 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
115 | unsigned int cmd, unsigned long arg) | 116 | unsigned int cmd, unsigned long arg) |
116 | { | 117 | { |
117 | unsigned int flags; | 118 | unsigned int flags; |
119 | int new_clusters; | ||
118 | int status; | 120 | int status; |
119 | struct ocfs2_space_resv sr; | 121 | struct ocfs2_space_resv sr; |
122 | struct ocfs2_new_group_input input; | ||
120 | 123 | ||
121 | switch (cmd) { | 124 | switch (cmd) { |
122 | case OCFS2_IOC_GETFLAGS: | 125 | case OCFS2_IOC_GETFLAGS: |
@@ -140,6 +143,23 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
140 | return -EFAULT; | 143 | return -EFAULT; |
141 | 144 | ||
142 | return ocfs2_change_file_space(filp, cmd, &sr); | 145 | return ocfs2_change_file_space(filp, cmd, &sr); |
146 | case OCFS2_IOC_GROUP_EXTEND: | ||
147 | if (!capable(CAP_SYS_RESOURCE)) | ||
148 | return -EPERM; | ||
149 | |||
150 | if (get_user(new_clusters, (int __user *)arg)) | ||
151 | return -EFAULT; | ||
152 | |||
153 | return ocfs2_group_extend(inode, new_clusters); | ||
154 | case OCFS2_IOC_GROUP_ADD: | ||
155 | case OCFS2_IOC_GROUP_ADD64: | ||
156 | if (!capable(CAP_SYS_RESOURCE)) | ||
157 | return -EPERM; | ||
158 | |||
159 | if (copy_from_user(&input, (int __user *) arg, sizeof(input))) | ||
160 | return -EFAULT; | ||
161 | |||
162 | return ocfs2_group_add(inode, &input); | ||
143 | default: | 163 | default: |
144 | return -ENOTTY; | 164 | return -ENOTTY; |
145 | } | 165 | } |
@@ -162,6 +182,9 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
162 | case OCFS2_IOC_RESVSP64: | 182 | case OCFS2_IOC_RESVSP64: |
163 | case OCFS2_IOC_UNRESVSP: | 183 | case OCFS2_IOC_UNRESVSP: |
164 | case OCFS2_IOC_UNRESVSP64: | 184 | case OCFS2_IOC_UNRESVSP64: |
185 | case OCFS2_IOC_GROUP_EXTEND: | ||
186 | case OCFS2_IOC_GROUP_ADD: | ||
187 | case OCFS2_IOC_GROUP_ADD64: | ||
165 | break; | 188 | break; |
166 | default: | 189 | default: |
167 | return -ENOIOCTLCMD; | 190 | return -ENOIOCTLCMD; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f9d01e25298d..f31c7e8c19c3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "localalloc.h" | 44 | #include "localalloc.h" |
45 | #include "slot_map.h" | 45 | #include "slot_map.h" |
46 | #include "super.h" | 46 | #include "super.h" |
47 | #include "vote.h" | ||
48 | #include "sysfile.h" | 47 | #include "sysfile.h" |
49 | 48 | ||
50 | #include "buffer_head_io.h" | 49 | #include "buffer_head_io.h" |
@@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 102 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", |
104 | journal->j_trans_id, flushed); | 103 | journal->j_trans_id, flushed); |
105 | 104 | ||
106 | ocfs2_kick_vote_thread(osb); | 105 | ocfs2_wake_downconvert_thread(osb); |
107 | wake_up(&journal->j_checkpointed); | 106 | wake_up(&journal->j_checkpointed); |
108 | finally: | 107 | finally: |
109 | mlog_exit(status); | 108 | mlog_exit(status); |
@@ -174,6 +173,12 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
174 | * transaction. extend_trans will either extend the current handle by | 173 | * transaction. extend_trans will either extend the current handle by |
175 | * nblocks, or commit it and start a new one with nblocks credits. | 174 | * nblocks, or commit it and start a new one with nblocks credits. |
176 | * | 175 | * |
176 | * This might call journal_restart() which will commit dirty buffers | ||
177 | * and then restart the transaction. Before calling | ||
178 | * ocfs2_extend_trans(), any changed blocks should have been | ||
179 | * dirtied. After calling it, all blocks which need to be changed must | ||
180 | * go through another set of journal_access/journal_dirty calls. | ||
181 | * | ||
177 | * WARNING: This will not release any semaphores or disk locks taken | 182 | * WARNING: This will not release any semaphores or disk locks taken |
178 | * during the transaction, so make sure they were taken *before* | 183 | * during the transaction, so make sure they were taken *before* |
179 | * start_trans or we'll have ordering deadlocks. | 184 | * start_trans or we'll have ordering deadlocks. |
@@ -193,11 +198,15 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
193 | 198 | ||
194 | mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); | 199 | mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); |
195 | 200 | ||
201 | #ifdef OCFS2_DEBUG_FS | ||
202 | status = 1; | ||
203 | #else | ||
196 | status = journal_extend(handle, nblocks); | 204 | status = journal_extend(handle, nblocks); |
197 | if (status < 0) { | 205 | if (status < 0) { |
198 | mlog_errno(status); | 206 | mlog_errno(status); |
199 | goto bail; | 207 | goto bail; |
200 | } | 208 | } |
209 | #endif | ||
201 | 210 | ||
202 | if (status > 0) { | 211 | if (status > 0) { |
203 | mlog(0, "journal_extend failed, trying journal_restart\n"); | 212 | mlog(0, "journal_extend failed, trying journal_restart\n"); |
@@ -304,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
304 | return err; | 313 | return err; |
305 | } | 314 | } |
306 | 315 | ||
307 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) | 316 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) |
308 | 317 | ||
309 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 318 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
310 | { | 319 | { |
311 | journal_t *journal = osb->journal->j_journal; | 320 | journal_t *journal = osb->journal->j_journal; |
321 | unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | ||
322 | |||
323 | if (osb->osb_commit_interval) | ||
324 | commit_interval = osb->osb_commit_interval; | ||
312 | 325 | ||
313 | spin_lock(&journal->j_state_lock); | 326 | spin_lock(&journal->j_state_lock); |
314 | journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | 327 | journal->j_commit_interval = commit_interval; |
315 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 328 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
316 | journal->j_flags |= JFS_BARRIER; | 329 | journal->j_flags |= JFS_BARRIER; |
317 | else | 330 | else |
@@ -327,7 +340,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
327 | struct ocfs2_dinode *di = NULL; | 340 | struct ocfs2_dinode *di = NULL; |
328 | struct buffer_head *bh = NULL; | 341 | struct buffer_head *bh = NULL; |
329 | struct ocfs2_super *osb; | 342 | struct ocfs2_super *osb; |
330 | int meta_lock = 0; | 343 | int inode_lock = 0; |
331 | 344 | ||
332 | mlog_entry_void(); | 345 | mlog_entry_void(); |
333 | 346 | ||
@@ -357,14 +370,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
357 | /* Skip recovery waits here - journal inode metadata never | 370 | /* Skip recovery waits here - journal inode metadata never |
358 | * changes in a live cluster so it can be considered an | 371 | * changes in a live cluster so it can be considered an |
359 | * exception to the rule. */ | 372 | * exception to the rule. */ |
360 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 373 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
361 | if (status < 0) { | 374 | if (status < 0) { |
362 | if (status != -ERESTARTSYS) | 375 | if (status != -ERESTARTSYS) |
363 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 376 | mlog(ML_ERROR, "Could not get lock on journal!\n"); |
364 | goto done; | 377 | goto done; |
365 | } | 378 | } |
366 | 379 | ||
367 | meta_lock = 1; | 380 | inode_lock = 1; |
368 | di = (struct ocfs2_dinode *)bh->b_data; | 381 | di = (struct ocfs2_dinode *)bh->b_data; |
369 | 382 | ||
370 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 383 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { |
@@ -404,8 +417,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
404 | status = 0; | 417 | status = 0; |
405 | done: | 418 | done: |
406 | if (status < 0) { | 419 | if (status < 0) { |
407 | if (meta_lock) | 420 | if (inode_lock) |
408 | ocfs2_meta_unlock(inode, 1); | 421 | ocfs2_inode_unlock(inode, 1); |
409 | if (bh != NULL) | 422 | if (bh != NULL) |
410 | brelse(bh); | 423 | brelse(bh); |
411 | if (inode) { | 424 | if (inode) { |
@@ -534,7 +547,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
534 | OCFS2_I(inode)->ip_open_count--; | 547 | OCFS2_I(inode)->ip_open_count--; |
535 | 548 | ||
536 | /* unlock our journal */ | 549 | /* unlock our journal */ |
537 | ocfs2_meta_unlock(inode, 1); | 550 | ocfs2_inode_unlock(inode, 1); |
538 | 551 | ||
539 | brelse(journal->j_bh); | 552 | brelse(journal->j_bh); |
540 | journal->j_bh = NULL; | 553 | journal->j_bh = NULL; |
@@ -873,8 +886,8 @@ restart: | |||
873 | ocfs2_super_unlock(osb, 1); | 886 | ocfs2_super_unlock(osb, 1); |
874 | 887 | ||
875 | /* We always run recovery on our own orphan dir - the dead | 888 | /* We always run recovery on our own orphan dir - the dead |
876 | * node(s) may have voted "no" on an inode delete earlier. A | 889 | * node(s) may have disallowed a previous inode delete. Re-processing |
877 | * revote is therefore required. */ | 890 | * is therefore required. */ |
878 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 891 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
879 | NULL); | 892 | NULL); |
880 | 893 | ||
@@ -963,9 +976,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
963 | } | 976 | } |
964 | SET_INODE_JOURNAL(inode); | 977 | SET_INODE_JOURNAL(inode); |
965 | 978 | ||
966 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 979 | status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
967 | if (status < 0) { | 980 | if (status < 0) { |
968 | mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); | 981 | mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); |
969 | if (status != -ERESTARTSYS) | 982 | if (status != -ERESTARTSYS) |
970 | mlog(ML_ERROR, "Could not lock journal!\n"); | 983 | mlog(ML_ERROR, "Could not lock journal!\n"); |
971 | goto done; | 984 | goto done; |
@@ -1037,7 +1050,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1037 | done: | 1050 | done: |
1038 | /* drop the lock on this nodes journal */ | 1051 | /* drop the lock on this nodes journal */ |
1039 | if (got_lock) | 1052 | if (got_lock) |
1040 | ocfs2_meta_unlock(inode, 1); | 1053 | ocfs2_inode_unlock(inode, 1); |
1041 | 1054 | ||
1042 | if (inode) | 1055 | if (inode) |
1043 | iput(inode); | 1056 | iput(inode); |
@@ -1152,14 +1165,14 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, | |||
1152 | SET_INODE_JOURNAL(inode); | 1165 | SET_INODE_JOURNAL(inode); |
1153 | 1166 | ||
1154 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; | 1167 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; |
1155 | status = ocfs2_meta_lock_full(inode, NULL, 1, flags); | 1168 | status = ocfs2_inode_lock_full(inode, NULL, 1, flags); |
1156 | if (status < 0) { | 1169 | if (status < 0) { |
1157 | if (status != -EAGAIN) | 1170 | if (status != -EAGAIN) |
1158 | mlog_errno(status); | 1171 | mlog_errno(status); |
1159 | goto bail; | 1172 | goto bail; |
1160 | } | 1173 | } |
1161 | 1174 | ||
1162 | ocfs2_meta_unlock(inode, 1); | 1175 | ocfs2_inode_unlock(inode, 1); |
1163 | bail: | 1176 | bail: |
1164 | if (inode) | 1177 | if (inode) |
1165 | iput(inode); | 1178 | iput(inode); |
@@ -1231,7 +1244,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, | |||
1231 | 1244 | ||
1232 | /* Skip bad inodes so that recovery can continue */ | 1245 | /* Skip bad inodes so that recovery can continue */ |
1233 | iter = ocfs2_iget(p->osb, ino, | 1246 | iter = ocfs2_iget(p->osb, ino, |
1234 | OCFS2_FI_FLAG_ORPHAN_RECOVERY); | 1247 | OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); |
1235 | if (IS_ERR(iter)) | 1248 | if (IS_ERR(iter)) |
1236 | return 0; | 1249 | return 0; |
1237 | 1250 | ||
@@ -1267,7 +1280,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1267 | } | 1280 | } |
1268 | 1281 | ||
1269 | mutex_lock(&orphan_dir_inode->i_mutex); | 1282 | mutex_lock(&orphan_dir_inode->i_mutex); |
1270 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0); | 1283 | status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); |
1271 | if (status < 0) { | 1284 | if (status < 0) { |
1272 | mlog_errno(status); | 1285 | mlog_errno(status); |
1273 | goto out; | 1286 | goto out; |
@@ -1277,12 +1290,13 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1277 | ocfs2_orphan_filldir); | 1290 | ocfs2_orphan_filldir); |
1278 | if (status) { | 1291 | if (status) { |
1279 | mlog_errno(status); | 1292 | mlog_errno(status); |
1280 | goto out; | 1293 | goto out_cluster; |
1281 | } | 1294 | } |
1282 | 1295 | ||
1283 | *head = priv.head; | 1296 | *head = priv.head; |
1284 | 1297 | ||
1285 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1298 | out_cluster: |
1299 | ocfs2_inode_unlock(orphan_dir_inode, 0); | ||
1286 | out: | 1300 | out: |
1287 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1301 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1288 | iput(orphan_dir_inode); | 1302 | iput(orphan_dir_inode); |
@@ -1369,10 +1383,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1369 | iter = oi->ip_next_orphan; | 1383 | iter = oi->ip_next_orphan; |
1370 | 1384 | ||
1371 | spin_lock(&oi->ip_lock); | 1385 | spin_lock(&oi->ip_lock); |
1372 | /* Delete voting may have set these on the assumption | 1386 | /* The remote delete code may have set these on the |
1373 | * that the other node would wipe them successfully. | 1387 | * assumption that the other node would wipe them |
1374 | * If they are still in the node's orphan dir, we need | 1388 | * successfully. If they are still in the node's |
1375 | * to reset that state. */ | 1389 | * orphan dir, we need to reset that state. */ |
1376 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 1390 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); |
1377 | 1391 | ||
1378 | /* Set the proper information to get us going into | 1392 | /* Set the proper information to get us going into |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 4b32e0961568..220f3e818e78 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -278,6 +278,12 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
278 | /* simple file updates like chmod, etc. */ | 278 | /* simple file updates like chmod, etc. */ |
279 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 279 | #define OCFS2_INODE_UPDATE_CREDITS 1 |
280 | 280 | ||
281 | /* group extend. inode update and last group update. */ | ||
282 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
283 | |||
284 | /* group add. inode update and the new group update. */ | ||
285 | #define OCFS2_GROUP_ADD_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
286 | |||
281 | /* get one bit out of a suballocator: dinode + group descriptor + | 287 | /* get one bit out of a suballocator: dinode + group descriptor + |
282 | * prev. group desc. if we relink. */ | 288 | * prev. group desc. if we relink. */ |
283 | #define OCFS2_SUBALLOC_ALLOC (3) | 289 | #define OCFS2_SUBALLOC_ALLOC (3) |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 58ea88b5af36..add1ffdc5c6c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 76 | struct inode *local_alloc_inode); |
77 | 77 | ||
78 | /* | ||
79 | * Determine how large our local alloc window should be, in bits. | ||
80 | * | ||
81 | * These values (and the behavior in ocfs2_alloc_should_use_local) have | ||
82 | * been chosen so that most allocations, including new block groups go | ||
83 | * through local alloc. | ||
84 | */ | ||
85 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) |
86 | { | 79 | { |
87 | BUG_ON(osb->s_clustersize_bits < 12); | 80 | BUG_ON(osb->s_clustersize_bits > 20); |
88 | 81 | ||
89 | return 2048 >> (osb->s_clustersize_bits - 12); | 82 | /* Size local alloc windows by the megabyte */ |
83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | ||
90 | } | 84 | } |
91 | 85 | ||
92 | /* | 86 | /* |
@@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | |||
96 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
97 | { | 91 | { |
98 | int la_bits = ocfs2_local_alloc_window_bits(osb); | 92 | int la_bits = ocfs2_local_alloc_window_bits(osb); |
93 | int ret = 0; | ||
99 | 94 | ||
100 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) |
101 | return 0; | 96 | goto bail; |
102 | 97 | ||
103 | /* la_bits should be at least twice the size (in clusters) of | 98 | /* la_bits should be at least twice the size (in clusters) of |
104 | * a new block group. We want to be sure block group | 99 | * a new block group. We want to be sure block group |
105 | * allocations go through the local alloc, so allow an | 100 | * allocations go through the local alloc, so allow an |
106 | * allocation to take up to half the bitmap. */ | 101 | * allocation to take up to half the bitmap. */ |
107 | if (bits > (la_bits / 2)) | 102 | if (bits > (la_bits / 2)) |
108 | return 0; | 103 | goto bail; |
109 | 104 | ||
110 | return 1; | 105 | ret = 1; |
106 | bail: | ||
107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | ||
108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | ||
109 | return ret; | ||
111 | } | 110 | } |
112 | 111 | ||
113 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) | 112 | int ocfs2_load_local_alloc(struct ocfs2_super *osb) |
@@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
121 | 120 | ||
122 | mlog_entry_void(); | 121 | mlog_entry_void(); |
123 | 122 | ||
123 | if (ocfs2_mount_local(osb)) | ||
124 | goto bail; | ||
125 | |||
126 | if (osb->local_alloc_size == 0) | ||
127 | goto bail; | ||
128 | |||
129 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | ||
130 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | ||
131 | "than max possible %u. Using defaults.\n", | ||
132 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | ||
133 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
134 | } | ||
135 | |||
124 | /* read the alloc off disk */ | 136 | /* read the alloc off disk */ |
125 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, | 137 | inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, |
126 | osb->slot_num); | 138 | osb->slot_num); |
@@ -181,6 +193,9 @@ bail: | |||
181 | if (inode) | 193 | if (inode) |
182 | iput(inode); | 194 | iput(inode); |
183 | 195 | ||
196 | mlog(0, "Local alloc window bits = %d\n", | ||
197 | ocfs2_local_alloc_window_bits(osb)); | ||
198 | |||
184 | mlog_exit(status); | 199 | mlog_exit(status); |
185 | return status; | 200 | return status; |
186 | } | 201 | } |
@@ -231,7 +246,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
231 | 246 | ||
232 | mutex_lock(&main_bm_inode->i_mutex); | 247 | mutex_lock(&main_bm_inode->i_mutex); |
233 | 248 | ||
234 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 249 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); |
235 | if (status < 0) { | 250 | if (status < 0) { |
236 | mlog_errno(status); | 251 | mlog_errno(status); |
237 | goto out_mutex; | 252 | goto out_mutex; |
@@ -286,7 +301,7 @@ out_unlock: | |||
286 | if (main_bm_bh) | 301 | if (main_bm_bh) |
287 | brelse(main_bm_bh); | 302 | brelse(main_bm_bh); |
288 | 303 | ||
289 | ocfs2_meta_unlock(main_bm_inode, 1); | 304 | ocfs2_inode_unlock(main_bm_inode, 1); |
290 | 305 | ||
291 | out_mutex: | 306 | out_mutex: |
292 | mutex_unlock(&main_bm_inode->i_mutex); | 307 | mutex_unlock(&main_bm_inode->i_mutex); |
@@ -399,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
399 | 414 | ||
400 | mutex_lock(&main_bm_inode->i_mutex); | 415 | mutex_lock(&main_bm_inode->i_mutex); |
401 | 416 | ||
402 | status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1); | 417 | status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); |
403 | if (status < 0) { | 418 | if (status < 0) { |
404 | mlog_errno(status); | 419 | mlog_errno(status); |
405 | goto out_mutex; | 420 | goto out_mutex; |
@@ -424,7 +439,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, | |||
424 | ocfs2_commit_trans(osb, handle); | 439 | ocfs2_commit_trans(osb, handle); |
425 | 440 | ||
426 | out_unlock: | 441 | out_unlock: |
427 | ocfs2_meta_unlock(main_bm_inode, 1); | 442 | ocfs2_inode_unlock(main_bm_inode, 1); |
428 | 443 | ||
429 | out_mutex: | 444 | out_mutex: |
430 | mutex_unlock(&main_bm_inode->i_mutex); | 445 | mutex_unlock(&main_bm_inode->i_mutex); |
@@ -521,6 +536,9 @@ bail: | |||
521 | iput(local_alloc_inode); | 536 | iput(local_alloc_inode); |
522 | } | 537 | } |
523 | 538 | ||
539 | mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, | ||
540 | status); | ||
541 | |||
524 | mlog_exit(status); | 542 | mlog_exit(status); |
525 | return status; | 543 | return status; |
526 | } | 544 | } |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c new file mode 100644 index 000000000000..203f87143877 --- /dev/null +++ b/fs/ocfs2/locks.c | |||
@@ -0,0 +1,125 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * locks.c | ||
5 | * | ||
6 | * Userspace file locking support | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #define MLOG_MASK_PREFIX ML_INODE | ||
29 | #include <cluster/masklog.h> | ||
30 | |||
31 | #include "ocfs2.h" | ||
32 | |||
33 | #include "dlmglue.h" | ||
34 | #include "file.h" | ||
35 | #include "locks.h" | ||
36 | |||
37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | ||
38 | int cmd, struct file_lock *fl) | ||
39 | { | ||
40 | int ret = 0, level = 0, trylock = 0; | ||
41 | struct ocfs2_file_private *fp = file->private_data; | ||
42 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
43 | |||
44 | if (fl->fl_type == F_WRLCK) | ||
45 | level = 1; | ||
46 | if (!IS_SETLKW(cmd)) | ||
47 | trylock = 1; | ||
48 | |||
49 | mutex_lock(&fp->fp_mutex); | ||
50 | |||
51 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | ||
52 | lockres->l_level > LKM_NLMODE) { | ||
53 | int old_level = 0; | ||
54 | |||
55 | if (lockres->l_level == LKM_EXMODE) | ||
56 | old_level = 1; | ||
57 | |||
58 | if (level == old_level) | ||
59 | goto out; | ||
60 | |||
61 | /* | ||
62 | * Converting an existing lock is not guaranteed to be | ||
63 | * atomic, so we can get away with simply unlocking | ||
64 | * here and allowing the lock code to try at the new | ||
65 | * level. | ||
66 | */ | ||
67 | |||
68 | flock_lock_file_wait(file, | ||
69 | &(struct file_lock){.fl_type = F_UNLCK}); | ||
70 | |||
71 | ocfs2_file_unlock(file); | ||
72 | } | ||
73 | |||
74 | ret = ocfs2_file_lock(file, level, trylock); | ||
75 | if (ret) { | ||
76 | if (ret == -EAGAIN && trylock) | ||
77 | ret = -EWOULDBLOCK; | ||
78 | else | ||
79 | mlog_errno(ret); | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | ret = flock_lock_file_wait(file, fl); | ||
84 | |||
85 | out: | ||
86 | mutex_unlock(&fp->fp_mutex); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl) | ||
92 | { | ||
93 | int ret; | ||
94 | struct ocfs2_file_private *fp = file->private_data; | ||
95 | |||
96 | mutex_lock(&fp->fp_mutex); | ||
97 | ocfs2_file_unlock(file); | ||
98 | ret = flock_lock_file_wait(file, fl); | ||
99 | mutex_unlock(&fp->fp_mutex); | ||
100 | |||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Overall flow of ocfs2_flock() was influenced by gfs2_flock(). | ||
106 | */ | ||
107 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | ||
108 | { | ||
109 | struct inode *inode = file->f_mapping->host; | ||
110 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
111 | |||
112 | if (!(fl->fl_flags & FL_FLOCK)) | ||
113 | return -ENOLCK; | ||
114 | if (__mandatory_lock(inode)) | ||
115 | return -ENOLCK; | ||
116 | |||
117 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
118 | ocfs2_mount_local(osb)) | ||
119 | return flock_lock_file_wait(file, fl); | ||
120 | |||
121 | if (fl->fl_type == F_UNLCK) | ||
122 | return ocfs2_do_funlock(file, cmd, fl); | ||
123 | else | ||
124 | return ocfs2_do_flock(file, inode, cmd, fl); | ||
125 | } | ||
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/locks.h index 9ea46f62de31..9743ef2324ec 100644 --- a/fs/ocfs2/vote.h +++ b/fs/ocfs2/locks.h | |||
@@ -1,9 +1,9 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * vote.h | 4 | * locks.h |
5 | * | 5 | * |
6 | * description here | 6 | * Function prototypes for Userspace file locking support |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
@@ -23,26 +23,9 @@ | |||
23 | * Boston, MA 02111-1307, USA. | 23 | * Boston, MA 02111-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #ifndef OCFS2_LOCKS_H | ||
27 | #define OCFS2_LOCKS_H | ||
26 | 28 | ||
27 | #ifndef VOTE_H | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); |
28 | #define VOTE_H | ||
29 | 30 | ||
30 | int ocfs2_vote_thread(void *arg); | 31 | #endif /* OCFS2_LOCKS_H */ |
31 | static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | ||
32 | { | ||
33 | spin_lock(&osb->vote_task_lock); | ||
34 | /* make sure the voting thread gets a swipe at whatever changes | ||
35 | * the caller may have made to the voting state */ | ||
36 | osb->vote_wake_sequence++; | ||
37 | spin_unlock(&osb->vote_task_lock); | ||
38 | wake_up(&osb->vote_event); | ||
39 | } | ||
40 | |||
41 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | ||
42 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | ||
43 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | ||
44 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb); | ||
45 | |||
46 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
47 | int node_num); | ||
48 | #endif | ||
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 98756156d298..3dc18d67557c 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -168,7 +168,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
168 | * node. Taking the data lock will also ensure that we don't | 168 | * node. Taking the data lock will also ensure that we don't |
169 | * attempt page truncation as part of a downconvert. | 169 | * attempt page truncation as part of a downconvert. |
170 | */ | 170 | */ |
171 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 171 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
172 | if (ret < 0) { | 172 | if (ret < 0) { |
173 | mlog_errno(ret); | 173 | mlog_errno(ret); |
174 | goto out; | 174 | goto out; |
@@ -181,21 +181,12 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
181 | */ | 181 | */ |
182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 182 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
183 | 183 | ||
184 | ret = ocfs2_data_lock(inode, 1); | ||
185 | if (ret < 0) { | ||
186 | mlog_errno(ret); | ||
187 | goto out_meta_unlock; | ||
188 | } | ||
189 | |||
190 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 184 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); |
191 | 185 | ||
192 | ocfs2_data_unlock(inode, 1); | ||
193 | |||
194 | out_meta_unlock: | ||
195 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 186 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
196 | 187 | ||
197 | brelse(di_bh); | 188 | brelse(di_bh); |
198 | ocfs2_meta_unlock(inode, 1); | 189 | ocfs2_inode_unlock(inode, 1); |
199 | 190 | ||
200 | out: | 191 | out: |
201 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); | 192 | ret2 = ocfs2_vm_op_unblock_sigs(&oldset); |
@@ -214,13 +205,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
214 | { | 205 | { |
215 | int ret = 0, lock_level = 0; | 206 | int ret = 0, lock_level = 0; |
216 | 207 | ||
217 | ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, | 208 | ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, |
218 | file->f_vfsmnt, &lock_level); | 209 | file->f_vfsmnt, &lock_level); |
219 | if (ret < 0) { | 210 | if (ret < 0) { |
220 | mlog_errno(ret); | 211 | mlog_errno(ret); |
221 | goto out; | 212 | goto out; |
222 | } | 213 | } |
223 | ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); | 214 | ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); |
224 | out: | 215 | out: |
225 | vma->vm_ops = &ocfs2_file_vm_ops; | 216 | vma->vm_ops = &ocfs2_file_vm_ops; |
226 | vma->vm_flags |= VM_CAN_NONLINEAR; | 217 | vma->vm_flags |= VM_CAN_NONLINEAR; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 989ac2718587..ae9ad9587516 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "vote.h" | ||
64 | 63 | ||
65 | #include "buffer_head_io.h" | 64 | #include "buffer_head_io.h" |
66 | 65 | ||
@@ -116,7 +115,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
116 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, | 115 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, |
117 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 116 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); |
118 | 117 | ||
119 | status = ocfs2_meta_lock(dir, NULL, 0); | 118 | status = ocfs2_inode_lock(dir, NULL, 0); |
120 | if (status < 0) { | 119 | if (status < 0) { |
121 | if (status != -ENOENT) | 120 | if (status != -ENOENT) |
122 | mlog_errno(status); | 121 | mlog_errno(status); |
@@ -129,7 +128,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
129 | if (status < 0) | 128 | if (status < 0) |
130 | goto bail_add; | 129 | goto bail_add; |
131 | 130 | ||
132 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 131 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); |
133 | if (IS_ERR(inode)) { | 132 | if (IS_ERR(inode)) { |
134 | ret = ERR_PTR(-EACCES); | 133 | ret = ERR_PTR(-EACCES); |
135 | goto bail_unlock; | 134 | goto bail_unlock; |
@@ -176,8 +175,8 @@ bail_unlock: | |||
176 | /* Don't drop the cluster lock until *after* the d_add -- | 175 | /* Don't drop the cluster lock until *after* the d_add -- |
177 | * unlink on another node will message us to remove that | 176 | * unlink on another node will message us to remove that |
178 | * dentry under this lock so otherwise we can race this with | 177 | * dentry under this lock so otherwise we can race this with |
179 | * the vote thread and have a stale dentry. */ | 178 | * the downconvert thread and have a stale dentry. */ |
180 | ocfs2_meta_unlock(dir, 0); | 179 | ocfs2_inode_unlock(dir, 0); |
181 | 180 | ||
182 | bail: | 181 | bail: |
183 | 182 | ||
@@ -209,7 +208,7 @@ static int ocfs2_mknod(struct inode *dir, | |||
209 | /* get our super block */ | 208 | /* get our super block */ |
210 | osb = OCFS2_SB(dir->i_sb); | 209 | osb = OCFS2_SB(dir->i_sb); |
211 | 210 | ||
212 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 211 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
213 | if (status < 0) { | 212 | if (status < 0) { |
214 | if (status != -ENOENT) | 213 | if (status != -ENOENT) |
215 | mlog_errno(status); | 214 | mlog_errno(status); |
@@ -323,7 +322,7 @@ leave: | |||
323 | if (handle) | 322 | if (handle) |
324 | ocfs2_commit_trans(osb, handle); | 323 | ocfs2_commit_trans(osb, handle); |
325 | 324 | ||
326 | ocfs2_meta_unlock(dir, 1); | 325 | ocfs2_inode_unlock(dir, 1); |
327 | 326 | ||
328 | if (status == -ENOSPC) | 327 | if (status == -ENOSPC) |
329 | mlog(0, "Disk is full\n"); | 328 | mlog(0, "Disk is full\n"); |
@@ -553,7 +552,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
553 | if (S_ISDIR(inode->i_mode)) | 552 | if (S_ISDIR(inode->i_mode)) |
554 | return -EPERM; | 553 | return -EPERM; |
555 | 554 | ||
556 | err = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 555 | err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
557 | if (err < 0) { | 556 | if (err < 0) { |
558 | if (err != -ENOENT) | 557 | if (err != -ENOENT) |
559 | mlog_errno(err); | 558 | mlog_errno(err); |
@@ -578,7 +577,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
578 | goto out; | 577 | goto out; |
579 | } | 578 | } |
580 | 579 | ||
581 | err = ocfs2_meta_lock(inode, &fe_bh, 1); | 580 | err = ocfs2_inode_lock(inode, &fe_bh, 1); |
582 | if (err < 0) { | 581 | if (err < 0) { |
583 | if (err != -ENOENT) | 582 | if (err != -ENOENT) |
584 | mlog_errno(err); | 583 | mlog_errno(err); |
@@ -643,10 +642,10 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
643 | out_commit: | 642 | out_commit: |
644 | ocfs2_commit_trans(osb, handle); | 643 | ocfs2_commit_trans(osb, handle); |
645 | out_unlock_inode: | 644 | out_unlock_inode: |
646 | ocfs2_meta_unlock(inode, 1); | 645 | ocfs2_inode_unlock(inode, 1); |
647 | 646 | ||
648 | out: | 647 | out: |
649 | ocfs2_meta_unlock(dir, 1); | 648 | ocfs2_inode_unlock(dir, 1); |
650 | 649 | ||
651 | if (de_bh) | 650 | if (de_bh) |
652 | brelse(de_bh); | 651 | brelse(de_bh); |
@@ -720,7 +719,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
720 | return -EPERM; | 719 | return -EPERM; |
721 | } | 720 | } |
722 | 721 | ||
723 | status = ocfs2_meta_lock(dir, &parent_node_bh, 1); | 722 | status = ocfs2_inode_lock(dir, &parent_node_bh, 1); |
724 | if (status < 0) { | 723 | if (status < 0) { |
725 | if (status != -ENOENT) | 724 | if (status != -ENOENT) |
726 | mlog_errno(status); | 725 | mlog_errno(status); |
@@ -745,7 +744,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
745 | goto leave; | 744 | goto leave; |
746 | } | 745 | } |
747 | 746 | ||
748 | status = ocfs2_meta_lock(inode, &fe_bh, 1); | 747 | status = ocfs2_inode_lock(inode, &fe_bh, 1); |
749 | if (status < 0) { | 748 | if (status < 0) { |
750 | if (status != -ENOENT) | 749 | if (status != -ENOENT) |
751 | mlog_errno(status); | 750 | mlog_errno(status); |
@@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
765 | 764 | ||
766 | status = ocfs2_remote_dentry_delete(dentry); | 765 | status = ocfs2_remote_dentry_delete(dentry); |
767 | if (status < 0) { | 766 | if (status < 0) { |
768 | /* This vote should succeed under all normal | 767 | /* This remote delete should succeed under all normal |
769 | * circumstances. */ | 768 | * circumstances. */ |
770 | mlog_errno(status); | 769 | mlog_errno(status); |
771 | goto leave; | 770 | goto leave; |
@@ -841,13 +840,13 @@ leave: | |||
841 | ocfs2_commit_trans(osb, handle); | 840 | ocfs2_commit_trans(osb, handle); |
842 | 841 | ||
843 | if (child_locked) | 842 | if (child_locked) |
844 | ocfs2_meta_unlock(inode, 1); | 843 | ocfs2_inode_unlock(inode, 1); |
845 | 844 | ||
846 | ocfs2_meta_unlock(dir, 1); | 845 | ocfs2_inode_unlock(dir, 1); |
847 | 846 | ||
848 | if (orphan_dir) { | 847 | if (orphan_dir) { |
849 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 848 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
850 | ocfs2_meta_unlock(orphan_dir, 1); | 849 | ocfs2_inode_unlock(orphan_dir, 1); |
851 | mutex_unlock(&orphan_dir->i_mutex); | 850 | mutex_unlock(&orphan_dir->i_mutex); |
852 | iput(orphan_dir); | 851 | iput(orphan_dir); |
853 | } | 852 | } |
@@ -908,7 +907,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
908 | inode1 = tmpinode; | 907 | inode1 = tmpinode; |
909 | } | 908 | } |
910 | /* lock id2 */ | 909 | /* lock id2 */ |
911 | status = ocfs2_meta_lock(inode2, bh2, 1); | 910 | status = ocfs2_inode_lock(inode2, bh2, 1); |
912 | if (status < 0) { | 911 | if (status < 0) { |
913 | if (status != -ENOENT) | 912 | if (status != -ENOENT) |
914 | mlog_errno(status); | 913 | mlog_errno(status); |
@@ -917,14 +916,14 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
917 | } | 916 | } |
918 | 917 | ||
919 | /* lock id1 */ | 918 | /* lock id1 */ |
920 | status = ocfs2_meta_lock(inode1, bh1, 1); | 919 | status = ocfs2_inode_lock(inode1, bh1, 1); |
921 | if (status < 0) { | 920 | if (status < 0) { |
922 | /* | 921 | /* |
923 | * An error return must mean that no cluster locks | 922 | * An error return must mean that no cluster locks |
924 | * were held on function exit. | 923 | * were held on function exit. |
925 | */ | 924 | */ |
926 | if (oi1->ip_blkno != oi2->ip_blkno) | 925 | if (oi1->ip_blkno != oi2->ip_blkno) |
927 | ocfs2_meta_unlock(inode2, 1); | 926 | ocfs2_inode_unlock(inode2, 1); |
928 | 927 | ||
929 | if (status != -ENOENT) | 928 | if (status != -ENOENT) |
930 | mlog_errno(status); | 929 | mlog_errno(status); |
@@ -937,10 +936,10 @@ bail: | |||
937 | 936 | ||
938 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) | 937 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2) |
939 | { | 938 | { |
940 | ocfs2_meta_unlock(inode1, 1); | 939 | ocfs2_inode_unlock(inode1, 1); |
941 | 940 | ||
942 | if (inode1 != inode2) | 941 | if (inode1 != inode2) |
943 | ocfs2_meta_unlock(inode2, 1); | 942 | ocfs2_inode_unlock(inode2, 1); |
944 | } | 943 | } |
945 | 944 | ||
946 | static int ocfs2_rename(struct inode *old_dir, | 945 | static int ocfs2_rename(struct inode *old_dir, |
@@ -1031,10 +1030,11 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1031 | 1030 | ||
1032 | /* | 1031 | /* |
1033 | * Aside from allowing a meta data update, the locking here | 1032 | * Aside from allowing a meta data update, the locking here |
1034 | * also ensures that the vote thread on other nodes won't have | 1033 | * also ensures that the downconvert thread on other nodes |
1035 | * to concurrently downconvert the inode and the dentry locks. | 1034 | * won't have to concurrently downconvert the inode and the |
1035 | * dentry locks. | ||
1036 | */ | 1036 | */ |
1037 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); | 1037 | status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); |
1038 | if (status < 0) { | 1038 | if (status < 0) { |
1039 | if (status != -ENOENT) | 1039 | if (status != -ENOENT) |
1040 | mlog_errno(status); | 1040 | mlog_errno(status); |
@@ -1143,7 +1143,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1143 | goto bail; | 1143 | goto bail; |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | status = ocfs2_meta_lock(new_inode, &newfe_bh, 1); | 1146 | status = ocfs2_inode_lock(new_inode, &newfe_bh, 1); |
1147 | if (status < 0) { | 1147 | if (status < 0) { |
1148 | if (status != -ENOENT) | 1148 | if (status != -ENOENT) |
1149 | mlog_errno(status); | 1149 | mlog_errno(status); |
@@ -1355,14 +1355,14 @@ bail: | |||
1355 | ocfs2_double_unlock(old_dir, new_dir); | 1355 | ocfs2_double_unlock(old_dir, new_dir); |
1356 | 1356 | ||
1357 | if (old_child_locked) | 1357 | if (old_child_locked) |
1358 | ocfs2_meta_unlock(old_inode, 1); | 1358 | ocfs2_inode_unlock(old_inode, 1); |
1359 | 1359 | ||
1360 | if (new_child_locked) | 1360 | if (new_child_locked) |
1361 | ocfs2_meta_unlock(new_inode, 1); | 1361 | ocfs2_inode_unlock(new_inode, 1); |
1362 | 1362 | ||
1363 | if (orphan_dir) { | 1363 | if (orphan_dir) { |
1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 1364 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
1365 | ocfs2_meta_unlock(orphan_dir, 1); | 1365 | ocfs2_inode_unlock(orphan_dir, 1); |
1366 | mutex_unlock(&orphan_dir->i_mutex); | 1366 | mutex_unlock(&orphan_dir->i_mutex); |
1367 | iput(orphan_dir); | 1367 | iput(orphan_dir); |
1368 | } | 1368 | } |
@@ -1530,7 +1530,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1530 | credits = ocfs2_calc_symlink_credits(sb); | 1530 | credits = ocfs2_calc_symlink_credits(sb); |
1531 | 1531 | ||
1532 | /* lock the parent directory */ | 1532 | /* lock the parent directory */ |
1533 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 1533 | status = ocfs2_inode_lock(dir, &parent_fe_bh, 1); |
1534 | if (status < 0) { | 1534 | if (status < 0) { |
1535 | if (status != -ENOENT) | 1535 | if (status != -ENOENT) |
1536 | mlog_errno(status); | 1536 | mlog_errno(status); |
@@ -1657,7 +1657,7 @@ bail: | |||
1657 | if (handle) | 1657 | if (handle) |
1658 | ocfs2_commit_trans(osb, handle); | 1658 | ocfs2_commit_trans(osb, handle); |
1659 | 1659 | ||
1660 | ocfs2_meta_unlock(dir, 1); | 1660 | ocfs2_inode_unlock(dir, 1); |
1661 | 1661 | ||
1662 | if (new_fe_bh) | 1662 | if (new_fe_bh) |
1663 | brelse(new_fe_bh); | 1663 | brelse(new_fe_bh); |
@@ -1735,7 +1735,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1735 | 1735 | ||
1736 | mutex_lock(&orphan_dir_inode->i_mutex); | 1736 | mutex_lock(&orphan_dir_inode->i_mutex); |
1737 | 1737 | ||
1738 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1738 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
1739 | if (status < 0) { | 1739 | if (status < 0) { |
1740 | mlog_errno(status); | 1740 | mlog_errno(status); |
1741 | goto leave; | 1741 | goto leave; |
@@ -1745,7 +1745,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1745 | orphan_dir_bh, name, | 1745 | orphan_dir_bh, name, |
1746 | OCFS2_ORPHAN_NAMELEN, de_bh); | 1746 | OCFS2_ORPHAN_NAMELEN, de_bh); |
1747 | if (status < 0) { | 1747 | if (status < 0) { |
1748 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 1748 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
1749 | 1749 | ||
1750 | mlog_errno(status); | 1750 | mlog_errno(status); |
1751 | goto leave; | 1751 | goto leave; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 60a23e1906b0..d08480580470 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -101,6 +101,7 @@ enum ocfs2_unlock_action { | |||
101 | * about to be | 101 | * about to be |
102 | * dropped. */ | 102 | * dropped. */ |
103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 103 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
104 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | ||
104 | 105 | ||
105 | struct ocfs2_lock_res_ops; | 106 | struct ocfs2_lock_res_ops; |
106 | 107 | ||
@@ -170,6 +171,7 @@ enum ocfs2_mount_options | |||
170 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 171 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
171 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 172 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
172 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 173 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
174 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | ||
173 | }; | 175 | }; |
174 | 176 | ||
175 | #define OCFS2_OSB_SOFT_RO 0x0001 | 177 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -189,9 +191,7 @@ struct ocfs2_super | |||
189 | struct ocfs2_slot_info *slot_info; | 191 | struct ocfs2_slot_info *slot_info; |
190 | 192 | ||
191 | spinlock_t node_map_lock; | 193 | spinlock_t node_map_lock; |
192 | struct ocfs2_node_map mounted_map; | ||
193 | struct ocfs2_node_map recovery_map; | 194 | struct ocfs2_node_map recovery_map; |
194 | struct ocfs2_node_map umount_map; | ||
195 | 195 | ||
196 | u64 root_blkno; | 196 | u64 root_blkno; |
197 | u64 system_dir_blkno; | 197 | u64 system_dir_blkno; |
@@ -231,7 +231,9 @@ struct ocfs2_super | |||
231 | wait_queue_head_t checkpoint_event; | 231 | wait_queue_head_t checkpoint_event; |
232 | atomic_t needs_checkpoint; | 232 | atomic_t needs_checkpoint; |
233 | struct ocfs2_journal *journal; | 233 | struct ocfs2_journal *journal; |
234 | unsigned long osb_commit_interval; | ||
234 | 235 | ||
236 | int local_alloc_size; | ||
235 | enum ocfs2_local_alloc_state local_alloc_state; | 237 | enum ocfs2_local_alloc_state local_alloc_state; |
236 | struct buffer_head *local_alloc_bh; | 238 | struct buffer_head *local_alloc_bh; |
237 | u64 la_last_gd; | 239 | u64 la_last_gd; |
@@ -254,28 +256,21 @@ struct ocfs2_super | |||
254 | 256 | ||
255 | wait_queue_head_t recovery_event; | 257 | wait_queue_head_t recovery_event; |
256 | 258 | ||
257 | spinlock_t vote_task_lock; | 259 | spinlock_t dc_task_lock; |
258 | struct task_struct *vote_task; | 260 | struct task_struct *dc_task; |
259 | wait_queue_head_t vote_event; | 261 | wait_queue_head_t dc_event; |
260 | unsigned long vote_wake_sequence; | 262 | unsigned long dc_wake_sequence; |
261 | unsigned long vote_work_sequence; | 263 | unsigned long dc_work_sequence; |
262 | 264 | ||
265 | /* | ||
266 | * Any thread can add locks to the list, but the downconvert | ||
267 | * thread is the only one allowed to remove locks. Any change | ||
268 | * to this rule requires updating | ||
269 | * ocfs2_downconvert_thread_do_work(). | ||
270 | */ | ||
263 | struct list_head blocked_lock_list; | 271 | struct list_head blocked_lock_list; |
264 | unsigned long blocked_lock_count; | 272 | unsigned long blocked_lock_count; |
265 | 273 | ||
266 | struct list_head vote_list; | ||
267 | int vote_count; | ||
268 | |||
269 | u32 net_key; | ||
270 | spinlock_t net_response_lock; | ||
271 | unsigned int net_response_ids; | ||
272 | struct list_head net_response_list; | ||
273 | |||
274 | struct o2hb_callback_func osb_hb_up; | ||
275 | struct o2hb_callback_func osb_hb_down; | ||
276 | |||
277 | struct list_head osb_net_handlers; | ||
278 | |||
279 | wait_queue_head_t osb_mount_event; | 274 | wait_queue_head_t osb_mount_event; |
280 | 275 | ||
281 | /* Truncate log info */ | 276 | /* Truncate log info */ |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 6ef876759a73..3633edd3982f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -231,6 +231,20 @@ struct ocfs2_space_resv { | |||
231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) | 231 | #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) |
232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) | 232 | #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) |
233 | 233 | ||
234 | /* Argument of the OCFS2_IOC_GROUP_ADD ioctls: describes the group descriptor to add during online resize */ | ||
235 | struct ocfs2_new_group_input { | ||
236 | __u64 group; /* Block number of the new group descriptor */ | ||
237 | __u32 clusters; /* Total number of clusters in this group */ | ||
238 | __u32 frees; /* Number of free clusters in this group */ | ||
239 | __u16 chain; /* Index of the chain record this group is linked into */ | ||
240 | __u16 reserved1; | ||
241 | __u32 reserved2; | ||
242 | }; | ||
243 | |||
244 | #define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) | ||
245 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | ||
246 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | ||
247 | |||
234 | /* | 248 | /* |
235 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 249 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
236 | */ | 250 | */ |
@@ -256,6 +270,14 @@ struct ocfs2_space_resv { | |||
256 | /* Journal limits (in bytes) */ | 270 | /* Journal limits (in bytes) */ |
257 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
258 | 272 | ||
273 | /* | ||
274 | * Default local alloc size (in megabytes) | ||
275 | * | ||
276 | * The value chosen should be such that most allocations, including new | ||
277 | * block groups, use local alloc. | ||
278 | */ | ||
279 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | ||
280 | |||
259 | struct ocfs2_system_inode_info { | 281 | struct ocfs2_system_inode_info { |
260 | char *si_name; | 282 | char *si_name; |
261 | int si_iflags; | 283 | int si_iflags; |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 4ca02b1c38ac..86f3e3799c2b 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -45,6 +45,7 @@ enum ocfs2_lock_type { | |||
45 | OCFS2_LOCK_TYPE_RW, | 45 | OCFS2_LOCK_TYPE_RW, |
46 | OCFS2_LOCK_TYPE_DENTRY, | 46 | OCFS2_LOCK_TYPE_DENTRY, |
47 | OCFS2_LOCK_TYPE_OPEN, | 47 | OCFS2_LOCK_TYPE_OPEN, |
48 | OCFS2_LOCK_TYPE_FLOCK, | ||
48 | OCFS2_NUM_LOCK_TYPES | 49 | OCFS2_NUM_LOCK_TYPES |
49 | }; | 50 | }; |
50 | 51 | ||
@@ -73,6 +74,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
73 | case OCFS2_LOCK_TYPE_OPEN: | 74 | case OCFS2_LOCK_TYPE_OPEN: |
74 | c = 'O'; | 75 | c = 'O'; |
75 | break; | 76 | break; |
77 | case OCFS2_LOCK_TYPE_FLOCK: | ||
78 | c = 'F'; | ||
79 | break; | ||
76 | default: | 80 | default: |
77 | c = '\0'; | 81 | c = '\0'; |
78 | } | 82 | } |
@@ -90,6 +94,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
90 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", | 94 | [OCFS2_LOCK_TYPE_RW] = "Write/Read", |
91 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", | 95 | [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", |
92 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 96 | [OCFS2_LOCK_TYPE_OPEN] = "Open", |
97 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | ||
93 | }; | 98 | }; |
94 | 99 | ||
95 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c new file mode 100644 index 000000000000..37835ffcb039 --- /dev/null +++ b/fs/ocfs2/resize.c | |||
@@ -0,0 +1,634 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * resize.c | ||
5 | * | ||
6 | * volume resize. | ||
7 | * Inspired by ext3/resize.c. | ||
8 | * | ||
9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public | ||
22 | * License along with this program; if not, write to the | ||
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
24 | * Boston, MA 02111-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/types.h> | ||
29 | |||
30 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | ||
31 | #include <cluster/masklog.h> | ||
32 | |||
33 | #include "ocfs2.h" | ||
34 | |||
35 | #include "alloc.h" | ||
36 | #include "dlmglue.h" | ||
37 | #include "inode.h" | ||
38 | #include "journal.h" | ||
39 | #include "super.h" | ||
40 | #include "sysfile.h" | ||
41 | #include "uptodate.h" | ||
42 | |||
43 | #include "buffer_head_io.h" | ||
44 | #include "suballoc.h" | ||
45 | #include "resize.h" | ||
46 | |||
47 | /* | ||
48 | * Check whether there are new backup superblocks exist | ||
49 | * in the last group. If there are some, mark them or clear | ||
50 | * them in the bitmap. | ||
51 | * | ||
52 | * Return how many backups we find in the last group. | ||
53 | */ | ||
54 | static u16 ocfs2_calc_new_backup_super(struct inode *inode, | ||
55 | struct ocfs2_group_desc *gd, | ||
56 | int new_clusters, | ||
57 | u32 first_new_cluster, | ||
58 | u16 cl_cpg, | ||
59 | int set) | ||
60 | { | ||
61 | int i; | ||
62 | u16 backups = 0; | ||
63 | u32 cluster; | ||
64 | u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); | ||
65 | |||
66 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
67 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
68 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
69 | |||
70 | gd_blkno = ocfs2_which_cluster_group(inode, cluster); | ||
71 | if (gd_blkno < lgd_blkno) | ||
72 | continue; | ||
73 | else if (gd_blkno > lgd_blkno) | ||
74 | break; | ||
75 | |||
76 | if (set) | ||
77 | ocfs2_set_bit(cluster % cl_cpg, | ||
78 | (unsigned long *)gd->bg_bitmap); | ||
79 | else | ||
80 | ocfs2_clear_bit(cluster % cl_cpg, | ||
81 | (unsigned long *)gd->bg_bitmap); | ||
82 | backups++; | ||
83 | } | ||
84 | |||
85 | mlog_exit_void(); | ||
86 | return backups; | ||
87 | } | ||
88 | |||
89 | static int ocfs2_update_last_group_and_inode(handle_t *handle, | ||
90 | struct inode *bm_inode, | ||
91 | struct buffer_head *bm_bh, | ||
92 | struct buffer_head *group_bh, | ||
93 | u32 first_new_cluster, | ||
94 | int new_clusters) | ||
95 | { | ||
96 | int ret = 0; | ||
97 | struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb); | ||
98 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data; | ||
99 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | ||
100 | struct ocfs2_chain_rec *cr; | ||
101 | struct ocfs2_group_desc *group; | ||
102 | u16 chain, num_bits, backups = 0; | ||
103 | u16 cl_bpc = le16_to_cpu(cl->cl_bpc); | ||
104 | u16 cl_cpg = le16_to_cpu(cl->cl_cpg); | ||
105 | |||
106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", | ||
107 | new_clusters, first_new_cluster); | ||
108 | |||
109 | ret = ocfs2_journal_access(handle, bm_inode, group_bh, | ||
110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
111 | if (ret < 0) { | ||
112 | mlog_errno(ret); | ||
113 | goto out; | ||
114 | } | ||
115 | |||
116 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
117 | |||
118 | /* update the group first. */ | ||
119 | num_bits = new_clusters * cl_bpc; | ||
120 | le16_add_cpu(&group->bg_bits, num_bits); | ||
121 | le16_add_cpu(&group->bg_free_bits_count, num_bits); | ||
122 | |||
123 | /* | ||
124 | * check whether there are some new backup superblocks exist in | ||
125 | * this group and update the group bitmap accordingly. | ||
126 | */ | ||
127 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
128 | OCFS2_FEATURE_COMPAT_BACKUP_SB)) { | ||
129 | backups = ocfs2_calc_new_backup_super(bm_inode, | ||
130 | group, | ||
131 | new_clusters, | ||
132 | first_new_cluster, | ||
133 | cl_cpg, 1); | ||
134 | le16_add_cpu(&group->bg_free_bits_count, -1 * backups); | ||
135 | } | ||
136 | |||
137 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
138 | if (ret < 0) { | ||
139 | mlog_errno(ret); | ||
140 | goto out_rollback; | ||
141 | } | ||
142 | |||
143 | /* update the inode accordingly. */ | ||
144 | ret = ocfs2_journal_access(handle, bm_inode, bm_bh, | ||
145 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
146 | if (ret < 0) { | ||
147 | mlog_errno(ret); | ||
148 | goto out_rollback; | ||
149 | } | ||
150 | |||
151 | chain = le16_to_cpu(group->bg_chain); | ||
152 | cr = (&cl->cl_recs[chain]); | ||
153 | le32_add_cpu(&cr->c_total, num_bits); | ||
154 | le32_add_cpu(&cr->c_free, num_bits); | ||
155 | le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits); | ||
156 | le32_add_cpu(&fe->i_clusters, new_clusters); | ||
157 | |||
158 | if (backups) { | ||
159 | le32_add_cpu(&cr->c_free, -1 * backups); | ||
160 | le32_add_cpu(&fe->id1.bitmap1.i_used, backups); | ||
161 | } | ||
162 | |||
163 | spin_lock(&OCFS2_I(bm_inode)->ip_lock); | ||
164 | OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
165 | le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); | ||
166 | spin_unlock(&OCFS2_I(bm_inode)->ip_lock); | ||
167 | i_size_write(bm_inode, le64_to_cpu(fe->i_size)); | ||
168 | |||
169 | ocfs2_journal_dirty(handle, bm_bh); | ||
170 | |||
171 | out_rollback: | ||
172 | if (ret < 0) { | ||
173 | ocfs2_calc_new_backup_super(bm_inode, | ||
174 | group, | ||
175 | new_clusters, | ||
176 | first_new_cluster, | ||
177 | cl_cpg, 0); | ||
178 | le16_add_cpu(&group->bg_free_bits_count, backups); | ||
179 | le16_add_cpu(&group->bg_bits, -1 * num_bits); | ||
180 | le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); | ||
181 | } | ||
182 | out: | ||
183 | mlog_exit(ret); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | static int update_backups(struct inode * inode, u32 clusters, char *data) | ||
188 | { | ||
189 | int i, ret = 0; | ||
190 | u32 cluster; | ||
191 | u64 blkno; | ||
192 | struct buffer_head *backup = NULL; | ||
193 | struct ocfs2_dinode *backup_di = NULL; | ||
194 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
195 | |||
196 | /* calculate the real backups we need to update. */ | ||
197 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
198 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
199 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
200 | if (cluster > clusters) | ||
201 | break; | ||
202 | |||
203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | ||
204 | if (ret < 0) { | ||
205 | mlog_errno(ret); | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | memcpy(backup->b_data, data, inode->i_sb->s_blocksize); | ||
210 | |||
211 | backup_di = (struct ocfs2_dinode *)backup->b_data; | ||
212 | backup_di->i_blkno = cpu_to_le64(blkno); | ||
213 | |||
214 | ret = ocfs2_write_super_or_backup(osb, backup); | ||
215 | brelse(backup); | ||
216 | backup = NULL; | ||
217 | if (ret < 0) { | ||
218 | mlog_errno(ret); | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | static void ocfs2_update_super_and_backups(struct inode *inode, | ||
227 | int new_clusters) | ||
228 | { | ||
229 | int ret; | ||
230 | u32 clusters = 0; | ||
231 | struct buffer_head *super_bh = NULL; | ||
232 | struct ocfs2_dinode *super_di = NULL; | ||
233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
234 | |||
235 | /* | ||
236 | * update the superblock last. | ||
237 | * It doesn't matter if the write failed. | ||
238 | */ | ||
239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | ||
240 | &super_bh, 0, NULL); | ||
241 | if (ret < 0) { | ||
242 | mlog_errno(ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | super_di = (struct ocfs2_dinode *)super_bh->b_data; | ||
247 | le32_add_cpu(&super_di->i_clusters, new_clusters); | ||
248 | clusters = le32_to_cpu(super_di->i_clusters); | ||
249 | |||
250 | ret = ocfs2_write_super_or_backup(osb, super_bh); | ||
251 | if (ret < 0) { | ||
252 | mlog_errno(ret); | ||
253 | goto out; | ||
254 | } | ||
255 | |||
256 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB)) | ||
257 | ret = update_backups(inode, clusters, super_bh->b_data); | ||
258 | |||
259 | out: | ||
260 | brelse(super_bh); | ||
261 | if (ret) | ||
262 | printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s" | ||
263 | " during fs resize. This condition is not fatal," | ||
264 | " but fsck.ocfs2 should be run to fix it\n", | ||
265 | osb->dev_str); | ||
266 | return; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Extend the filesystem to the new number of clusters specified. This entry | ||
271 | * point is only used to extend the current filesystem to the end of the last | ||
272 | * existing group. | ||
273 | */ | ||
274 | int ocfs2_group_extend(struct inode * inode, int new_clusters) | ||
275 | { | ||
276 | int ret; | ||
277 | handle_t *handle; | ||
278 | struct buffer_head *main_bm_bh = NULL; | ||
279 | struct buffer_head *group_bh = NULL; | ||
280 | struct inode *main_bm_inode = NULL; | ||
281 | struct ocfs2_dinode *fe = NULL; | ||
282 | struct ocfs2_group_desc *group = NULL; | ||
283 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
284 | u16 cl_bpc; | ||
285 | u32 first_new_cluster; | ||
286 | u64 lgd_blkno; | ||
287 | |||
288 | mlog_entry_void(); | ||
289 | |||
290 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
291 | return -EROFS; | ||
292 | |||
293 | if (new_clusters < 0) | ||
294 | return -EINVAL; | ||
295 | else if (new_clusters == 0) | ||
296 | return 0; | ||
297 | |||
298 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
299 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
300 | OCFS2_INVALID_SLOT); | ||
301 | if (!main_bm_inode) { | ||
302 | ret = -EINVAL; | ||
303 | mlog_errno(ret); | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | mutex_lock(&main_bm_inode->i_mutex); | ||
308 | |||
309 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
310 | if (ret < 0) { | ||
311 | mlog_errno(ret); | ||
312 | goto out_mutex; | ||
313 | } | ||
314 | |||
315 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
316 | |||
317 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
318 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
319 | mlog(ML_ERROR, "The disk is too old and small. " | ||
320 | "Force to do offline resize."); | ||
321 | ret = -EINVAL; | ||
322 | goto out_unlock; | ||
323 | } | ||
324 | |||
325 | if (!OCFS2_IS_VALID_DINODE(fe)) { | ||
326 | OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); | ||
327 | ret = -EIO; | ||
328 | goto out_unlock; | ||
329 | } | ||
330 | |||
331 | first_new_cluster = le32_to_cpu(fe->i_clusters); | ||
332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | ||
333 | first_new_cluster - 1); | ||
334 | |||
335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | ||
336 | main_bm_inode); | ||
337 | if (ret < 0) { | ||
338 | mlog_errno(ret); | ||
339 | goto out_unlock; | ||
340 | } | ||
341 | |||
342 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
343 | |||
344 | ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); | ||
345 | if (ret) { | ||
346 | mlog_errno(ret); | ||
347 | goto out_unlock; | ||
348 | } | ||
349 | |||
350 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
351 | if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > | ||
352 | le16_to_cpu(fe->id2.i_chain.cl_cpg)) { | ||
353 | ret = -EINVAL; | ||
354 | goto out_unlock; | ||
355 | } | ||
356 | |||
357 | mlog(0, "extend the last group at %llu, new clusters = %d\n", | ||
358 | (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); | ||
359 | |||
360 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); | ||
361 | if (IS_ERR(handle)) { | ||
362 | mlog_errno(PTR_ERR(handle)); | ||
363 | ret = -EINVAL; | ||
364 | goto out_unlock; | ||
365 | } | ||
366 | |||
367 | /* update the last group descriptor and inode. */ | ||
368 | ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, | ||
369 | main_bm_bh, group_bh, | ||
370 | first_new_cluster, | ||
371 | new_clusters); | ||
372 | if (ret) { | ||
373 | mlog_errno(ret); | ||
374 | goto out_commit; | ||
375 | } | ||
376 | |||
377 | ocfs2_update_super_and_backups(main_bm_inode, new_clusters); | ||
378 | |||
379 | out_commit: | ||
380 | ocfs2_commit_trans(osb, handle); | ||
381 | out_unlock: | ||
382 | brelse(group_bh); | ||
383 | brelse(main_bm_bh); | ||
384 | |||
385 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
386 | |||
387 | out_mutex: | ||
388 | mutex_unlock(&main_bm_inode->i_mutex); | ||
389 | iput(main_bm_inode); | ||
390 | |||
391 | out: | ||
392 | mlog_exit_void(); | ||
393 | return ret; | ||
394 | } | ||
395 | |||
396 | static int ocfs2_check_new_group(struct inode *inode, | ||
397 | struct ocfs2_dinode *di, | ||
398 | struct ocfs2_new_group_input *input, | ||
399 | struct buffer_head *group_bh) | ||
400 | { | ||
401 | int ret; | ||
402 | struct ocfs2_group_desc *gd; | ||
403 | u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
404 | unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * | ||
405 | le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
406 | |||
407 | |||
408 | gd = (struct ocfs2_group_desc *)group_bh->b_data; | ||
409 | |||
410 | ret = -EIO; | ||
411 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) | ||
412 | mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", | ||
413 | (unsigned long long)le64_to_cpu(gd->bg_blkno)); | ||
414 | else if (di->i_blkno != gd->bg_parent_dinode) | ||
415 | mlog(ML_ERROR, "Group descriptor # %llu has bad parent " | ||
416 | "pointer (%llu, expected %llu)\n", | ||
417 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
418 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), | ||
419 | (unsigned long long)le64_to_cpu(di->i_blkno)); | ||
420 | else if (le16_to_cpu(gd->bg_bits) > max_bits) | ||
421 | mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", | ||
422 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
423 | le16_to_cpu(gd->bg_bits)); | ||
424 | else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) | ||
425 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
426 | "claims that %u are free\n", | ||
427 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
428 | le16_to_cpu(gd->bg_bits), | ||
429 | le16_to_cpu(gd->bg_free_bits_count)); | ||
430 | else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) | ||
431 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
432 | "max bitmap bits of %u\n", | ||
433 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
434 | le16_to_cpu(gd->bg_bits), | ||
435 | 8 * le16_to_cpu(gd->bg_size)); | ||
436 | else if (le16_to_cpu(gd->bg_chain) != input->chain) | ||
437 | mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " | ||
438 | "while input has %u set.\n", | ||
439 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
440 | le16_to_cpu(gd->bg_chain), input->chain); | ||
441 | else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc) | ||
442 | mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " | ||
443 | "input has %u clusters set\n", | ||
444 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
445 | le16_to_cpu(gd->bg_bits), input->clusters); | ||
446 | else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc) | ||
447 | mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u " | ||
448 | "but it should have %u set\n", | ||
449 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
450 | le16_to_cpu(gd->bg_bits), | ||
451 | input->frees * cl_bpc); | ||
452 | else | ||
453 | ret = 0; | ||
454 | |||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static int ocfs2_verify_group_and_input(struct inode *inode, | ||
459 | struct ocfs2_dinode *di, | ||
460 | struct ocfs2_new_group_input *input, | ||
461 | struct buffer_head *group_bh) | ||
462 | { | ||
463 | u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count); | ||
464 | u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
465 | u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec); | ||
466 | u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group); | ||
467 | u32 total_clusters = le32_to_cpu(di->i_clusters); | ||
468 | int ret = -EINVAL; | ||
469 | |||
470 | if (cluster < total_clusters) | ||
471 | mlog(ML_ERROR, "add a group which is in the current volume.\n"); | ||
472 | else if (input->chain >= cl_count) | ||
473 | mlog(ML_ERROR, "input chain exceeds the limit.\n"); | ||
474 | else if (next_free != cl_count && next_free != input->chain) | ||
475 | mlog(ML_ERROR, | ||
476 | "the add group should be in chain %u\n", next_free); | ||
477 | else if (total_clusters + input->clusters < total_clusters) | ||
478 | mlog(ML_ERROR, "add group's clusters overflow.\n"); | ||
479 | else if (input->clusters > cl_cpg) | ||
480 | mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n"); | ||
481 | else if (input->frees > input->clusters) | ||
482 | mlog(ML_ERROR, "the free cluster exceeds the total clusters\n"); | ||
483 | else if (total_clusters % cl_cpg != 0) | ||
484 | mlog(ML_ERROR, | ||
485 | "the last group isn't full. Use group extend first.\n"); | ||
486 | else if (input->group != ocfs2_which_cluster_group(inode, cluster)) | ||
487 | mlog(ML_ERROR, "group blkno is invalid\n"); | ||
488 | else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh))) | ||
489 | mlog(ML_ERROR, "group descriptor check failed.\n"); | ||
490 | else | ||
491 | ret = 0; | ||
492 | |||
493 | return ret; | ||
494 | } | ||
495 | |||
496 | /* Add a new group descriptor to global_bitmap. */ | ||
497 | int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | ||
498 | { | ||
499 | int ret; | ||
500 | handle_t *handle; | ||
501 | struct buffer_head *main_bm_bh = NULL; | ||
502 | struct inode *main_bm_inode = NULL; | ||
503 | struct ocfs2_dinode *fe = NULL; | ||
504 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
505 | struct buffer_head *group_bh = NULL; | ||
506 | struct ocfs2_group_desc *group = NULL; | ||
507 | struct ocfs2_chain_list *cl; | ||
508 | struct ocfs2_chain_rec *cr; | ||
509 | u16 cl_bpc; | ||
510 | |||
511 | mlog_entry_void(); | ||
512 | |||
513 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
514 | return -EROFS; | ||
515 | |||
516 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
517 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
518 | OCFS2_INVALID_SLOT); | ||
519 | if (!main_bm_inode) { | ||
520 | ret = -EINVAL; | ||
521 | mlog_errno(ret); | ||
522 | goto out; | ||
523 | } | ||
524 | |||
525 | mutex_lock(&main_bm_inode->i_mutex); | ||
526 | |||
527 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
528 | if (ret < 0) { | ||
529 | mlog_errno(ret); | ||
530 | goto out_mutex; | ||
531 | } | ||
532 | |||
533 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
534 | |||
535 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
536 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
537 | mlog(ML_ERROR, "The disk is too old and small." | ||
538 | " Force to do offline resize."); | ||
539 | ret = -EINVAL; | ||
540 | goto out_unlock; | ||
541 | } | ||
542 | |||
543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | ||
544 | if (ret < 0) { | ||
545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | ||
546 | "from the device.", (unsigned long long)input->group); | ||
547 | goto out_unlock; | ||
548 | } | ||
549 | |||
550 | ocfs2_set_new_buffer_uptodate(inode, group_bh); | ||
551 | |||
552 | ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); | ||
553 | if (ret) { | ||
554 | mlog_errno(ret); | ||
555 | goto out_unlock; | ||
556 | } | ||
557 | |||
558 | mlog(0, "Add a new group %llu in chain = %u, length = %u\n", | ||
559 | (unsigned long long)input->group, input->chain, input->clusters); | ||
560 | |||
561 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); | ||
562 | if (IS_ERR(handle)) { | ||
563 | mlog_errno(PTR_ERR(handle)); | ||
564 | ret = -EINVAL; | ||
565 | goto out_unlock; | ||
566 | } | ||
567 | |||
568 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
569 | cl = &fe->id2.i_chain; | ||
570 | cr = &cl->cl_recs[input->chain]; | ||
571 | |||
572 | ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, | ||
573 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
574 | if (ret < 0) { | ||
575 | mlog_errno(ret); | ||
576 | goto out_commit; | ||
577 | } | ||
578 | |||
579 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
580 | group->bg_next_group = cr->c_blkno; | ||
581 | |||
582 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
583 | if (ret < 0) { | ||
584 | mlog_errno(ret); | ||
585 | goto out_commit; | ||
586 | } | ||
587 | |||
588 | ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, | ||
589 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
590 | if (ret < 0) { | ||
591 | mlog_errno(ret); | ||
592 | goto out_commit; | ||
593 | } | ||
594 | |||
595 | if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) { | ||
596 | le16_add_cpu(&cl->cl_next_free_rec, 1); | ||
597 | memset(cr, 0, sizeof(struct ocfs2_chain_rec)); | ||
598 | } | ||
599 | |||
600 | cr->c_blkno = le64_to_cpu(input->group); | ||
601 | le32_add_cpu(&cr->c_total, input->clusters * cl_bpc); | ||
602 | le32_add_cpu(&cr->c_free, input->frees * cl_bpc); | ||
603 | |||
604 | le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc); | ||
605 | le32_add_cpu(&fe->id1.bitmap1.i_used, | ||
606 | (input->clusters - input->frees) * cl_bpc); | ||
607 | le32_add_cpu(&fe->i_clusters, input->clusters); | ||
608 | |||
609 | ocfs2_journal_dirty(handle, main_bm_bh); | ||
610 | |||
611 | spin_lock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
612 | OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
613 | le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits); | ||
614 | spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock); | ||
615 | i_size_write(main_bm_inode, le64_to_cpu(fe->i_size)); | ||
616 | |||
617 | ocfs2_update_super_and_backups(main_bm_inode, input->clusters); | ||
618 | |||
619 | out_commit: | ||
620 | ocfs2_commit_trans(osb, handle); | ||
621 | out_unlock: | ||
622 | brelse(group_bh); | ||
623 | brelse(main_bm_bh); | ||
624 | |||
625 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
626 | |||
627 | out_mutex: | ||
628 | mutex_unlock(&main_bm_inode->i_mutex); | ||
629 | iput(main_bm_inode); | ||
630 | |||
631 | out: | ||
632 | mlog_exit_void(); | ||
633 | return ret; | ||
634 | } | ||
diff --git a/fs/ocfs2/resize.h b/fs/ocfs2/resize.h new file mode 100644 index 000000000000..f38841abf10b --- /dev/null +++ b/fs/ocfs2/resize.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * resize.h | ||
5 | * | ||
6 | * Function prototypes | ||
7 | * | ||
8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef OCFS2_RESIZE_H | ||
27 | #define OCFS2_RESIZE_H | ||
28 | |||
29 | int ocfs2_group_extend(struct inode * inode, int new_clusters); | ||
30 | int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input); | ||
31 | |||
32 | #endif /* OCFS2_RESIZE_H */ | ||
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index af4882b62cfa..3a50ce555e64 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | |||
48 | s16 slot_num, | 48 | s16 slot_num, |
49 | s16 node_num); | 49 | s16 node_num); |
50 | 50 | ||
51 | /* Use the slot information we've collected to create a map of mounted | ||
52 | * nodes. Should be holding an EX on super block. assumes slot info is | ||
53 | * up to date. Note that we call this *after* we find a slot, so our | ||
54 | * own node should be set in the map too... */ | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb) | ||
56 | { | ||
57 | int i; | ||
58 | struct ocfs2_slot_info *si = osb->slot_info; | ||
59 | |||
60 | spin_lock(&si->si_lock); | ||
61 | |||
62 | for (i = 0; i < si->si_size; i++) | ||
63 | if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT) | ||
64 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, | ||
65 | si->si_global_node_nums[i]); | ||
66 | |||
67 | spin_unlock(&si->si_lock); | ||
68 | } | ||
69 | |||
70 | /* post the slot information on disk into our slot_info struct. */ | 51 | /* post the slot information on disk into our slot_info struct. */ |
71 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
72 | { | 53 | { |
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index d8c8ceed031b..1025872aaade 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
@@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, |
53 | s16 slot_num); | 53 | s16 slot_num); |
54 | 54 | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb); | ||
56 | |||
57 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, |
58 | int slot_num) | 56 | int slot_num) |
59 | { | 57 | { |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8f09f5235e3a..7e397e2c25dd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -101,8 +101,6 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | 101 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, |
102 | u64 bg_blkno, | 102 | u64 bg_blkno, |
103 | u16 bg_bit_off); | 103 | u16 bg_bit_off); |
104 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | ||
105 | u32 cluster); | ||
106 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | 104 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, |
107 | u64 data_blkno, | 105 | u64 data_blkno, |
108 | u64 *bg_blkno, | 106 | u64 *bg_blkno, |
@@ -114,7 +112,7 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
114 | 112 | ||
115 | if (inode) { | 113 | if (inode) { |
116 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) | 114 | if (ac->ac_which != OCFS2_AC_USE_LOCAL) |
117 | ocfs2_meta_unlock(inode, 1); | 115 | ocfs2_inode_unlock(inode, 1); |
118 | 116 | ||
119 | mutex_unlock(&inode->i_mutex); | 117 | mutex_unlock(&inode->i_mutex); |
120 | 118 | ||
@@ -131,9 +129,9 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
131 | } | 129 | } |
132 | 130 | ||
133 | /* somewhat more expensive than our other checks, so use sparingly. */ | 131 | /* somewhat more expensive than our other checks, so use sparingly. */ |
134 | static int ocfs2_check_group_descriptor(struct super_block *sb, | 132 | int ocfs2_check_group_descriptor(struct super_block *sb, |
135 | struct ocfs2_dinode *di, | 133 | struct ocfs2_dinode *di, |
136 | struct ocfs2_group_desc *gd) | 134 | struct ocfs2_group_desc *gd) |
137 | { | 135 | { |
138 | unsigned int max_bits; | 136 | unsigned int max_bits; |
139 | 137 | ||
@@ -412,7 +410,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
412 | 410 | ||
413 | mutex_lock(&alloc_inode->i_mutex); | 411 | mutex_lock(&alloc_inode->i_mutex); |
414 | 412 | ||
415 | status = ocfs2_meta_lock(alloc_inode, &bh, 1); | 413 | status = ocfs2_inode_lock(alloc_inode, &bh, 1); |
416 | if (status < 0) { | 414 | if (status < 0) { |
417 | mutex_unlock(&alloc_inode->i_mutex); | 415 | mutex_unlock(&alloc_inode->i_mutex); |
418 | iput(alloc_inode); | 416 | iput(alloc_inode); |
@@ -1443,8 +1441,7 @@ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | |||
1443 | 1441 | ||
1444 | /* given a cluster offset, calculate which block group it belongs to | 1442 | /* given a cluster offset, calculate which block group it belongs to |
1445 | * and return that block offset. */ | 1443 | * and return that block offset. */ |
1446 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | 1444 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster) |
1447 | u32 cluster) | ||
1448 | { | 1445 | { |
1449 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1446 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1450 | u32 group_no; | 1447 | u32 group_no; |
@@ -1519,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
1519 | if (min_clusters > (osb->bitmap_cpg - 1)) { | 1516 | if (min_clusters > (osb->bitmap_cpg - 1)) { |
1520 | /* The only paths asking for contiguousness | 1517 | /* The only paths asking for contiguousness |
1521 | * should know about this already. */ | 1518 | * should know about this already. */ |
1522 | mlog(ML_ERROR, "minimum allocation requested exceeds " | 1519 | mlog(ML_ERROR, "minimum allocation requested %u exceeds " |
1523 | "group bitmap size!"); | 1520 | "group bitmap size %u!\n", min_clusters, |
1521 | osb->bitmap_cpg); | ||
1524 | status = -ENOSPC; | 1522 | status = -ENOSPC; |
1525 | goto bail; | 1523 | goto bail; |
1526 | } | 1524 | } |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index cafe93703095..8799033bb459 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -147,4 +147,12 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 147 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
148 | struct ocfs2_alloc_context *ac); | 148 | struct ocfs2_alloc_context *ac); |
149 | 149 | ||
150 | /* given a cluster offset, calculate which block group it belongs to | ||
151 | * and return that block offset. */ | ||
152 | u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | ||
153 | |||
154 | /* somewhat more expensive than our other checks, so use sparingly. */ | ||
155 | int ocfs2_check_group_descriptor(struct super_block *sb, | ||
156 | struct ocfs2_dinode *di, | ||
157 | struct ocfs2_group_desc *gd); | ||
150 | #endif /* _CHAINALLOC_H_ */ | 158 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5ee775420665..01fe40ee5ea9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -65,7 +65,6 @@ | |||
65 | #include "sysfile.h" | 65 | #include "sysfile.h" |
66 | #include "uptodate.h" | 66 | #include "uptodate.h" |
67 | #include "ver.h" | 67 | #include "ver.h" |
68 | #include "vote.h" | ||
69 | 68 | ||
70 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
71 | 70 | ||
@@ -84,9 +83,11 @@ MODULE_LICENSE("GPL"); | |||
84 | 83 | ||
85 | struct mount_options | 84 | struct mount_options |
86 | { | 85 | { |
86 | unsigned long commit_interval; | ||
87 | unsigned long mount_opt; | 87 | unsigned long mount_opt; |
88 | unsigned int atime_quantum; | 88 | unsigned int atime_quantum; |
89 | signed short slot; | 89 | signed short slot; |
90 | unsigned int localalloc_opt; | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
@@ -150,6 +151,9 @@ enum { | |||
150 | Opt_data_writeback, | 151 | Opt_data_writeback, |
151 | Opt_atime_quantum, | 152 | Opt_atime_quantum, |
152 | Opt_slot, | 153 | Opt_slot, |
154 | Opt_commit, | ||
155 | Opt_localalloc, | ||
156 | Opt_localflocks, | ||
153 | Opt_err, | 157 | Opt_err, |
154 | }; | 158 | }; |
155 | 159 | ||
@@ -165,6 +169,9 @@ static match_table_t tokens = { | |||
165 | {Opt_data_writeback, "data=writeback"}, | 169 | {Opt_data_writeback, "data=writeback"}, |
166 | {Opt_atime_quantum, "atime_quantum=%u"}, | 170 | {Opt_atime_quantum, "atime_quantum=%u"}, |
167 | {Opt_slot, "preferred_slot=%u"}, | 171 | {Opt_slot, "preferred_slot=%u"}, |
172 | {Opt_commit, "commit=%u"}, | ||
173 | {Opt_localalloc, "localalloc=%d"}, | ||
174 | {Opt_localflocks, "localflocks"}, | ||
168 | {Opt_err, NULL} | 175 | {Opt_err, NULL} |
169 | }; | 176 | }; |
170 | 177 | ||
@@ -213,7 +220,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
213 | 220 | ||
214 | mlog_entry_void(); | 221 | mlog_entry_void(); |
215 | 222 | ||
216 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); | 223 | new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); |
217 | if (IS_ERR(new)) { | 224 | if (IS_ERR(new)) { |
218 | status = PTR_ERR(new); | 225 | status = PTR_ERR(new); |
219 | mlog_errno(status); | 226 | mlog_errno(status); |
@@ -221,7 +228,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) | |||
221 | } | 228 | } |
222 | osb->root_inode = new; | 229 | osb->root_inode = new; |
223 | 230 | ||
224 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); | 231 | new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); |
225 | if (IS_ERR(new)) { | 232 | if (IS_ERR(new)) { |
226 | status = PTR_ERR(new); | 233 | status = PTR_ERR(new); |
227 | mlog_errno(status); | 234 | mlog_errno(status); |
@@ -443,6 +450,8 @@ unlock_osb: | |||
443 | osb->s_mount_opt = parsed_options.mount_opt; | 450 | osb->s_mount_opt = parsed_options.mount_opt; |
444 | osb->s_atime_quantum = parsed_options.atime_quantum; | 451 | osb->s_atime_quantum = parsed_options.atime_quantum; |
445 | osb->preferred_slot = parsed_options.slot; | 452 | osb->preferred_slot = parsed_options.slot; |
453 | if (parsed_options.commit_interval) | ||
454 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
446 | 455 | ||
447 | if (!ocfs2_is_hard_readonly(osb)) | 456 | if (!ocfs2_is_hard_readonly(osb)) |
448 | ocfs2_set_journal_params(osb); | 457 | ocfs2_set_journal_params(osb); |
@@ -597,6 +606,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
597 | osb->s_mount_opt = parsed_options.mount_opt; | 606 | osb->s_mount_opt = parsed_options.mount_opt; |
598 | osb->s_atime_quantum = parsed_options.atime_quantum; | 607 | osb->s_atime_quantum = parsed_options.atime_quantum; |
599 | osb->preferred_slot = parsed_options.slot; | 608 | osb->preferred_slot = parsed_options.slot; |
609 | osb->osb_commit_interval = parsed_options.commit_interval; | ||
610 | osb->local_alloc_size = parsed_options.localalloc_opt; | ||
600 | 611 | ||
601 | sb->s_magic = OCFS2_SUPER_MAGIC; | 612 | sb->s_magic = OCFS2_SUPER_MAGIC; |
602 | 613 | ||
@@ -747,9 +758,11 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
747 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 758 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
748 | options ? options : "(none)"); | 759 | options ? options : "(none)"); |
749 | 760 | ||
761 | mopt->commit_interval = 0; | ||
750 | mopt->mount_opt = 0; | 762 | mopt->mount_opt = 0; |
751 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
752 | mopt->slot = OCFS2_INVALID_SLOT; | 764 | mopt->slot = OCFS2_INVALID_SLOT; |
765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | ||
753 | 766 | ||
754 | if (!options) { | 767 | if (!options) { |
755 | status = 1; | 768 | status = 1; |
@@ -816,6 +829,41 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
816 | if (option) | 829 | if (option) |
817 | mopt->slot = (s16)option; | 830 | mopt->slot = (s16)option; |
818 | break; | 831 | break; |
832 | case Opt_commit: | ||
833 | option = 0; | ||
834 | if (match_int(&args[0], &option)) { | ||
835 | status = 0; | ||
836 | goto bail; | ||
837 | } | ||
838 | if (option < 0) | ||
839 | return 0; | ||
840 | if (option == 0) | ||
841 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | ||
842 | mopt->commit_interval = HZ * option; | ||
843 | break; | ||
844 | case Opt_localalloc: | ||
845 | option = 0; | ||
846 | if (match_int(&args[0], &option)) { | ||
847 | status = 0; | ||
848 | goto bail; | ||
849 | } | ||
850 | if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) | ||
851 | mopt->localalloc_opt = option; | ||
852 | break; | ||
853 | case Opt_localflocks: | ||
854 | /* | ||
855 | * Changing this during remount could race | ||
856 | * flock() requests, or "unbalance" existing | ||
857 | * ones (e.g., a lock is taken in one mode but | ||
858 | * dropped in the other). If users care enough | ||
859 | * to flip locking modes during remount, we | ||
860 | * could add a "local" flag to individual | ||
861 | * flock structures for proper tracking of | ||
862 | * state. | ||
863 | */ | ||
864 | if (!is_remount) | ||
865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | ||
866 | break; | ||
819 | default: | 867 | default: |
820 | mlog(ML_ERROR, | 868 | mlog(ML_ERROR, |
821 | "Unrecognized mount option \"%s\" " | 869 | "Unrecognized mount option \"%s\" " |
@@ -864,6 +912,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
864 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 912 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) |
865 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 913 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); |
866 | 914 | ||
915 | if (osb->osb_commit_interval) | ||
916 | seq_printf(s, ",commit=%u", | ||
917 | (unsigned) (osb->osb_commit_interval / HZ)); | ||
918 | |||
919 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | ||
920 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | ||
921 | |||
922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | ||
923 | seq_printf(s, ",localflocks,"); | ||
924 | |||
867 | return 0; | 925 | return 0; |
868 | } | 926 | } |
869 | 927 | ||
@@ -965,7 +1023,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
965 | goto bail; | 1023 | goto bail; |
966 | } | 1024 | } |
967 | 1025 | ||
968 | status = ocfs2_meta_lock(inode, &bh, 0); | 1026 | status = ocfs2_inode_lock(inode, &bh, 0); |
969 | if (status < 0) { | 1027 | if (status < 0) { |
970 | mlog_errno(status); | 1028 | mlog_errno(status); |
971 | goto bail; | 1029 | goto bail; |
@@ -989,7 +1047,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
989 | 1047 | ||
990 | brelse(bh); | 1048 | brelse(bh); |
991 | 1049 | ||
992 | ocfs2_meta_unlock(inode, 0); | 1050 | ocfs2_inode_unlock(inode, 0); |
993 | status = 0; | 1051 | status = 0; |
994 | bail: | 1052 | bail: |
995 | if (inode) | 1053 | if (inode) |
@@ -1020,8 +1078,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) | |||
1020 | oi->ip_clusters = 0; | 1078 | oi->ip_clusters = 0; |
1021 | 1079 | ||
1022 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); | 1080 | ocfs2_lock_res_init_once(&oi->ip_rw_lockres); |
1023 | ocfs2_lock_res_init_once(&oi->ip_meta_lockres); | 1081 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); |
1024 | ocfs2_lock_res_init_once(&oi->ip_data_lockres); | ||
1025 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 1082 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); |
1026 | 1083 | ||
1027 | ocfs2_metadata_cache_init(&oi->vfs_inode); | 1084 | ocfs2_metadata_cache_init(&oi->vfs_inode); |
@@ -1117,25 +1174,12 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1117 | goto leave; | 1174 | goto leave; |
1118 | } | 1175 | } |
1119 | 1176 | ||
1120 | status = ocfs2_register_hb_callbacks(osb); | ||
1121 | if (status < 0) { | ||
1122 | mlog_errno(status); | ||
1123 | goto leave; | ||
1124 | } | ||
1125 | |||
1126 | status = ocfs2_dlm_init(osb); | 1177 | status = ocfs2_dlm_init(osb); |
1127 | if (status < 0) { | 1178 | if (status < 0) { |
1128 | mlog_errno(status); | 1179 | mlog_errno(status); |
1129 | goto leave; | 1180 | goto leave; |
1130 | } | 1181 | } |
1131 | 1182 | ||
1132 | /* requires vote_thread to be running. */ | ||
1133 | status = ocfs2_register_net_handlers(osb); | ||
1134 | if (status < 0) { | ||
1135 | mlog_errno(status); | ||
1136 | goto leave; | ||
1137 | } | ||
1138 | |||
1139 | status = ocfs2_super_lock(osb, 1); | 1183 | status = ocfs2_super_lock(osb, 1); |
1140 | if (status < 0) { | 1184 | if (status < 0) { |
1141 | mlog_errno(status); | 1185 | mlog_errno(status); |
@@ -1150,8 +1194,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1150 | goto leave; | 1194 | goto leave; |
1151 | } | 1195 | } |
1152 | 1196 | ||
1153 | ocfs2_populate_mounted_map(osb); | ||
1154 | |||
1155 | /* load all node-local system inodes */ | 1197 | /* load all node-local system inodes */ |
1156 | status = ocfs2_init_local_system_inodes(osb); | 1198 | status = ocfs2_init_local_system_inodes(osb); |
1157 | if (status < 0) { | 1199 | if (status < 0) { |
@@ -1174,15 +1216,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1174 | if (ocfs2_mount_local(osb)) | 1216 | if (ocfs2_mount_local(osb)) |
1175 | goto leave; | 1217 | goto leave; |
1176 | 1218 | ||
1177 | /* This should be sent *after* we recovered our journal as it | ||
1178 | * will cause other nodes to unmark us as needing | ||
1179 | * recovery. However, we need to send it *before* dropping the | ||
1180 | * super block lock as otherwise their recovery threads might | ||
1181 | * try to clean us up while we're live! */ | ||
1182 | status = ocfs2_request_mount_vote(osb); | ||
1183 | if (status < 0) | ||
1184 | mlog_errno(status); | ||
1185 | |||
1186 | leave: | 1219 | leave: |
1187 | if (unlock_super) | 1220 | if (unlock_super) |
1188 | ocfs2_super_unlock(osb, 1); | 1221 | ocfs2_super_unlock(osb, 1); |
@@ -1240,10 +1273,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1240 | mlog_errno(tmp); | 1273 | mlog_errno(tmp); |
1241 | return; | 1274 | return; |
1242 | } | 1275 | } |
1243 | |||
1244 | tmp = ocfs2_request_umount_vote(osb); | ||
1245 | if (tmp < 0) | ||
1246 | mlog_errno(tmp); | ||
1247 | } | 1276 | } |
1248 | 1277 | ||
1249 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
@@ -1254,13 +1283,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1254 | 1283 | ||
1255 | ocfs2_release_system_inodes(osb); | 1284 | ocfs2_release_system_inodes(osb); |
1256 | 1285 | ||
1257 | if (osb->dlm) { | 1286 | if (osb->dlm) |
1258 | ocfs2_unregister_net_handlers(osb); | ||
1259 | |||
1260 | ocfs2_dlm_shutdown(osb); | 1287 | ocfs2_dlm_shutdown(osb); |
1261 | } | ||
1262 | |||
1263 | ocfs2_clear_hb_callbacks(osb); | ||
1264 | 1288 | ||
1265 | debugfs_remove(osb->osb_debug_root); | 1289 | debugfs_remove(osb->osb_debug_root); |
1266 | 1290 | ||
@@ -1315,7 +1339,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1315 | int i, cbits, bbits; | 1339 | int i, cbits, bbits; |
1316 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 1340 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; |
1317 | struct inode *inode = NULL; | 1341 | struct inode *inode = NULL; |
1318 | struct buffer_head *bitmap_bh = NULL; | ||
1319 | struct ocfs2_journal *journal; | 1342 | struct ocfs2_journal *journal; |
1320 | __le32 uuid_net_key; | 1343 | __le32 uuid_net_key; |
1321 | struct ocfs2_super *osb; | 1344 | struct ocfs2_super *osb; |
@@ -1344,19 +1367,13 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1344 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1367 | osb->s_sectsize_bits = blksize_bits(sector_size); |
1345 | BUG_ON(!osb->s_sectsize_bits); | 1368 | BUG_ON(!osb->s_sectsize_bits); |
1346 | 1369 | ||
1347 | osb->net_response_ids = 0; | ||
1348 | spin_lock_init(&osb->net_response_lock); | ||
1349 | INIT_LIST_HEAD(&osb->net_response_list); | ||
1350 | |||
1351 | INIT_LIST_HEAD(&osb->osb_net_handlers); | ||
1352 | init_waitqueue_head(&osb->recovery_event); | 1370 | init_waitqueue_head(&osb->recovery_event); |
1353 | spin_lock_init(&osb->vote_task_lock); | 1371 | spin_lock_init(&osb->dc_task_lock); |
1354 | init_waitqueue_head(&osb->vote_event); | 1372 | init_waitqueue_head(&osb->dc_event); |
1355 | osb->vote_work_sequence = 0; | 1373 | osb->dc_work_sequence = 0; |
1356 | osb->vote_wake_sequence = 0; | 1374 | osb->dc_wake_sequence = 0; |
1357 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1375 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
1358 | osb->blocked_lock_count = 0; | 1376 | osb->blocked_lock_count = 0; |
1359 | INIT_LIST_HEAD(&osb->vote_list); | ||
1360 | spin_lock_init(&osb->osb_lock); | 1377 | spin_lock_init(&osb->osb_lock); |
1361 | 1378 | ||
1362 | atomic_set(&osb->alloc_stats.moves, 0); | 1379 | atomic_set(&osb->alloc_stats.moves, 0); |
@@ -1496,7 +1513,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1496 | } | 1513 | } |
1497 | 1514 | ||
1498 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | 1515 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); |
1499 | osb->net_key = le32_to_cpu(uuid_net_key); | ||
1500 | 1516 | ||
1501 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 1517 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); |
1502 | osb->vol_label[63] = '\0'; | 1518 | osb->vol_label[63] = '\0'; |
@@ -1539,25 +1555,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1539 | } | 1555 | } |
1540 | 1556 | ||
1541 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 1557 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; |
1542 | |||
1543 | /* We don't have a cluster lock on the bitmap here because | ||
1544 | * we're only interested in static information and the extra | ||
1545 | * complexity at mount time isn't worht it. Don't pass the | ||
1546 | * inode in to the read function though as we don't want it to | ||
1547 | * be put in the cache. */ | ||
1548 | status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, | ||
1549 | NULL); | ||
1550 | iput(inode); | 1558 | iput(inode); |
1551 | if (status < 0) { | ||
1552 | mlog_errno(status); | ||
1553 | goto bail; | ||
1554 | } | ||
1555 | 1559 | ||
1556 | di = (struct ocfs2_dinode *) bitmap_bh->b_data; | 1560 | osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; |
1557 | osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | ||
1558 | brelse(bitmap_bh); | ||
1559 | mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", | ||
1560 | (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); | ||
1561 | 1561 | ||
1562 | status = ocfs2_init_slot_info(osb); | 1562 | status = ocfs2_init_slot_info(osb); |
1563 | if (status < 0) { | 1563 | if (status < 0) { |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index fd2e846e3e6f..ab713ebdd546 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -112,7 +112,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
112 | goto bail; | 112 | goto bail; |
113 | } | 113 | } |
114 | 114 | ||
115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE); | 115 | inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE, type); |
116 | if (IS_ERR(inode)) { | 116 | if (IS_ERR(inode)) { |
117 | mlog_errno(PTR_ERR(inode)); | 117 | mlog_errno(PTR_ERR(inode)); |
118 | inode = NULL; | 118 | inode = NULL; |
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c index 5405ce121c99..e2488f4128a2 100644 --- a/fs/ocfs2/ver.c +++ b/fs/ocfs2/ver.c | |||
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | #include "ver.h" | 30 | #include "ver.h" |
31 | 31 | ||
32 | #define OCFS2_BUILD_VERSION "1.3.3" | 32 | #define OCFS2_BUILD_VERSION "1.5.0" |
33 | 33 | ||
34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION | 34 | #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION |
35 | 35 | ||
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c deleted file mode 100644 index c05358538f2b..000000000000 --- a/fs/ocfs2/vote.c +++ /dev/null | |||
@@ -1,756 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * vote.c | ||
5 | * | ||
6 | * description here | ||
7 | * | ||
8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/slab.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/kthread.h> | ||
30 | |||
31 | #include <cluster/heartbeat.h> | ||
32 | #include <cluster/nodemanager.h> | ||
33 | #include <cluster/tcp.h> | ||
34 | |||
35 | #include <dlm/dlmapi.h> | ||
36 | |||
37 | #define MLOG_MASK_PREFIX ML_VOTE | ||
38 | #include <cluster/masklog.h> | ||
39 | |||
40 | #include "ocfs2.h" | ||
41 | |||
42 | #include "alloc.h" | ||
43 | #include "dlmglue.h" | ||
44 | #include "extent_map.h" | ||
45 | #include "heartbeat.h" | ||
46 | #include "inode.h" | ||
47 | #include "journal.h" | ||
48 | #include "slot_map.h" | ||
49 | #include "vote.h" | ||
50 | |||
51 | #include "buffer_head_io.h" | ||
52 | |||
53 | #define OCFS2_MESSAGE_TYPE_VOTE (0x1) | ||
54 | #define OCFS2_MESSAGE_TYPE_RESPONSE (0x2) | ||
55 | struct ocfs2_msg_hdr | ||
56 | { | ||
57 | __be32 h_response_id; /* used to lookup message handle on sending | ||
58 | * node. */ | ||
59 | __be32 h_request; | ||
60 | __be64 h_blkno; | ||
61 | __be32 h_generation; | ||
62 | __be32 h_node_num; /* node sending this particular message. */ | ||
63 | }; | ||
64 | |||
65 | struct ocfs2_vote_msg | ||
66 | { | ||
67 | struct ocfs2_msg_hdr v_hdr; | ||
68 | __be32 v_reserved1; | ||
69 | } __attribute__ ((packed)); | ||
70 | |||
71 | /* Responses are given these values to maintain backwards | ||
72 | * compatibility with older ocfs2 versions */ | ||
73 | #define OCFS2_RESPONSE_OK (0) | ||
74 | #define OCFS2_RESPONSE_BUSY (-16) | ||
75 | #define OCFS2_RESPONSE_BAD_MSG (-22) | ||
76 | |||
77 | struct ocfs2_response_msg | ||
78 | { | ||
79 | struct ocfs2_msg_hdr r_hdr; | ||
80 | __be32 r_response; | ||
81 | } __attribute__ ((packed)); | ||
82 | |||
83 | struct ocfs2_vote_work { | ||
84 | struct list_head w_list; | ||
85 | struct ocfs2_vote_msg w_msg; | ||
86 | }; | ||
87 | |||
88 | enum ocfs2_vote_request { | ||
89 | OCFS2_VOTE_REQ_INVALID = 0, | ||
90 | OCFS2_VOTE_REQ_MOUNT, | ||
91 | OCFS2_VOTE_REQ_UMOUNT, | ||
92 | OCFS2_VOTE_REQ_LAST | ||
93 | }; | ||
94 | |||
95 | static inline int ocfs2_is_valid_vote_request(int request) | ||
96 | { | ||
97 | return OCFS2_VOTE_REQ_INVALID < request && | ||
98 | request < OCFS2_VOTE_REQ_LAST; | ||
99 | } | ||
100 | |||
101 | typedef void (*ocfs2_net_response_callback)(void *priv, | ||
102 | struct ocfs2_response_msg *resp); | ||
103 | struct ocfs2_net_response_cb { | ||
104 | ocfs2_net_response_callback rc_cb; | ||
105 | void *rc_priv; | ||
106 | }; | ||
107 | |||
108 | struct ocfs2_net_wait_ctxt { | ||
109 | struct list_head n_list; | ||
110 | u32 n_response_id; | ||
111 | wait_queue_head_t n_event; | ||
112 | struct ocfs2_node_map n_node_map; | ||
113 | int n_response; /* an agreggate response. 0 if | ||
114 | * all nodes are go, < 0 on any | ||
115 | * negative response from any | ||
116 | * node or network error. */ | ||
117 | struct ocfs2_net_response_cb *n_callback; | ||
118 | }; | ||
119 | |||
120 | static void ocfs2_process_mount_request(struct ocfs2_super *osb, | ||
121 | unsigned int node_num) | ||
122 | { | ||
123 | mlog(0, "MOUNT vote from node %u\n", node_num); | ||
124 | /* The other node only sends us this message when he has an EX | ||
125 | * on the superblock, so our recovery threads (if having been | ||
126 | * launched) are waiting on it.*/ | ||
127 | ocfs2_recovery_map_clear(osb, node_num); | ||
128 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, node_num); | ||
129 | |||
130 | /* We clear the umount map here because a node may have been | ||
131 | * previously mounted, safely unmounted but never stopped | ||
132 | * heartbeating - in which case we'd have a stale entry. */ | ||
133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
134 | } | ||
135 | |||
136 | static void ocfs2_process_umount_request(struct ocfs2_super *osb, | ||
137 | unsigned int node_num) | ||
138 | { | ||
139 | mlog(0, "UMOUNT vote from node %u\n", node_num); | ||
140 | ocfs2_node_map_clear_bit(osb, &osb->mounted_map, node_num); | ||
141 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); | ||
142 | } | ||
143 | |||
144 | static void ocfs2_process_vote(struct ocfs2_super *osb, | ||
145 | struct ocfs2_vote_msg *msg) | ||
146 | { | ||
147 | int net_status, vote_response; | ||
148 | unsigned int node_num; | ||
149 | u64 blkno; | ||
150 | enum ocfs2_vote_request request; | ||
151 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | ||
152 | struct ocfs2_response_msg response; | ||
153 | |||
154 | /* decode the network mumbo jumbo into local variables. */ | ||
155 | request = be32_to_cpu(hdr->h_request); | ||
156 | blkno = be64_to_cpu(hdr->h_blkno); | ||
157 | node_num = be32_to_cpu(hdr->h_node_num); | ||
158 | |||
159 | mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n", | ||
160 | request, (unsigned long long)blkno, node_num); | ||
161 | |||
162 | if (!ocfs2_is_valid_vote_request(request)) { | ||
163 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", | ||
164 | request, node_num); | ||
165 | vote_response = OCFS2_RESPONSE_BAD_MSG; | ||
166 | goto respond; | ||
167 | } | ||
168 | |||
169 | vote_response = OCFS2_RESPONSE_OK; | ||
170 | |||
171 | switch (request) { | ||
172 | case OCFS2_VOTE_REQ_UMOUNT: | ||
173 | ocfs2_process_umount_request(osb, node_num); | ||
174 | goto respond; | ||
175 | case OCFS2_VOTE_REQ_MOUNT: | ||
176 | ocfs2_process_mount_request(osb, node_num); | ||
177 | goto respond; | ||
178 | default: | ||
179 | /* avoids a gcc warning */ | ||
180 | break; | ||
181 | } | ||
182 | |||
183 | respond: | ||
184 | /* Response struture is small so we just put it on the stack | ||
185 | * and stuff it inline. */ | ||
186 | memset(&response, 0, sizeof(struct ocfs2_response_msg)); | ||
187 | response.r_hdr.h_response_id = hdr->h_response_id; | ||
188 | response.r_hdr.h_blkno = hdr->h_blkno; | ||
189 | response.r_hdr.h_generation = hdr->h_generation; | ||
190 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); | ||
191 | response.r_response = cpu_to_be32(vote_response); | ||
192 | |||
193 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
194 | osb->net_key, | ||
195 | &response, | ||
196 | sizeof(struct ocfs2_response_msg), | ||
197 | node_num, | ||
198 | NULL); | ||
199 | /* We still want to error print for ENOPROTOOPT here. The | ||
200 | * sending node shouldn't have unregistered his net handler | ||
201 | * without sending an unmount vote 1st */ | ||
202 | if (net_status < 0 | ||
203 | && net_status != -ETIMEDOUT | ||
204 | && net_status != -ENOTCONN) | ||
205 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", | ||
206 | node_num, net_status); | ||
207 | } | ||
208 | |||
209 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) | ||
210 | { | ||
211 | unsigned long processed; | ||
212 | struct ocfs2_lock_res *lockres; | ||
213 | struct ocfs2_vote_work *work; | ||
214 | |||
215 | mlog_entry_void(); | ||
216 | |||
217 | spin_lock(&osb->vote_task_lock); | ||
218 | /* grab this early so we know to try again if a state change and | ||
219 | * wake happens part-way through our work */ | ||
220 | osb->vote_work_sequence = osb->vote_wake_sequence; | ||
221 | |||
222 | processed = osb->blocked_lock_count; | ||
223 | while (processed) { | ||
224 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
225 | |||
226 | lockres = list_entry(osb->blocked_lock_list.next, | ||
227 | struct ocfs2_lock_res, l_blocked_list); | ||
228 | list_del_init(&lockres->l_blocked_list); | ||
229 | osb->blocked_lock_count--; | ||
230 | spin_unlock(&osb->vote_task_lock); | ||
231 | |||
232 | BUG_ON(!processed); | ||
233 | processed--; | ||
234 | |||
235 | ocfs2_process_blocked_lock(osb, lockres); | ||
236 | |||
237 | spin_lock(&osb->vote_task_lock); | ||
238 | } | ||
239 | |||
240 | while (osb->vote_count) { | ||
241 | BUG_ON(list_empty(&osb->vote_list)); | ||
242 | work = list_entry(osb->vote_list.next, | ||
243 | struct ocfs2_vote_work, w_list); | ||
244 | list_del(&work->w_list); | ||
245 | osb->vote_count--; | ||
246 | spin_unlock(&osb->vote_task_lock); | ||
247 | |||
248 | ocfs2_process_vote(osb, &work->w_msg); | ||
249 | kfree(work); | ||
250 | |||
251 | spin_lock(&osb->vote_task_lock); | ||
252 | } | ||
253 | spin_unlock(&osb->vote_task_lock); | ||
254 | |||
255 | mlog_exit_void(); | ||
256 | } | ||
257 | |||
258 | static int ocfs2_vote_thread_lists_empty(struct ocfs2_super *osb) | ||
259 | { | ||
260 | int empty = 0; | ||
261 | |||
262 | spin_lock(&osb->vote_task_lock); | ||
263 | if (list_empty(&osb->blocked_lock_list) && | ||
264 | list_empty(&osb->vote_list)) | ||
265 | empty = 1; | ||
266 | |||
267 | spin_unlock(&osb->vote_task_lock); | ||
268 | return empty; | ||
269 | } | ||
270 | |||
271 | static int ocfs2_vote_thread_should_wake(struct ocfs2_super *osb) | ||
272 | { | ||
273 | int should_wake = 0; | ||
274 | |||
275 | spin_lock(&osb->vote_task_lock); | ||
276 | if (osb->vote_work_sequence != osb->vote_wake_sequence) | ||
277 | should_wake = 1; | ||
278 | spin_unlock(&osb->vote_task_lock); | ||
279 | |||
280 | return should_wake; | ||
281 | } | ||
282 | |||
283 | int ocfs2_vote_thread(void *arg) | ||
284 | { | ||
285 | int status = 0; | ||
286 | struct ocfs2_super *osb = arg; | ||
287 | |||
288 | /* only quit once we've been asked to stop and there is no more | ||
289 | * work available */ | ||
290 | while (!(kthread_should_stop() && | ||
291 | ocfs2_vote_thread_lists_empty(osb))) { | ||
292 | |||
293 | wait_event_interruptible(osb->vote_event, | ||
294 | ocfs2_vote_thread_should_wake(osb) || | ||
295 | kthread_should_stop()); | ||
296 | |||
297 | mlog(0, "vote_thread: awoken\n"); | ||
298 | |||
299 | ocfs2_vote_thread_do_work(osb); | ||
300 | } | ||
301 | |||
302 | osb->vote_task = NULL; | ||
303 | return status; | ||
304 | } | ||
305 | |||
306 | static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id) | ||
307 | { | ||
308 | struct ocfs2_net_wait_ctxt *w; | ||
309 | |||
310 | w = kzalloc(sizeof(*w), GFP_NOFS); | ||
311 | if (!w) { | ||
312 | mlog_errno(-ENOMEM); | ||
313 | goto bail; | ||
314 | } | ||
315 | |||
316 | INIT_LIST_HEAD(&w->n_list); | ||
317 | init_waitqueue_head(&w->n_event); | ||
318 | ocfs2_node_map_init(&w->n_node_map); | ||
319 | w->n_response_id = response_id; | ||
320 | w->n_callback = NULL; | ||
321 | bail: | ||
322 | return w; | ||
323 | } | ||
324 | |||
325 | static unsigned int ocfs2_new_response_id(struct ocfs2_super *osb) | ||
326 | { | ||
327 | unsigned int ret; | ||
328 | |||
329 | spin_lock(&osb->net_response_lock); | ||
330 | ret = ++osb->net_response_ids; | ||
331 | spin_unlock(&osb->net_response_lock); | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static void ocfs2_dequeue_net_wait_ctxt(struct ocfs2_super *osb, | ||
337 | struct ocfs2_net_wait_ctxt *w) | ||
338 | { | ||
339 | spin_lock(&osb->net_response_lock); | ||
340 | list_del(&w->n_list); | ||
341 | spin_unlock(&osb->net_response_lock); | ||
342 | } | ||
343 | |||
344 | static void ocfs2_queue_net_wait_ctxt(struct ocfs2_super *osb, | ||
345 | struct ocfs2_net_wait_ctxt *w) | ||
346 | { | ||
347 | spin_lock(&osb->net_response_lock); | ||
348 | list_add_tail(&w->n_list, | ||
349 | &osb->net_response_list); | ||
350 | spin_unlock(&osb->net_response_lock); | ||
351 | } | ||
352 | |||
353 | static void __ocfs2_mark_node_responded(struct ocfs2_super *osb, | ||
354 | struct ocfs2_net_wait_ctxt *w, | ||
355 | int node_num) | ||
356 | { | ||
357 | assert_spin_locked(&osb->net_response_lock); | ||
358 | |||
359 | ocfs2_node_map_clear_bit(osb, &w->n_node_map, node_num); | ||
360 | if (ocfs2_node_map_is_empty(osb, &w->n_node_map)) | ||
361 | wake_up(&w->n_event); | ||
362 | } | ||
363 | |||
364 | /* Intended to be called from the node down callback, we fake remove | ||
365 | * the node from all our response contexts */ | ||
366 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
367 | int node_num) | ||
368 | { | ||
369 | struct list_head *p; | ||
370 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
371 | |||
372 | spin_lock(&osb->net_response_lock); | ||
373 | |||
374 | list_for_each(p, &osb->net_response_list) { | ||
375 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
376 | |||
377 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
378 | } | ||
379 | |||
380 | spin_unlock(&osb->net_response_lock); | ||
381 | } | ||
382 | |||
383 | static int ocfs2_broadcast_vote(struct ocfs2_super *osb, | ||
384 | struct ocfs2_vote_msg *request, | ||
385 | unsigned int response_id, | ||
386 | int *response, | ||
387 | struct ocfs2_net_response_cb *callback) | ||
388 | { | ||
389 | int status, i, remote_err; | ||
390 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
391 | int dequeued = 0; | ||
392 | |||
393 | mlog_entry_void(); | ||
394 | |||
395 | w = ocfs2_new_net_wait_ctxt(response_id); | ||
396 | if (!w) { | ||
397 | status = -ENOMEM; | ||
398 | mlog_errno(status); | ||
399 | goto bail; | ||
400 | } | ||
401 | w->n_callback = callback; | ||
402 | |||
403 | /* we're pretty much ready to go at this point, and this fills | ||
404 | * in n_response which we need anyway... */ | ||
405 | ocfs2_queue_net_wait_ctxt(osb, w); | ||
406 | |||
407 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, 0); | ||
408 | |||
409 | while (i != O2NM_INVALID_NODE_NUM) { | ||
410 | if (i != osb->node_num) { | ||
411 | mlog(0, "trying to send request to node %i\n", i); | ||
412 | ocfs2_node_map_set_bit(osb, &w->n_node_map, i); | ||
413 | |||
414 | remote_err = 0; | ||
415 | status = o2net_send_message(OCFS2_MESSAGE_TYPE_VOTE, | ||
416 | osb->net_key, | ||
417 | request, | ||
418 | sizeof(*request), | ||
419 | i, | ||
420 | &remote_err); | ||
421 | if (status == -ETIMEDOUT) { | ||
422 | mlog(0, "remote node %d timed out!\n", i); | ||
423 | status = -EAGAIN; | ||
424 | goto bail; | ||
425 | } | ||
426 | if (remote_err < 0) { | ||
427 | status = remote_err; | ||
428 | mlog(0, "remote error %d on node %d!\n", | ||
429 | remote_err, i); | ||
430 | mlog_errno(status); | ||
431 | goto bail; | ||
432 | } | ||
433 | if (status < 0) { | ||
434 | mlog_errno(status); | ||
435 | goto bail; | ||
436 | } | ||
437 | } | ||
438 | i++; | ||
439 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, i); | ||
440 | mlog(0, "next is %d, i am %d\n", i, osb->node_num); | ||
441 | } | ||
442 | mlog(0, "done sending, now waiting on responses...\n"); | ||
443 | |||
444 | wait_event(w->n_event, ocfs2_node_map_is_empty(osb, &w->n_node_map)); | ||
445 | |||
446 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
447 | dequeued = 1; | ||
448 | |||
449 | *response = w->n_response; | ||
450 | status = 0; | ||
451 | bail: | ||
452 | if (w) { | ||
453 | if (!dequeued) | ||
454 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
455 | kfree(w); | ||
456 | } | ||
457 | |||
458 | mlog_exit(status); | ||
459 | return status; | ||
460 | } | ||
461 | |||
462 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | ||
463 | u64 blkno, | ||
464 | unsigned int generation, | ||
465 | enum ocfs2_vote_request type) | ||
466 | { | ||
467 | struct ocfs2_vote_msg *request; | ||
468 | struct ocfs2_msg_hdr *hdr; | ||
469 | |||
470 | BUG_ON(!ocfs2_is_valid_vote_request(type)); | ||
471 | |||
472 | request = kzalloc(sizeof(*request), GFP_NOFS); | ||
473 | if (!request) { | ||
474 | mlog_errno(-ENOMEM); | ||
475 | } else { | ||
476 | hdr = &request->v_hdr; | ||
477 | hdr->h_node_num = cpu_to_be32(osb->node_num); | ||
478 | hdr->h_request = cpu_to_be32(type); | ||
479 | hdr->h_blkno = cpu_to_be64(blkno); | ||
480 | hdr->h_generation = cpu_to_be32(generation); | ||
481 | } | ||
482 | |||
483 | return request; | ||
484 | } | ||
485 | |||
486 | /* Complete the buildup of a new vote request and process the | ||
487 | * broadcast return value. */ | ||
488 | static int ocfs2_do_request_vote(struct ocfs2_super *osb, | ||
489 | struct ocfs2_vote_msg *request, | ||
490 | struct ocfs2_net_response_cb *callback) | ||
491 | { | ||
492 | int status, response = -EBUSY; | ||
493 | unsigned int response_id; | ||
494 | struct ocfs2_msg_hdr *hdr; | ||
495 | |||
496 | response_id = ocfs2_new_response_id(osb); | ||
497 | |||
498 | hdr = &request->v_hdr; | ||
499 | hdr->h_response_id = cpu_to_be32(response_id); | ||
500 | |||
501 | status = ocfs2_broadcast_vote(osb, request, response_id, &response, | ||
502 | callback); | ||
503 | if (status < 0) { | ||
504 | mlog_errno(status); | ||
505 | goto bail; | ||
506 | } | ||
507 | |||
508 | status = response; | ||
509 | bail: | ||
510 | |||
511 | return status; | ||
512 | } | ||
513 | |||
514 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | ||
515 | { | ||
516 | int status; | ||
517 | struct ocfs2_vote_msg *request = NULL; | ||
518 | |||
519 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT); | ||
520 | if (!request) { | ||
521 | status = -ENOMEM; | ||
522 | goto bail; | ||
523 | } | ||
524 | |||
525 | status = -EAGAIN; | ||
526 | while (status == -EAGAIN) { | ||
527 | if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) && | ||
528 | signal_pending(current)) { | ||
529 | status = -ERESTARTSYS; | ||
530 | goto bail; | ||
531 | } | ||
532 | |||
533 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
534 | osb->node_num)) { | ||
535 | status = 0; | ||
536 | goto bail; | ||
537 | } | ||
538 | |||
539 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
540 | } | ||
541 | |||
542 | bail: | ||
543 | kfree(request); | ||
544 | return status; | ||
545 | } | ||
546 | |||
547 | int ocfs2_request_umount_vote(struct ocfs2_super *osb) | ||
548 | { | ||
549 | int status; | ||
550 | struct ocfs2_vote_msg *request = NULL; | ||
551 | |||
552 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT); | ||
553 | if (!request) { | ||
554 | status = -ENOMEM; | ||
555 | goto bail; | ||
556 | } | ||
557 | |||
558 | status = -EAGAIN; | ||
559 | while (status == -EAGAIN) { | ||
560 | /* Do not check signals on this vote... We really want | ||
561 | * this one to go all the way through. */ | ||
562 | |||
563 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
564 | osb->node_num)) { | ||
565 | status = 0; | ||
566 | goto bail; | ||
567 | } | ||
568 | |||
569 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
570 | } | ||
571 | |||
572 | bail: | ||
573 | kfree(request); | ||
574 | return status; | ||
575 | } | ||
576 | |||
577 | /* TODO: This should eventually be a hash table! */ | ||
578 | static struct ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(struct ocfs2_super *osb, | ||
579 | u32 response_id) | ||
580 | { | ||
581 | struct list_head *p; | ||
582 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
583 | |||
584 | list_for_each(p, &osb->net_response_list) { | ||
585 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
586 | if (response_id == w->n_response_id) | ||
587 | break; | ||
588 | w = NULL; | ||
589 | } | ||
590 | |||
591 | return w; | ||
592 | } | ||
593 | |||
594 | /* Translate response codes into local node errno values */ | ||
595 | static inline int ocfs2_translate_response(int response) | ||
596 | { | ||
597 | int ret; | ||
598 | |||
599 | switch (response) { | ||
600 | case OCFS2_RESPONSE_OK: | ||
601 | ret = 0; | ||
602 | break; | ||
603 | |||
604 | case OCFS2_RESPONSE_BUSY: | ||
605 | ret = -EBUSY; | ||
606 | break; | ||
607 | |||
608 | default: | ||
609 | ret = -EINVAL; | ||
610 | } | ||
611 | |||
612 | return ret; | ||
613 | } | ||
614 | |||
615 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | ||
616 | u32 len, | ||
617 | void *data, void **ret_data) | ||
618 | { | ||
619 | unsigned int response_id, node_num; | ||
620 | int response_status; | ||
621 | struct ocfs2_super *osb = data; | ||
622 | struct ocfs2_response_msg *resp; | ||
623 | struct ocfs2_net_wait_ctxt * w; | ||
624 | struct ocfs2_net_response_cb *resp_cb; | ||
625 | |||
626 | resp = (struct ocfs2_response_msg *) msg->buf; | ||
627 | |||
628 | response_id = be32_to_cpu(resp->r_hdr.h_response_id); | ||
629 | node_num = be32_to_cpu(resp->r_hdr.h_node_num); | ||
630 | response_status = | ||
631 | ocfs2_translate_response(be32_to_cpu(resp->r_response)); | ||
632 | |||
633 | mlog(0, "received response message:\n"); | ||
634 | mlog(0, "h_response_id = %u\n", response_id); | ||
635 | mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request)); | ||
636 | mlog(0, "h_blkno = %llu\n", | ||
637 | (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno)); | ||
638 | mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation)); | ||
639 | mlog(0, "h_node_num = %u\n", node_num); | ||
640 | mlog(0, "r_response = %d\n", response_status); | ||
641 | |||
642 | spin_lock(&osb->net_response_lock); | ||
643 | w = __ocfs2_find_net_wait_ctxt(osb, response_id); | ||
644 | if (!w) { | ||
645 | mlog(0, "request not found!\n"); | ||
646 | goto bail; | ||
647 | } | ||
648 | resp_cb = w->n_callback; | ||
649 | |||
650 | if (response_status && (!w->n_response)) { | ||
651 | /* we only really need one negative response so don't | ||
652 | * set it twice. */ | ||
653 | w->n_response = response_status; | ||
654 | } | ||
655 | |||
656 | if (resp_cb) { | ||
657 | spin_unlock(&osb->net_response_lock); | ||
658 | |||
659 | resp_cb->rc_cb(resp_cb->rc_priv, resp); | ||
660 | |||
661 | spin_lock(&osb->net_response_lock); | ||
662 | } | ||
663 | |||
664 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
665 | bail: | ||
666 | spin_unlock(&osb->net_response_lock); | ||
667 | |||
668 | return 0; | ||
669 | } | ||
670 | |||
671 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | ||
672 | u32 len, | ||
673 | void *data, void **ret_data) | ||
674 | { | ||
675 | int status; | ||
676 | struct ocfs2_super *osb = data; | ||
677 | struct ocfs2_vote_work *work; | ||
678 | |||
679 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); | ||
680 | if (!work) { | ||
681 | status = -ENOMEM; | ||
682 | mlog_errno(status); | ||
683 | goto bail; | ||
684 | } | ||
685 | |||
686 | INIT_LIST_HEAD(&work->w_list); | ||
687 | memcpy(&work->w_msg, msg->buf, sizeof(struct ocfs2_vote_msg)); | ||
688 | |||
689 | mlog(0, "scheduling vote request:\n"); | ||
690 | mlog(0, "h_response_id = %u\n", | ||
691 | be32_to_cpu(work->w_msg.v_hdr.h_response_id)); | ||
692 | mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request)); | ||
693 | mlog(0, "h_blkno = %llu\n", | ||
694 | (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno)); | ||
695 | mlog(0, "h_generation = %u\n", | ||
696 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); | ||
697 | mlog(0, "h_node_num = %u\n", | ||
698 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); | ||
699 | |||
700 | spin_lock(&osb->vote_task_lock); | ||
701 | list_add_tail(&work->w_list, &osb->vote_list); | ||
702 | osb->vote_count++; | ||
703 | spin_unlock(&osb->vote_task_lock); | ||
704 | |||
705 | ocfs2_kick_vote_thread(osb); | ||
706 | |||
707 | status = 0; | ||
708 | bail: | ||
709 | return status; | ||
710 | } | ||
711 | |||
712 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb) | ||
713 | { | ||
714 | if (!osb->net_key) | ||
715 | return; | ||
716 | |||
717 | o2net_unregister_handler_list(&osb->osb_net_handlers); | ||
718 | |||
719 | if (!list_empty(&osb->net_response_list)) | ||
720 | mlog(ML_ERROR, "net response list not empty!\n"); | ||
721 | |||
722 | osb->net_key = 0; | ||
723 | } | ||
724 | |||
725 | int ocfs2_register_net_handlers(struct ocfs2_super *osb) | ||
726 | { | ||
727 | int status = 0; | ||
728 | |||
729 | if (ocfs2_mount_local(osb)) | ||
730 | return 0; | ||
731 | |||
732 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
733 | osb->net_key, | ||
734 | sizeof(struct ocfs2_response_msg), | ||
735 | ocfs2_handle_response_message, | ||
736 | osb, NULL, &osb->osb_net_handlers); | ||
737 | if (status) { | ||
738 | mlog_errno(status); | ||
739 | goto bail; | ||
740 | } | ||
741 | |||
742 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_VOTE, | ||
743 | osb->net_key, | ||
744 | sizeof(struct ocfs2_vote_msg), | ||
745 | ocfs2_handle_vote_message, | ||
746 | osb, NULL, &osb->osb_net_handlers); | ||
747 | if (status) { | ||
748 | mlog_errno(status); | ||
749 | goto bail; | ||
750 | } | ||
751 | bail: | ||
752 | if (status < 0) | ||
753 | ocfs2_unregister_net_handlers(osb); | ||
754 | |||
755 | return status; | ||
756 | } | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d88173840082..6b7ff1618945 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -131,7 +131,7 @@ static void property_stop(struct seq_file *f, void *v) | |||
131 | /* Nothing to do */ | 131 | /* Nothing to do */ |
132 | } | 132 | } |
133 | 133 | ||
134 | static struct seq_operations property_op = { | 134 | static const struct seq_operations property_op = { |
135 | .start = property_start, | 135 | .start = property_start, |
136 | .next = property_next, | 136 | .next = property_next, |
137 | .stop = property_stop, | 137 | .stop = property_stop, |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 722e12e5acc7..739da701ae7b 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -195,96 +195,45 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
195 | return ERR_PTR(res); | 195 | return ERR_PTR(res); |
196 | } | 196 | } |
197 | 197 | ||
198 | /* | 198 | static ssize_t part_start_show(struct device *dev, |
199 | * sysfs bindings for partitions | 199 | struct device_attribute *attr, char *buf) |
200 | */ | ||
201 | |||
202 | struct part_attribute { | ||
203 | struct attribute attr; | ||
204 | ssize_t (*show)(struct hd_struct *,char *); | ||
205 | ssize_t (*store)(struct hd_struct *,const char *, size_t); | ||
206 | }; | ||
207 | |||
208 | static ssize_t | ||
209 | part_attr_show(struct kobject * kobj, struct attribute * attr, char * page) | ||
210 | { | 200 | { |
211 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | 201 | struct hd_struct *p = dev_to_part(dev); |
212 | struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr); | ||
213 | ssize_t ret = 0; | ||
214 | if (part_attr->show) | ||
215 | ret = part_attr->show(p, page); | ||
216 | return ret; | ||
217 | } | ||
218 | static ssize_t | ||
219 | part_attr_store(struct kobject * kobj, struct attribute * attr, | ||
220 | const char *page, size_t count) | ||
221 | { | ||
222 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | ||
223 | struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr); | ||
224 | ssize_t ret = 0; | ||
225 | 202 | ||
226 | if (part_attr->store) | 203 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
227 | ret = part_attr->store(p, page, count); | ||
228 | return ret; | ||
229 | } | 204 | } |
230 | 205 | ||
231 | static struct sysfs_ops part_sysfs_ops = { | 206 | static ssize_t part_size_show(struct device *dev, |
232 | .show = part_attr_show, | 207 | struct device_attribute *attr, char *buf) |
233 | .store = part_attr_store, | ||
234 | }; | ||
235 | |||
236 | static ssize_t part_uevent_store(struct hd_struct * p, | ||
237 | const char *page, size_t count) | ||
238 | { | 208 | { |
239 | kobject_uevent(&p->kobj, KOBJ_ADD); | 209 | struct hd_struct *p = dev_to_part(dev); |
240 | return count; | 210 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
241 | } | 211 | } |
242 | static ssize_t part_dev_read(struct hd_struct * p, char *page) | 212 | |
243 | { | 213 | static ssize_t part_stat_show(struct device *dev, |
244 | struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj); | 214 | struct device_attribute *attr, char *buf) |
245 | dev_t dev = MKDEV(disk->major, disk->first_minor + p->partno); | ||
246 | return print_dev_t(page, dev); | ||
247 | } | ||
248 | static ssize_t part_start_read(struct hd_struct * p, char *page) | ||
249 | { | ||
250 | return sprintf(page, "%llu\n",(unsigned long long)p->start_sect); | ||
251 | } | ||
252 | static ssize_t part_size_read(struct hd_struct * p, char *page) | ||
253 | { | ||
254 | return sprintf(page, "%llu\n",(unsigned long long)p->nr_sects); | ||
255 | } | ||
256 | static ssize_t part_stat_read(struct hd_struct * p, char *page) | ||
257 | { | 215 | { |
258 | return sprintf(page, "%8u %8llu %8u %8llu\n", | 216 | struct hd_struct *p = dev_to_part(dev); |
217 | |||
218 | return sprintf(buf, "%8u %8llu %8u %8llu\n", | ||
259 | p->ios[0], (unsigned long long)p->sectors[0], | 219 | p->ios[0], (unsigned long long)p->sectors[0], |
260 | p->ios[1], (unsigned long long)p->sectors[1]); | 220 | p->ios[1], (unsigned long long)p->sectors[1]); |
261 | } | 221 | } |
262 | static struct part_attribute part_attr_uevent = { | ||
263 | .attr = {.name = "uevent", .mode = S_IWUSR }, | ||
264 | .store = part_uevent_store | ||
265 | }; | ||
266 | static struct part_attribute part_attr_dev = { | ||
267 | .attr = {.name = "dev", .mode = S_IRUGO }, | ||
268 | .show = part_dev_read | ||
269 | }; | ||
270 | static struct part_attribute part_attr_start = { | ||
271 | .attr = {.name = "start", .mode = S_IRUGO }, | ||
272 | .show = part_start_read | ||
273 | }; | ||
274 | static struct part_attribute part_attr_size = { | ||
275 | .attr = {.name = "size", .mode = S_IRUGO }, | ||
276 | .show = part_size_read | ||
277 | }; | ||
278 | static struct part_attribute part_attr_stat = { | ||
279 | .attr = {.name = "stat", .mode = S_IRUGO }, | ||
280 | .show = part_stat_read | ||
281 | }; | ||
282 | 222 | ||
283 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 223 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
224 | static ssize_t part_fail_show(struct device *dev, | ||
225 | struct device_attribute *attr, char *buf) | ||
226 | { | ||
227 | struct hd_struct *p = dev_to_part(dev); | ||
284 | 228 | ||
285 | static ssize_t part_fail_store(struct hd_struct * p, | 229 | return sprintf(buf, "%d\n", p->make_it_fail); |
230 | } | ||
231 | |||
232 | static ssize_t part_fail_store(struct device *dev, | ||
233 | struct device_attribute *attr, | ||
286 | const char *buf, size_t count) | 234 | const char *buf, size_t count) |
287 | { | 235 | { |
236 | struct hd_struct *p = dev_to_part(dev); | ||
288 | int i; | 237 | int i; |
289 | 238 | ||
290 | if (count > 0 && sscanf(buf, "%d", &i) > 0) | 239 | if (count > 0 && sscanf(buf, "%d", &i) > 0) |
@@ -292,50 +241,53 @@ static ssize_t part_fail_store(struct hd_struct * p, | |||
292 | 241 | ||
293 | return count; | 242 | return count; |
294 | } | 243 | } |
295 | static ssize_t part_fail_read(struct hd_struct * p, char *page) | 244 | #endif |
296 | { | ||
297 | return sprintf(page, "%d\n", p->make_it_fail); | ||
298 | } | ||
299 | static struct part_attribute part_attr_fail = { | ||
300 | .attr = {.name = "make-it-fail", .mode = S_IRUGO | S_IWUSR }, | ||
301 | .store = part_fail_store, | ||
302 | .show = part_fail_read | ||
303 | }; | ||
304 | 245 | ||
246 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | ||
247 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | ||
248 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | ||
249 | #ifdef CONFIG_FAIL_MAKE_REQUEST | ||
250 | static struct device_attribute dev_attr_fail = | ||
251 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); | ||
305 | #endif | 252 | #endif |
306 | 253 | ||
307 | static struct attribute * default_attrs[] = { | 254 | static struct attribute *part_attrs[] = { |
308 | &part_attr_uevent.attr, | 255 | &dev_attr_start.attr, |
309 | &part_attr_dev.attr, | 256 | &dev_attr_size.attr, |
310 | &part_attr_start.attr, | 257 | &dev_attr_stat.attr, |
311 | &part_attr_size.attr, | ||
312 | &part_attr_stat.attr, | ||
313 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 258 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
314 | &part_attr_fail.attr, | 259 | &dev_attr_fail.attr, |
315 | #endif | 260 | #endif |
316 | NULL, | 261 | NULL |
317 | }; | 262 | }; |
318 | 263 | ||
319 | extern struct kset block_subsys; | 264 | static struct attribute_group part_attr_group = { |
265 | .attrs = part_attrs, | ||
266 | }; | ||
320 | 267 | ||
321 | static void part_release(struct kobject *kobj) | 268 | static struct attribute_group *part_attr_groups[] = { |
269 | &part_attr_group, | ||
270 | NULL | ||
271 | }; | ||
272 | |||
273 | static void part_release(struct device *dev) | ||
322 | { | 274 | { |
323 | struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); | 275 | struct hd_struct *p = dev_to_part(dev); |
324 | kfree(p); | 276 | kfree(p); |
325 | } | 277 | } |
326 | 278 | ||
327 | struct kobj_type ktype_part = { | 279 | struct device_type part_type = { |
280 | .name = "partition", | ||
281 | .groups = part_attr_groups, | ||
328 | .release = part_release, | 282 | .release = part_release, |
329 | .default_attrs = default_attrs, | ||
330 | .sysfs_ops = &part_sysfs_ops, | ||
331 | }; | 283 | }; |
332 | 284 | ||
333 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 285 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) |
334 | { | 286 | { |
335 | struct kobject *k; | 287 | struct kobject *k; |
336 | 288 | ||
337 | k = kobject_get(&p->kobj); | 289 | k = kobject_get(&p->dev.kobj); |
338 | p->holder_dir = kobject_add_dir(k, "holders"); | 290 | p->holder_dir = kobject_create_and_add("holders", k); |
339 | kobject_put(k); | 291 | kobject_put(k); |
340 | } | 292 | } |
341 | 293 | ||
@@ -343,15 +295,16 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | |||
343 | { | 295 | { |
344 | struct kobject *k; | 296 | struct kobject *k; |
345 | 297 | ||
346 | k = kobject_get(&disk->kobj); | 298 | k = kobject_get(&disk->dev.kobj); |
347 | disk->holder_dir = kobject_add_dir(k, "holders"); | 299 | disk->holder_dir = kobject_create_and_add("holders", k); |
348 | disk->slave_dir = kobject_add_dir(k, "slaves"); | 300 | disk->slave_dir = kobject_create_and_add("slaves", k); |
349 | kobject_put(k); | 301 | kobject_put(k); |
350 | } | 302 | } |
351 | 303 | ||
352 | void delete_partition(struct gendisk *disk, int part) | 304 | void delete_partition(struct gendisk *disk, int part) |
353 | { | 305 | { |
354 | struct hd_struct *p = disk->part[part-1]; | 306 | struct hd_struct *p = disk->part[part-1]; |
307 | |||
355 | if (!p) | 308 | if (!p) |
356 | return; | 309 | return; |
357 | if (!p->nr_sects) | 310 | if (!p->nr_sects) |
@@ -361,113 +314,55 @@ void delete_partition(struct gendisk *disk, int part) | |||
361 | p->nr_sects = 0; | 314 | p->nr_sects = 0; |
362 | p->ios[0] = p->ios[1] = 0; | 315 | p->ios[0] = p->ios[1] = 0; |
363 | p->sectors[0] = p->sectors[1] = 0; | 316 | p->sectors[0] = p->sectors[1] = 0; |
364 | sysfs_remove_link(&p->kobj, "subsystem"); | 317 | kobject_put(p->holder_dir); |
365 | kobject_unregister(p->holder_dir); | 318 | device_del(&p->dev); |
366 | kobject_uevent(&p->kobj, KOBJ_REMOVE); | 319 | put_device(&p->dev); |
367 | kobject_del(&p->kobj); | ||
368 | kobject_put(&p->kobj); | ||
369 | } | 320 | } |
370 | 321 | ||
371 | void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 322 | void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) |
372 | { | 323 | { |
373 | struct hd_struct *p; | 324 | struct hd_struct *p; |
325 | int err; | ||
374 | 326 | ||
375 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 327 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
376 | if (!p) | 328 | if (!p) |
377 | return; | 329 | return; |
378 | 330 | ||
379 | p->start_sect = start; | 331 | p->start_sect = start; |
380 | p->nr_sects = len; | 332 | p->nr_sects = len; |
381 | p->partno = part; | 333 | p->partno = part; |
382 | p->policy = disk->policy; | 334 | p->policy = disk->policy; |
383 | 335 | ||
384 | if (isdigit(disk->kobj.k_name[strlen(disk->kobj.k_name)-1])) | 336 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) |
385 | kobject_set_name(&p->kobj, "%sp%d", | 337 | snprintf(p->dev.bus_id, BUS_ID_SIZE, |
386 | kobject_name(&disk->kobj), part); | 338 | "%sp%d", disk->dev.bus_id, part); |
387 | else | 339 | else |
388 | kobject_set_name(&p->kobj, "%s%d", | 340 | snprintf(p->dev.bus_id, BUS_ID_SIZE, |
389 | kobject_name(&disk->kobj),part); | 341 | "%s%d", disk->dev.bus_id, part); |
390 | p->kobj.parent = &disk->kobj; | 342 | |
391 | p->kobj.ktype = &ktype_part; | 343 | device_initialize(&p->dev); |
392 | kobject_init(&p->kobj); | 344 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); |
393 | kobject_add(&p->kobj); | 345 | p->dev.class = &block_class; |
394 | if (!disk->part_uevent_suppress) | 346 | p->dev.type = &part_type; |
395 | kobject_uevent(&p->kobj, KOBJ_ADD); | 347 | p->dev.parent = &disk->dev; |
396 | sysfs_create_link(&p->kobj, &block_subsys.kobj, "subsystem"); | 348 | disk->part[part-1] = p; |
349 | |||
350 | /* delay uevent until 'holders' subdir is created */ | ||
351 | p->dev.uevent_suppress = 1; | ||
352 | device_add(&p->dev); | ||
353 | partition_sysfs_add_subdir(p); | ||
354 | p->dev.uevent_suppress = 0; | ||
397 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 355 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
398 | static struct attribute addpartattr = { | 356 | static struct attribute addpartattr = { |
399 | .name = "whole_disk", | 357 | .name = "whole_disk", |
400 | .mode = S_IRUSR | S_IRGRP | S_IROTH, | 358 | .mode = S_IRUSR | S_IRGRP | S_IROTH, |
401 | }; | 359 | }; |
402 | 360 | err = sysfs_create_file(&p->dev.kobj, &addpartattr); | |
403 | sysfs_create_file(&p->kobj, &addpartattr); | ||
404 | } | 361 | } |
405 | partition_sysfs_add_subdir(p); | ||
406 | disk->part[part-1] = p; | ||
407 | } | ||
408 | 362 | ||
409 | static char *make_block_name(struct gendisk *disk) | 363 | /* suppress uevent if the disk supresses it */ |
410 | { | 364 | if (!disk->dev.uevent_suppress) |
411 | char *name; | 365 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); |
412 | static char *block_str = "block:"; | ||
413 | int size; | ||
414 | char *s; | ||
415 | |||
416 | size = strlen(block_str) + strlen(disk->disk_name) + 1; | ||
417 | name = kmalloc(size, GFP_KERNEL); | ||
418 | if (!name) | ||
419 | return NULL; | ||
420 | strcpy(name, block_str); | ||
421 | strcat(name, disk->disk_name); | ||
422 | /* ewww... some of these buggers have / in name... */ | ||
423 | s = strchr(name, '/'); | ||
424 | if (s) | ||
425 | *s = '!'; | ||
426 | return name; | ||
427 | } | ||
428 | |||
429 | static int disk_sysfs_symlinks(struct gendisk *disk) | ||
430 | { | ||
431 | struct device *target = get_device(disk->driverfs_dev); | ||
432 | int err; | ||
433 | char *disk_name = NULL; | ||
434 | |||
435 | if (target) { | ||
436 | disk_name = make_block_name(disk); | ||
437 | if (!disk_name) { | ||
438 | err = -ENOMEM; | ||
439 | goto err_out; | ||
440 | } | ||
441 | |||
442 | err = sysfs_create_link(&disk->kobj, &target->kobj, "device"); | ||
443 | if (err) | ||
444 | goto err_out_disk_name; | ||
445 | |||
446 | err = sysfs_create_link(&target->kobj, &disk->kobj, disk_name); | ||
447 | if (err) | ||
448 | goto err_out_dev_link; | ||
449 | } | ||
450 | |||
451 | err = sysfs_create_link(&disk->kobj, &block_subsys.kobj, | ||
452 | "subsystem"); | ||
453 | if (err) | ||
454 | goto err_out_disk_name_lnk; | ||
455 | |||
456 | kfree(disk_name); | ||
457 | |||
458 | return 0; | ||
459 | |||
460 | err_out_disk_name_lnk: | ||
461 | if (target) { | ||
462 | sysfs_remove_link(&target->kobj, disk_name); | ||
463 | err_out_dev_link: | ||
464 | sysfs_remove_link(&disk->kobj, "device"); | ||
465 | err_out_disk_name: | ||
466 | kfree(disk_name); | ||
467 | err_out: | ||
468 | put_device(target); | ||
469 | } | ||
470 | return err; | ||
471 | } | 366 | } |
472 | 367 | ||
473 | /* Not exported, helper to add_disk(). */ | 368 | /* Not exported, helper to add_disk(). */ |
@@ -479,19 +374,29 @@ void register_disk(struct gendisk *disk) | |||
479 | struct hd_struct *p; | 374 | struct hd_struct *p; |
480 | int err; | 375 | int err; |
481 | 376 | ||
482 | kobject_set_name(&disk->kobj, "%s", disk->disk_name); | 377 | disk->dev.parent = disk->driverfs_dev; |
483 | /* ewww... some of these buggers have / in name... */ | 378 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); |
484 | s = strchr(disk->kobj.k_name, '/'); | 379 | |
380 | strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); | ||
381 | /* ewww... some of these buggers have / in the name... */ | ||
382 | s = strchr(disk->dev.bus_id, '/'); | ||
485 | if (s) | 383 | if (s) |
486 | *s = '!'; | 384 | *s = '!'; |
487 | if ((err = kobject_add(&disk->kobj))) | 385 | |
386 | /* delay uevents, until we scanned partition table */ | ||
387 | disk->dev.uevent_suppress = 1; | ||
388 | |||
389 | if (device_add(&disk->dev)) | ||
488 | return; | 390 | return; |
489 | err = disk_sysfs_symlinks(disk); | 391 | #ifndef CONFIG_SYSFS_DEPRECATED |
392 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | ||
393 | kobject_name(&disk->dev.kobj)); | ||
490 | if (err) { | 394 | if (err) { |
491 | kobject_del(&disk->kobj); | 395 | device_del(&disk->dev); |
492 | return; | 396 | return; |
493 | } | 397 | } |
494 | disk_sysfs_add_subdirs(disk); | 398 | #endif |
399 | disk_sysfs_add_subdirs(disk); | ||
495 | 400 | ||
496 | /* No minors to use for partitions */ | 401 | /* No minors to use for partitions */ |
497 | if (disk->minors == 1) | 402 | if (disk->minors == 1) |
@@ -505,25 +410,23 @@ void register_disk(struct gendisk *disk) | |||
505 | if (!bdev) | 410 | if (!bdev) |
506 | goto exit; | 411 | goto exit; |
507 | 412 | ||
508 | /* scan partition table, but suppress uevents */ | ||
509 | bdev->bd_invalidated = 1; | 413 | bdev->bd_invalidated = 1; |
510 | disk->part_uevent_suppress = 1; | ||
511 | err = blkdev_get(bdev, FMODE_READ, 0); | 414 | err = blkdev_get(bdev, FMODE_READ, 0); |
512 | disk->part_uevent_suppress = 0; | ||
513 | if (err < 0) | 415 | if (err < 0) |
514 | goto exit; | 416 | goto exit; |
515 | blkdev_put(bdev); | 417 | blkdev_put(bdev); |
516 | 418 | ||
517 | exit: | 419 | exit: |
518 | /* announce disk after possible partitions are already created */ | 420 | /* announce disk after possible partitions are created */ |
519 | kobject_uevent(&disk->kobj, KOBJ_ADD); | 421 | disk->dev.uevent_suppress = 0; |
422 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | ||
520 | 423 | ||
521 | /* announce possible partitions */ | 424 | /* announce possible partitions */ |
522 | for (i = 1; i < disk->minors; i++) { | 425 | for (i = 1; i < disk->minors; i++) { |
523 | p = disk->part[i-1]; | 426 | p = disk->part[i-1]; |
524 | if (!p || !p->nr_sects) | 427 | if (!p || !p->nr_sects) |
525 | continue; | 428 | continue; |
526 | kobject_uevent(&p->kobj, KOBJ_ADD); | 429 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); |
527 | } | 430 | } |
528 | } | 431 | } |
529 | 432 | ||
@@ -602,19 +505,11 @@ void del_gendisk(struct gendisk *disk) | |||
602 | disk_stat_set_all(disk, 0); | 505 | disk_stat_set_all(disk, 0); |
603 | disk->stamp = 0; | 506 | disk->stamp = 0; |
604 | 507 | ||
605 | kobject_uevent(&disk->kobj, KOBJ_REMOVE); | 508 | kobject_put(disk->holder_dir); |
606 | kobject_unregister(disk->holder_dir); | 509 | kobject_put(disk->slave_dir); |
607 | kobject_unregister(disk->slave_dir); | 510 | disk->driverfs_dev = NULL; |
608 | if (disk->driverfs_dev) { | 511 | #ifndef CONFIG_SYSFS_DEPRECATED |
609 | char *disk_name = make_block_name(disk); | 512 | sysfs_remove_link(block_depr, disk->dev.bus_id); |
610 | sysfs_remove_link(&disk->kobj, "device"); | 513 | #endif |
611 | if (disk_name) { | 514 | device_del(&disk->dev); |
612 | sysfs_remove_link(&disk->driverfs_dev->kobj, disk_name); | ||
613 | kfree(disk_name); | ||
614 | } | ||
615 | put_device(disk->driverfs_dev); | ||
616 | disk->driverfs_dev = NULL; | ||
617 | } | ||
618 | sysfs_remove_link(&disk->kobj, "subsystem"); | ||
619 | kobject_del(&disk->kobj); | ||
620 | } | 515 | } |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 5be663e5dad1..b380313092bd 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -164,7 +164,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) | |||
164 | ppid = pid_alive(p) ? | 164 | ppid = pid_alive(p) ? |
165 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; | 165 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; |
166 | tpid = pid_alive(p) && p->ptrace ? | 166 | tpid = pid_alive(p) && p->ptrace ? |
167 | task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0; | 167 | task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; |
168 | buffer += sprintf(buffer, | 168 | buffer += sprintf(buffer, |
169 | "State:\t%s\n" | 169 | "State:\t%s\n" |
170 | "Tgid:\t%d\n" | 170 | "Tgid:\t%d\n" |
@@ -459,8 +459,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) | |||
459 | } | 459 | } |
460 | 460 | ||
461 | sid = task_session_nr_ns(task, ns); | 461 | sid = task_session_nr_ns(task, ns); |
462 | ppid = task_tgid_nr_ns(task->real_parent, ns); | ||
462 | pgid = task_pgrp_nr_ns(task, ns); | 463 | pgid = task_pgrp_nr_ns(task, ns); |
463 | ppid = task_ppid_nr_ns(task, ns); | ||
464 | 464 | ||
465 | unlock_task_sighand(task, &flags); | 465 | unlock_task_sighand(task, &flags); |
466 | } | 466 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index e88ee1a0323a..9fa9708cc715 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -202,6 +202,26 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf | |||
202 | (task_is_stopped_or_traced(task)) && \ | 202 | (task_is_stopped_or_traced(task)) && \ |
203 | security_ptrace(current,task) == 0)) | 203 | security_ptrace(current,task) == 0)) |
204 | 204 | ||
205 | struct mm_struct *mm_for_maps(struct task_struct *task) | ||
206 | { | ||
207 | struct mm_struct *mm = get_task_mm(task); | ||
208 | if (!mm) | ||
209 | return NULL; | ||
210 | down_read(&mm->mmap_sem); | ||
211 | task_lock(task); | ||
212 | if (task->mm != mm) | ||
213 | goto out; | ||
214 | if (task->mm != current->mm && __ptrace_may_attach(task) < 0) | ||
215 | goto out; | ||
216 | task_unlock(task); | ||
217 | return mm; | ||
218 | out: | ||
219 | task_unlock(task); | ||
220 | up_read(&mm->mmap_sem); | ||
221 | mmput(mm); | ||
222 | return NULL; | ||
223 | } | ||
224 | |||
205 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | 225 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) |
206 | { | 226 | { |
207 | int res = 0; | 227 | int res = 0; |
@@ -290,6 +310,77 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer) | |||
290 | } | 310 | } |
291 | #endif | 311 | #endif |
292 | 312 | ||
313 | #ifdef CONFIG_LATENCYTOP | ||
314 | static int lstats_show_proc(struct seq_file *m, void *v) | ||
315 | { | ||
316 | int i; | ||
317 | struct task_struct *task = m->private; | ||
318 | seq_puts(m, "Latency Top version : v0.1\n"); | ||
319 | |||
320 | for (i = 0; i < 32; i++) { | ||
321 | if (task->latency_record[i].backtrace[0]) { | ||
322 | int q; | ||
323 | seq_printf(m, "%i %li %li ", | ||
324 | task->latency_record[i].count, | ||
325 | task->latency_record[i].time, | ||
326 | task->latency_record[i].max); | ||
327 | for (q = 0; q < LT_BACKTRACEDEPTH; q++) { | ||
328 | char sym[KSYM_NAME_LEN]; | ||
329 | char *c; | ||
330 | if (!task->latency_record[i].backtrace[q]) | ||
331 | break; | ||
332 | if (task->latency_record[i].backtrace[q] == ULONG_MAX) | ||
333 | break; | ||
334 | sprint_symbol(sym, task->latency_record[i].backtrace[q]); | ||
335 | c = strchr(sym, '+'); | ||
336 | if (c) | ||
337 | *c = 0; | ||
338 | seq_printf(m, "%s ", sym); | ||
339 | } | ||
340 | seq_printf(m, "\n"); | ||
341 | } | ||
342 | |||
343 | } | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static int lstats_open(struct inode *inode, struct file *file) | ||
348 | { | ||
349 | int ret; | ||
350 | struct seq_file *m; | ||
351 | struct task_struct *task = get_proc_task(inode); | ||
352 | |||
353 | ret = single_open(file, lstats_show_proc, NULL); | ||
354 | if (!ret) { | ||
355 | m = file->private_data; | ||
356 | m->private = task; | ||
357 | } | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | static ssize_t lstats_write(struct file *file, const char __user *buf, | ||
362 | size_t count, loff_t *offs) | ||
363 | { | ||
364 | struct seq_file *m; | ||
365 | struct task_struct *task; | ||
366 | |||
367 | m = file->private_data; | ||
368 | task = m->private; | ||
369 | clear_all_latency_tracing(task); | ||
370 | |||
371 | return count; | ||
372 | } | ||
373 | |||
374 | static const struct file_operations proc_lstats_operations = { | ||
375 | .open = lstats_open, | ||
376 | .read = seq_read, | ||
377 | .write = lstats_write, | ||
378 | .llseek = seq_lseek, | ||
379 | .release = single_release, | ||
380 | }; | ||
381 | |||
382 | #endif | ||
383 | |||
293 | /* The badness from the OOM killer */ | 384 | /* The badness from the OOM killer */ |
294 | unsigned long badness(struct task_struct *p, unsigned long uptime); | 385 | unsigned long badness(struct task_struct *p, unsigned long uptime); |
295 | static int proc_oom_score(struct task_struct *task, char *buffer) | 386 | static int proc_oom_score(struct task_struct *task, char *buffer) |
@@ -1000,6 +1091,7 @@ static const struct file_operations proc_fault_inject_operations = { | |||
1000 | }; | 1091 | }; |
1001 | #endif | 1092 | #endif |
1002 | 1093 | ||
1094 | |||
1003 | #ifdef CONFIG_SCHED_DEBUG | 1095 | #ifdef CONFIG_SCHED_DEBUG |
1004 | /* | 1096 | /* |
1005 | * Print out various scheduling related per-task fields: | 1097 | * Print out various scheduling related per-task fields: |
@@ -2210,6 +2302,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2210 | #ifdef CONFIG_SCHEDSTATS | 2302 | #ifdef CONFIG_SCHEDSTATS |
2211 | INF("schedstat", S_IRUGO, pid_schedstat), | 2303 | INF("schedstat", S_IRUGO, pid_schedstat), |
2212 | #endif | 2304 | #endif |
2305 | #ifdef CONFIG_LATENCYTOP | ||
2306 | REG("latency", S_IRUGO, lstats), | ||
2307 | #endif | ||
2213 | #ifdef CONFIG_PROC_PID_CPUSET | 2308 | #ifdef CONFIG_PROC_PID_CPUSET |
2214 | REG("cpuset", S_IRUGO, cpuset), | 2309 | REG("cpuset", S_IRUGO, cpuset), |
2215 | #endif | 2310 | #endif |
@@ -2535,6 +2630,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2535 | #ifdef CONFIG_SCHEDSTATS | 2630 | #ifdef CONFIG_SCHEDSTATS |
2536 | INF("schedstat", S_IRUGO, pid_schedstat), | 2631 | INF("schedstat", S_IRUGO, pid_schedstat), |
2537 | #endif | 2632 | #endif |
2633 | #ifdef CONFIG_LATENCYTOP | ||
2634 | REG("latency", S_IRUGO, lstats), | ||
2635 | #endif | ||
2538 | #ifdef CONFIG_PROC_PID_CPUSET | 2636 | #ifdef CONFIG_PROC_PID_CPUSET |
2539 | REG("cpuset", S_IRUGO, cpuset), | 2637 | REG("cpuset", S_IRUGO, cpuset), |
2540 | #endif | 2638 | #endif |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8d49838e5554..6a2fe5187b62 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -374,16 +374,9 @@ static int proc_delete_dentry(struct dentry * dentry) | |||
374 | return 1; | 374 | return 1; |
375 | } | 375 | } |
376 | 376 | ||
377 | static int proc_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) | ||
378 | { | ||
379 | d_drop(dentry); | ||
380 | return 0; | ||
381 | } | ||
382 | |||
383 | static struct dentry_operations proc_dentry_operations = | 377 | static struct dentry_operations proc_dentry_operations = |
384 | { | 378 | { |
385 | .d_delete = proc_delete_dentry, | 379 | .d_delete = proc_delete_dentry, |
386 | .d_revalidate = proc_revalidate_dentry, | ||
387 | }; | 380 | }; |
388 | 381 | ||
389 | /* | 382 | /* |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1820eb2ef762..05b3e9006262 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -27,6 +27,8 @@ struct vmalloc_info { | |||
27 | unsigned long largest_chunk; | 27 | unsigned long largest_chunk; |
28 | }; | 28 | }; |
29 | 29 | ||
30 | extern struct mm_struct *mm_for_maps(struct task_struct *); | ||
31 | |||
30 | #ifdef CONFIG_MMU | 32 | #ifdef CONFIG_MMU |
31 | #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) | 33 | #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) |
32 | extern void get_vmalloc_info(struct vmalloc_info *vmi); | 34 | extern void get_vmalloc_info(struct vmalloc_info *vmi); |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index e0d064e9764e..3462bfde89f6 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -410,7 +410,7 @@ static const struct file_operations proc_modules_operations = { | |||
410 | }; | 410 | }; |
411 | #endif | 411 | #endif |
412 | 412 | ||
413 | #ifdef CONFIG_SLAB | 413 | #ifdef CONFIG_SLABINFO |
414 | static int slabinfo_open(struct inode *inode, struct file *file) | 414 | static int slabinfo_open(struct inode *inode, struct file *file) |
415 | { | 415 | { |
416 | return seq_open(file, &slabinfo_op); | 416 | return seq_open(file, &slabinfo_op); |
@@ -728,7 +728,7 @@ void __init proc_misc_init(void) | |||
728 | #endif | 728 | #endif |
729 | create_seq_entry("stat", 0, &proc_stat_operations); | 729 | create_seq_entry("stat", 0, &proc_stat_operations); |
730 | create_seq_entry("interrupts", 0, &proc_interrupts_operations); | 730 | create_seq_entry("interrupts", 0, &proc_interrupts_operations); |
731 | #ifdef CONFIG_SLAB | 731 | #ifdef CONFIG_SLABINFO |
732 | create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); | 732 | create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); |
733 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 733 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
734 | create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); | 734 | create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 0afe21ee0607..4823c9677fac 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -22,10 +22,48 @@ | |||
22 | #include <linux/mount.h> | 22 | #include <linux/mount.h> |
23 | #include <linux/nsproxy.h> | 23 | #include <linux/nsproxy.h> |
24 | #include <net/net_namespace.h> | 24 | #include <net/net_namespace.h> |
25 | #include <linux/seq_file.h> | ||
25 | 26 | ||
26 | #include "internal.h" | 27 | #include "internal.h" |
27 | 28 | ||
28 | 29 | ||
30 | int seq_open_net(struct inode *ino, struct file *f, | ||
31 | const struct seq_operations *ops, int size) | ||
32 | { | ||
33 | struct net *net; | ||
34 | struct seq_net_private *p; | ||
35 | |||
36 | BUG_ON(size < sizeof(*p)); | ||
37 | |||
38 | net = get_proc_net(ino); | ||
39 | if (net == NULL) | ||
40 | return -ENXIO; | ||
41 | |||
42 | p = __seq_open_private(f, ops, size); | ||
43 | if (p == NULL) { | ||
44 | put_net(net); | ||
45 | return -ENOMEM; | ||
46 | } | ||
47 | p->net = net; | ||
48 | return 0; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(seq_open_net); | ||
51 | |||
52 | int seq_release_net(struct inode *ino, struct file *f) | ||
53 | { | ||
54 | struct seq_file *seq; | ||
55 | struct seq_net_private *p; | ||
56 | |||
57 | seq = f->private_data; | ||
58 | p = seq->private; | ||
59 | |||
60 | put_net(p->net); | ||
61 | seq_release_private(ino, f); | ||
62 | return 0; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(seq_release_net); | ||
65 | |||
66 | |||
29 | struct proc_dir_entry *proc_net_fops_create(struct net *net, | 67 | struct proc_dir_entry *proc_net_fops_create(struct net *net, |
30 | const char *name, mode_t mode, const struct file_operations *fops) | 68 | const char *name, mode_t mode, const struct file_operations *fops) |
31 | { | 69 | { |
@@ -58,6 +96,17 @@ static struct proc_dir_entry *proc_net_shadow(struct task_struct *task, | |||
58 | return task->nsproxy->net_ns->proc_net; | 96 | return task->nsproxy->net_ns->proc_net; |
59 | } | 97 | } |
60 | 98 | ||
99 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | ||
100 | struct proc_dir_entry *parent) | ||
101 | { | ||
102 | struct proc_dir_entry *pde; | ||
103 | pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); | ||
104 | if (pde != NULL) | ||
105 | pde->data = net; | ||
106 | return pde; | ||
107 | } | ||
108 | EXPORT_SYMBOL_GPL(proc_net_mkdir); | ||
109 | |||
61 | static __net_init int proc_net_ns_init(struct net *net) | 110 | static __net_init int proc_net_ns_init(struct net *net) |
62 | { | 111 | { |
63 | struct proc_dir_entry *root, *netd, *net_statd; | 112 | struct proc_dir_entry *root, *netd, *net_statd; |
@@ -69,18 +118,16 @@ static __net_init int proc_net_ns_init(struct net *net) | |||
69 | goto out; | 118 | goto out; |
70 | 119 | ||
71 | err = -EEXIST; | 120 | err = -EEXIST; |
72 | netd = proc_mkdir("net", root); | 121 | netd = proc_net_mkdir(net, "net", root); |
73 | if (!netd) | 122 | if (!netd) |
74 | goto free_root; | 123 | goto free_root; |
75 | 124 | ||
76 | err = -EEXIST; | 125 | err = -EEXIST; |
77 | net_statd = proc_mkdir("stat", netd); | 126 | net_statd = proc_net_mkdir(net, "stat", netd); |
78 | if (!net_statd) | 127 | if (!net_statd) |
79 | goto free_net; | 128 | goto free_net; |
80 | 129 | ||
81 | root->data = net; | 130 | root->data = net; |
82 | netd->data = net; | ||
83 | net_statd->data = net; | ||
84 | 131 | ||
85 | net->proc_net_root = root; | 132 | net->proc_net_root = root; |
86 | net->proc_net = netd; | 133 | net->proc_net = netd; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c24d81a5a040..8043a3eab52c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -397,12 +397,11 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
397 | if (!priv->task) | 397 | if (!priv->task) |
398 | return NULL; | 398 | return NULL; |
399 | 399 | ||
400 | mm = get_task_mm(priv->task); | 400 | mm = mm_for_maps(priv->task); |
401 | if (!mm) | 401 | if (!mm) |
402 | return NULL; | 402 | return NULL; |
403 | 403 | ||
404 | priv->tail_vma = tail_vma = get_gate_vma(priv->task); | 404 | priv->tail_vma = tail_vma = get_gate_vma(priv->task); |
405 | down_read(&mm->mmap_sem); | ||
406 | 405 | ||
407 | /* Start with last addr hint */ | 406 | /* Start with last addr hint */ |
408 | if (last_addr && (vma = find_vma(mm, last_addr))) { | 407 | if (last_addr && (vma = find_vma(mm, last_addr))) { |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index d8b8c7183c24..1932c2ca3457 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -165,15 +165,13 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
165 | if (!priv->task) | 165 | if (!priv->task) |
166 | return NULL; | 166 | return NULL; |
167 | 167 | ||
168 | mm = get_task_mm(priv->task); | 168 | mm = mm_for_maps(priv->task); |
169 | if (!mm) { | 169 | if (!mm) { |
170 | put_task_struct(priv->task); | 170 | put_task_struct(priv->task); |
171 | priv->task = NULL; | 171 | priv->task = NULL; |
172 | return NULL; | 172 | return NULL; |
173 | } | 173 | } |
174 | 174 | ||
175 | down_read(&mm->mmap_sem); | ||
176 | |||
177 | /* start from the Nth VMA */ | 175 | /* start from the Nth VMA */ |
178 | for (vml = mm->context.vmlist; vml; vml = vml->next) | 176 | for (vml = mm->context.vmlist; vml; vml = vml->next) |
179 | if (n-- == 0) | 177 | if (n-- == 0) |
diff --git a/fs/read_write.c b/fs/read_write.c index ea1f94cc722e..1c177f29e1b7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -197,25 +197,27 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count | |||
197 | { | 197 | { |
198 | struct inode *inode; | 198 | struct inode *inode; |
199 | loff_t pos; | 199 | loff_t pos; |
200 | int retval = -EINVAL; | ||
200 | 201 | ||
201 | inode = file->f_path.dentry->d_inode; | 202 | inode = file->f_path.dentry->d_inode; |
202 | if (unlikely((ssize_t) count < 0)) | 203 | if (unlikely((ssize_t) count < 0)) |
203 | goto Einval; | 204 | return retval; |
204 | pos = *ppos; | 205 | pos = *ppos; |
205 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) | 206 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) |
206 | goto Einval; | 207 | return retval; |
207 | 208 | ||
208 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { | 209 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { |
209 | int retval = locks_mandatory_area( | 210 | retval = locks_mandatory_area( |
210 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, | 211 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, |
211 | inode, file, pos, count); | 212 | inode, file, pos, count); |
212 | if (retval < 0) | 213 | if (retval < 0) |
213 | return retval; | 214 | return retval; |
214 | } | 215 | } |
216 | retval = security_file_permission(file, | ||
217 | read_write == READ ? MAY_READ : MAY_WRITE); | ||
218 | if (retval) | ||
219 | return retval; | ||
215 | return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; | 220 | return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; |
216 | |||
217 | Einval: | ||
218 | return -EINVAL; | ||
219 | } | 221 | } |
220 | 222 | ||
221 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) | 223 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) |
@@ -267,18 +269,15 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | |||
267 | ret = rw_verify_area(READ, file, pos, count); | 269 | ret = rw_verify_area(READ, file, pos, count); |
268 | if (ret >= 0) { | 270 | if (ret >= 0) { |
269 | count = ret; | 271 | count = ret; |
270 | ret = security_file_permission (file, MAY_READ); | 272 | if (file->f_op->read) |
271 | if (!ret) { | 273 | ret = file->f_op->read(file, buf, count, pos); |
272 | if (file->f_op->read) | 274 | else |
273 | ret = file->f_op->read(file, buf, count, pos); | 275 | ret = do_sync_read(file, buf, count, pos); |
274 | else | 276 | if (ret > 0) { |
275 | ret = do_sync_read(file, buf, count, pos); | 277 | fsnotify_access(file->f_path.dentry); |
276 | if (ret > 0) { | 278 | add_rchar(current, ret); |
277 | fsnotify_access(file->f_path.dentry); | ||
278 | add_rchar(current, ret); | ||
279 | } | ||
280 | inc_syscr(current); | ||
281 | } | 279 | } |
280 | inc_syscr(current); | ||
282 | } | 281 | } |
283 | 282 | ||
284 | return ret; | 283 | return ret; |
@@ -325,18 +324,15 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
325 | ret = rw_verify_area(WRITE, file, pos, count); | 324 | ret = rw_verify_area(WRITE, file, pos, count); |
326 | if (ret >= 0) { | 325 | if (ret >= 0) { |
327 | count = ret; | 326 | count = ret; |
328 | ret = security_file_permission (file, MAY_WRITE); | 327 | if (file->f_op->write) |
329 | if (!ret) { | 328 | ret = file->f_op->write(file, buf, count, pos); |
330 | if (file->f_op->write) | 329 | else |
331 | ret = file->f_op->write(file, buf, count, pos); | 330 | ret = do_sync_write(file, buf, count, pos); |
332 | else | 331 | if (ret > 0) { |
333 | ret = do_sync_write(file, buf, count, pos); | 332 | fsnotify_modify(file->f_path.dentry); |
334 | if (ret > 0) { | 333 | add_wchar(current, ret); |
335 | fsnotify_modify(file->f_path.dentry); | ||
336 | add_wchar(current, ret); | ||
337 | } | ||
338 | inc_syscw(current); | ||
339 | } | 334 | } |
335 | inc_syscw(current); | ||
340 | } | 336 | } |
341 | 337 | ||
342 | return ret; | 338 | return ret; |
@@ -450,6 +446,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
450 | } | 446 | } |
451 | return seg; | 447 | return seg; |
452 | } | 448 | } |
449 | EXPORT_SYMBOL(iov_shorten); | ||
453 | 450 | ||
454 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | 451 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, |
455 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | 452 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) |
@@ -603,9 +600,6 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
603 | ret = rw_verify_area(type, file, pos, tot_len); | 600 | ret = rw_verify_area(type, file, pos, tot_len); |
604 | if (ret < 0) | 601 | if (ret < 0) |
605 | goto out; | 602 | goto out; |
606 | ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); | ||
607 | if (ret) | ||
608 | goto out; | ||
609 | 603 | ||
610 | fnv = NULL; | 604 | fnv = NULL; |
611 | if (type == READ) { | 605 | if (type == READ) { |
@@ -737,10 +731,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
737 | goto fput_in; | 731 | goto fput_in; |
738 | count = retval; | 732 | count = retval; |
739 | 733 | ||
740 | retval = security_file_permission (in_file, MAY_READ); | ||
741 | if (retval) | ||
742 | goto fput_in; | ||
743 | |||
744 | /* | 734 | /* |
745 | * Get output file, and verify that it is ok.. | 735 | * Get output file, and verify that it is ok.. |
746 | */ | 736 | */ |
@@ -759,10 +749,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
759 | goto fput_out; | 749 | goto fput_out; |
760 | count = retval; | 750 | count = retval; |
761 | 751 | ||
762 | retval = security_file_permission (out_file, MAY_WRITE); | ||
763 | if (retval) | ||
764 | goto fput_out; | ||
765 | |||
766 | if (!max) | 752 | if (!max) |
767 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); | 753 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
768 | 754 | ||
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile index 6673ee82cb4c..4faf8c4722c3 100644 --- a/fs/smbfs/Makefile +++ b/fs/smbfs/Makefile | |||
@@ -16,23 +16,3 @@ EXTRA_CFLAGS += -DSMBFS_PARANOIA | |||
16 | #EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP | 16 | #EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP |
17 | #EXTRA_CFLAGS += -Werror | 17 | #EXTRA_CFLAGS += -Werror |
18 | 18 | ||
19 | # | ||
20 | # Maintainer rules | ||
21 | # | ||
22 | |||
23 | # getopt.c not included. It is intentionally separate | ||
24 | SRC = proc.c dir.c cache.c sock.c inode.c file.c ioctl.c smbiod.c request.c \ | ||
25 | symlink.c | ||
26 | |||
27 | proto: | ||
28 | -rm -f proto.h | ||
29 | @echo > proto2.h "/*" | ||
30 | @echo >> proto2.h " * Autogenerated with cproto on: " `date` | ||
31 | @echo >> proto2.h " */" | ||
32 | @echo >> proto2.h "" | ||
33 | @echo >> proto2.h "struct smb_request;" | ||
34 | @echo >> proto2.h "struct sock;" | ||
35 | @echo >> proto2.h "struct statfs;" | ||
36 | @echo >> proto2.h "" | ||
37 | cproto -E "gcc -E" -e -v -I $(TOPDIR)/include -DMAKING_PROTO -D__KERNEL__ $(SRC) >> proto2.h | ||
38 | mv proto2.h proto.h | ||
diff --git a/fs/splice.c b/fs/splice.c index 6bdcb6107bc3..1577a7391d23 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -254,11 +254,16 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
254 | } | 254 | } |
255 | 255 | ||
256 | while (page_nr < spd_pages) | 256 | while (page_nr < spd_pages) |
257 | page_cache_release(spd->pages[page_nr++]); | 257 | spd->spd_release(spd, page_nr++); |
258 | 258 | ||
259 | return ret; | 259 | return ret; |
260 | } | 260 | } |
261 | 261 | ||
262 | static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) | ||
263 | { | ||
264 | page_cache_release(spd->pages[i]); | ||
265 | } | ||
266 | |||
262 | static int | 267 | static int |
263 | __generic_file_splice_read(struct file *in, loff_t *ppos, | 268 | __generic_file_splice_read(struct file *in, loff_t *ppos, |
264 | struct pipe_inode_info *pipe, size_t len, | 269 | struct pipe_inode_info *pipe, size_t len, |
@@ -277,6 +282,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
277 | .partial = partial, | 282 | .partial = partial, |
278 | .flags = flags, | 283 | .flags = flags, |
279 | .ops = &page_cache_pipe_buf_ops, | 284 | .ops = &page_cache_pipe_buf_ops, |
285 | .spd_release = spd_release_page, | ||
280 | }; | 286 | }; |
281 | 287 | ||
282 | index = *ppos >> PAGE_CACHE_SHIFT; | 288 | index = *ppos >> PAGE_CACHE_SHIFT; |
@@ -908,10 +914,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
908 | if (unlikely(ret < 0)) | 914 | if (unlikely(ret < 0)) |
909 | return ret; | 915 | return ret; |
910 | 916 | ||
911 | ret = security_file_permission(out, MAY_WRITE); | ||
912 | if (unlikely(ret < 0)) | ||
913 | return ret; | ||
914 | |||
915 | return out->f_op->splice_write(pipe, out, ppos, len, flags); | 917 | return out->f_op->splice_write(pipe, out, ppos, len, flags); |
916 | } | 918 | } |
917 | 919 | ||
@@ -934,10 +936,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
934 | if (unlikely(ret < 0)) | 936 | if (unlikely(ret < 0)) |
935 | return ret; | 937 | return ret; |
936 | 938 | ||
937 | ret = security_file_permission(in, MAY_READ); | ||
938 | if (unlikely(ret < 0)) | ||
939 | return ret; | ||
940 | |||
941 | return in->f_op->splice_read(in, ppos, pipe, len, flags); | 939 | return in->f_op->splice_read(in, ppos, pipe, len, flags); |
942 | } | 940 | } |
943 | 941 | ||
@@ -1033,7 +1031,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
1033 | goto out_release; | 1031 | goto out_release; |
1034 | } | 1032 | } |
1035 | 1033 | ||
1034 | done: | ||
1036 | pipe->nrbufs = pipe->curbuf = 0; | 1035 | pipe->nrbufs = pipe->curbuf = 0; |
1036 | if (bytes > 0) | ||
1037 | file_accessed(in); | ||
1038 | |||
1037 | return bytes; | 1039 | return bytes; |
1038 | 1040 | ||
1039 | out_release: | 1041 | out_release: |
@@ -1049,16 +1051,11 @@ out_release: | |||
1049 | buf->ops = NULL; | 1051 | buf->ops = NULL; |
1050 | } | 1052 | } |
1051 | } | 1053 | } |
1052 | pipe->nrbufs = pipe->curbuf = 0; | ||
1053 | |||
1054 | /* | ||
1055 | * If we transferred some data, return the number of bytes: | ||
1056 | */ | ||
1057 | if (bytes > 0) | ||
1058 | return bytes; | ||
1059 | 1054 | ||
1060 | return ret; | 1055 | if (!bytes) |
1056 | bytes = ret; | ||
1061 | 1057 | ||
1058 | goto done; | ||
1062 | } | 1059 | } |
1063 | EXPORT_SYMBOL(splice_direct_to_actor); | 1060 | EXPORT_SYMBOL(splice_direct_to_actor); |
1064 | 1061 | ||
@@ -1440,6 +1437,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1440 | .partial = partial, | 1437 | .partial = partial, |
1441 | .flags = flags, | 1438 | .flags = flags, |
1442 | .ops = &user_page_pipe_buf_ops, | 1439 | .ops = &user_page_pipe_buf_ops, |
1440 | .spd_release = spd_release_page, | ||
1443 | }; | 1441 | }; |
1444 | 1442 | ||
1445 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1443 | pipe = pipe_info(file->f_path.dentry->d_inode); |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 337162935d21..4948d9bc405d 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -440,7 +440,7 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) | |||
440 | /** | 440 | /** |
441 | * sysfs_remove_one - remove sysfs_dirent from parent | 441 | * sysfs_remove_one - remove sysfs_dirent from parent |
442 | * @acxt: addrm context to use | 442 | * @acxt: addrm context to use |
443 | * @sd: sysfs_dirent to be added | 443 | * @sd: sysfs_dirent to be removed |
444 | * | 444 | * |
445 | * Mark @sd removed and drop nlink of parent inode if @sd is a | 445 | * Mark @sd removed and drop nlink of parent inode if @sd is a |
446 | * directory. @sd is unlinked from the children list. | 446 | * directory. @sd is unlinked from the children list. |
@@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, | |||
678 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); | 678 | sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); |
679 | 679 | ||
680 | /* no such entry */ | 680 | /* no such entry */ |
681 | if (!sd) | 681 | if (!sd) { |
682 | ret = ERR_PTR(-ENOENT); | ||
682 | goto out_unlock; | 683 | goto out_unlock; |
684 | } | ||
683 | 685 | ||
684 | /* attach dentry and inode */ | 686 | /* attach dentry and inode */ |
685 | inode = sysfs_get_inode(sd); | 687 | inode = sysfs_get_inode(sd); |
@@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | |||
781 | old_dentry = sysfs_get_dentry(sd); | 783 | old_dentry = sysfs_get_dentry(sd); |
782 | if (IS_ERR(old_dentry)) { | 784 | if (IS_ERR(old_dentry)) { |
783 | error = PTR_ERR(old_dentry); | 785 | error = PTR_ERR(old_dentry); |
786 | old_dentry = NULL; | ||
784 | goto out; | 787 | goto out; |
785 | } | 788 | } |
786 | 789 | ||
@@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) | |||
848 | old_dentry = sysfs_get_dentry(sd); | 851 | old_dentry = sysfs_get_dentry(sd); |
849 | if (IS_ERR(old_dentry)) { | 852 | if (IS_ERR(old_dentry)) { |
850 | error = PTR_ERR(old_dentry); | 853 | error = PTR_ERR(old_dentry); |
854 | old_dentry = NULL; | ||
851 | goto out; | 855 | goto out; |
852 | } | 856 | } |
853 | old_parent = old_dentry->d_parent; | 857 | old_parent = old_dentry->d_parent; |
@@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) | |||
855 | new_parent = sysfs_get_dentry(new_parent_sd); | 859 | new_parent = sysfs_get_dentry(new_parent_sd); |
856 | if (IS_ERR(new_parent)) { | 860 | if (IS_ERR(new_parent)) { |
857 | error = PTR_ERR(new_parent); | 861 | error = PTR_ERR(new_parent); |
862 | new_parent = NULL; | ||
858 | goto out; | 863 | goto out; |
859 | } | 864 | } |
860 | 865 | ||
@@ -878,7 +883,6 @@ again: | |||
878 | error = 0; | 883 | error = 0; |
879 | d_add(new_dentry, NULL); | 884 | d_add(new_dentry, NULL); |
880 | d_move(old_dentry, new_dentry); | 885 | d_move(old_dentry, new_dentry); |
881 | dput(new_dentry); | ||
882 | 886 | ||
883 | /* Remove from old parent's list and insert into new parent's list. */ | 887 | /* Remove from old parent's list and insert into new parent's list. */ |
884 | sysfs_unlink_sibling(sd); | 888 | sysfs_unlink_sibling(sd); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 4045bdcc4b33..a271c87c4472 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -20,43 +20,6 @@ | |||
20 | 20 | ||
21 | #include "sysfs.h" | 21 | #include "sysfs.h" |
22 | 22 | ||
23 | #define to_sattr(a) container_of(a,struct subsys_attribute, attr) | ||
24 | |||
25 | /* | ||
26 | * Subsystem file operations. | ||
27 | * These operations allow subsystems to have files that can be | ||
28 | * read/written. | ||
29 | */ | ||
30 | static ssize_t | ||
31 | subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) | ||
32 | { | ||
33 | struct kset *kset = to_kset(kobj); | ||
34 | struct subsys_attribute * sattr = to_sattr(attr); | ||
35 | ssize_t ret = -EIO; | ||
36 | |||
37 | if (sattr->show) | ||
38 | ret = sattr->show(kset, page); | ||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | static ssize_t | ||
43 | subsys_attr_store(struct kobject * kobj, struct attribute * attr, | ||
44 | const char * page, size_t count) | ||
45 | { | ||
46 | struct kset *kset = to_kset(kobj); | ||
47 | struct subsys_attribute * sattr = to_sattr(attr); | ||
48 | ssize_t ret = -EIO; | ||
49 | |||
50 | if (sattr->store) | ||
51 | ret = sattr->store(kset, page, count); | ||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | static struct sysfs_ops subsys_sysfs_ops = { | ||
56 | .show = subsys_attr_show, | ||
57 | .store = subsys_attr_store, | ||
58 | }; | ||
59 | |||
60 | /* | 23 | /* |
61 | * There's one sysfs_buffer for each open file and one | 24 | * There's one sysfs_buffer for each open file and one |
62 | * sysfs_open_dirent for each sysfs_dirent with one or more open | 25 | * sysfs_open_dirent for each sysfs_dirent with one or more open |
@@ -66,7 +29,7 @@ static struct sysfs_ops subsys_sysfs_ops = { | |||
66 | * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open | 29 | * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open |
67 | * is protected by sysfs_open_dirent_lock. | 30 | * is protected by sysfs_open_dirent_lock. |
68 | */ | 31 | */ |
69 | static spinlock_t sysfs_open_dirent_lock = SPIN_LOCK_UNLOCKED; | 32 | static DEFINE_SPINLOCK(sysfs_open_dirent_lock); |
70 | 33 | ||
71 | struct sysfs_open_dirent { | 34 | struct sysfs_open_dirent { |
72 | atomic_t refcnt; | 35 | atomic_t refcnt; |
@@ -354,31 +317,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
354 | { | 317 | { |
355 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 318 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
356 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 319 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
357 | struct sysfs_buffer * buffer; | 320 | struct sysfs_buffer *buffer; |
358 | struct sysfs_ops * ops = NULL; | 321 | struct sysfs_ops *ops; |
359 | int error; | 322 | int error = -EACCES; |
360 | 323 | ||
361 | /* need attr_sd for attr and ops, its parent for kobj */ | 324 | /* need attr_sd for attr and ops, its parent for kobj */ |
362 | if (!sysfs_get_active_two(attr_sd)) | 325 | if (!sysfs_get_active_two(attr_sd)) |
363 | return -ENODEV; | 326 | return -ENODEV; |
364 | 327 | ||
365 | /* if the kobject has no ktype, then we assume that it is a subsystem | 328 | /* every kobject with an attribute needs a ktype assigned */ |
366 | * itself, and use ops for it. | 329 | if (kobj->ktype && kobj->ktype->sysfs_ops) |
367 | */ | ||
368 | if (kobj->kset && kobj->kset->ktype) | ||
369 | ops = kobj->kset->ktype->sysfs_ops; | ||
370 | else if (kobj->ktype) | ||
371 | ops = kobj->ktype->sysfs_ops; | 330 | ops = kobj->ktype->sysfs_ops; |
372 | else | 331 | else { |
373 | ops = &subsys_sysfs_ops; | 332 | printk(KERN_ERR "missing sysfs attribute operations for " |
374 | 333 | "kobject: %s\n", kobject_name(kobj)); | |
375 | error = -EACCES; | 334 | WARN_ON(1); |
376 | |||
377 | /* No sysfs operations, either from having no subsystem, | ||
378 | * or the subsystem have no operations. | ||
379 | */ | ||
380 | if (!ops) | ||
381 | goto err_out; | 335 | goto err_out; |
336 | } | ||
382 | 337 | ||
383 | /* File needs write support. | 338 | /* File needs write support. |
384 | * The inode's perms must say it's ok, | 339 | * The inode's perms must say it's ok, |
@@ -568,7 +523,11 @@ int sysfs_add_file_to_group(struct kobject *kobj, | |||
568 | struct sysfs_dirent *dir_sd; | 523 | struct sysfs_dirent *dir_sd; |
569 | int error; | 524 | int error; |
570 | 525 | ||
571 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 526 | if (group) |
527 | dir_sd = sysfs_get_dirent(kobj->sd, group); | ||
528 | else | ||
529 | dir_sd = sysfs_get(kobj->sd); | ||
530 | |||
572 | if (!dir_sd) | 531 | if (!dir_sd) |
573 | return -ENOENT; | 532 | return -ENOENT; |
574 | 533 | ||
@@ -656,7 +615,10 @@ void sysfs_remove_file_from_group(struct kobject *kobj, | |||
656 | { | 615 | { |
657 | struct sysfs_dirent *dir_sd; | 616 | struct sysfs_dirent *dir_sd; |
658 | 617 | ||
659 | dir_sd = sysfs_get_dirent(kobj->sd, group); | 618 | if (group) |
619 | dir_sd = sysfs_get_dirent(kobj->sd, group); | ||
620 | else | ||
621 | dir_sd = sysfs_get(kobj->sd); | ||
660 | if (dir_sd) { | 622 | if (dir_sd) { |
661 | sysfs_hash_and_remove(dir_sd, attr->name); | 623 | sysfs_hash_and_remove(dir_sd, attr->name); |
662 | sysfs_put(dir_sd); | 624 | sysfs_put(dir_sd); |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index d1972374655a..0871c3dadce1 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -16,25 +16,31 @@ | |||
16 | #include "sysfs.h" | 16 | #include "sysfs.h" |
17 | 17 | ||
18 | 18 | ||
19 | static void remove_files(struct sysfs_dirent *dir_sd, | 19 | static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, |
20 | const struct attribute_group *grp) | 20 | const struct attribute_group *grp) |
21 | { | 21 | { |
22 | struct attribute *const* attr; | 22 | struct attribute *const* attr; |
23 | int i; | ||
23 | 24 | ||
24 | for (attr = grp->attrs; *attr; attr++) | 25 | for (i = 0, attr = grp->attrs; *attr; i++, attr++) |
25 | sysfs_hash_and_remove(dir_sd, (*attr)->name); | 26 | if (!grp->is_visible || |
27 | grp->is_visible(kobj, *attr, i)) | ||
28 | sysfs_hash_and_remove(dir_sd, (*attr)->name); | ||
26 | } | 29 | } |
27 | 30 | ||
28 | static int create_files(struct sysfs_dirent *dir_sd, | 31 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, |
29 | const struct attribute_group *grp) | 32 | const struct attribute_group *grp) |
30 | { | 33 | { |
31 | struct attribute *const* attr; | 34 | struct attribute *const* attr; |
32 | int error = 0; | 35 | int error = 0, i; |
33 | 36 | ||
34 | for (attr = grp->attrs; *attr && !error; attr++) | 37 | for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) |
35 | error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); | 38 | if (!grp->is_visible || |
39 | grp->is_visible(kobj, *attr, i)) | ||
40 | error |= | ||
41 | sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); | ||
36 | if (error) | 42 | if (error) |
37 | remove_files(dir_sd, grp); | 43 | remove_files(dir_sd, kobj, grp); |
38 | return error; | 44 | return error; |
39 | } | 45 | } |
40 | 46 | ||
@@ -54,7 +60,7 @@ int sysfs_create_group(struct kobject * kobj, | |||
54 | } else | 60 | } else |
55 | sd = kobj->sd; | 61 | sd = kobj->sd; |
56 | sysfs_get(sd); | 62 | sysfs_get(sd); |
57 | error = create_files(sd, grp); | 63 | error = create_files(sd, kobj, grp); |
58 | if (error) { | 64 | if (error) { |
59 | if (grp->name) | 65 | if (grp->name) |
60 | sysfs_remove_subdir(sd); | 66 | sysfs_remove_subdir(sd); |
@@ -75,7 +81,7 @@ void sysfs_remove_group(struct kobject * kobj, | |||
75 | } else | 81 | } else |
76 | sd = sysfs_get(dir_sd); | 82 | sd = sysfs_get(dir_sd); |
77 | 83 | ||
78 | remove_files(sd, grp); | 84 | remove_files(sd, kobj, grp); |
79 | if (grp->name) | 85 | if (grp->name) |
80 | sysfs_remove_subdir(sd); | 86 | sysfs_remove_subdir(sd); |
81 | 87 | ||
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3eac20c63c41..5f66c4466151 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -19,39 +19,6 @@ | |||
19 | 19 | ||
20 | #include "sysfs.h" | 20 | #include "sysfs.h" |
21 | 21 | ||
22 | static int object_depth(struct sysfs_dirent *sd) | ||
23 | { | ||
24 | int depth = 0; | ||
25 | |||
26 | for (; sd->s_parent; sd = sd->s_parent) | ||
27 | depth++; | ||
28 | |||
29 | return depth; | ||
30 | } | ||
31 | |||
32 | static int object_path_length(struct sysfs_dirent * sd) | ||
33 | { | ||
34 | int length = 1; | ||
35 | |||
36 | for (; sd->s_parent; sd = sd->s_parent) | ||
37 | length += strlen(sd->s_name) + 1; | ||
38 | |||
39 | return length; | ||
40 | } | ||
41 | |||
42 | static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) | ||
43 | { | ||
44 | --length; | ||
45 | for (; sd->s_parent; sd = sd->s_parent) { | ||
46 | int cur = strlen(sd->s_name); | ||
47 | |||
48 | /* back up enough to print this bus id with '/' */ | ||
49 | length -= cur; | ||
50 | strncpy(buffer + length, sd->s_name, cur); | ||
51 | *(buffer + --length) = '/'; | ||
52 | } | ||
53 | } | ||
54 | |||
55 | /** | 22 | /** |
56 | * sysfs_create_link - create symlink between two objects. | 23 | * sysfs_create_link - create symlink between two objects. |
57 | * @kobj: object whose directory we're creating the link in. | 24 | * @kobj: object whose directory we're creating the link in. |
@@ -112,7 +79,6 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char | |||
112 | return error; | 79 | return error; |
113 | } | 80 | } |
114 | 81 | ||
115 | |||
116 | /** | 82 | /** |
117 | * sysfs_remove_link - remove symlink in object's directory. | 83 | * sysfs_remove_link - remove symlink in object's directory. |
118 | * @kobj: object we're acting for. | 84 | * @kobj: object we're acting for. |
@@ -124,24 +90,54 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) | |||
124 | sysfs_hash_and_remove(kobj->sd, name); | 90 | sysfs_hash_and_remove(kobj->sd, name); |
125 | } | 91 | } |
126 | 92 | ||
127 | static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, | 93 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |
128 | struct sysfs_dirent * target_sd, char *path) | 94 | struct sysfs_dirent *target_sd, char *path) |
129 | { | 95 | { |
130 | char * s; | 96 | struct sysfs_dirent *base, *sd; |
131 | int depth, size; | 97 | char *s = path; |
98 | int len = 0; | ||
99 | |||
100 | /* go up to the root, stop at the base */ | ||
101 | base = parent_sd; | ||
102 | while (base->s_parent) { | ||
103 | sd = target_sd->s_parent; | ||
104 | while (sd->s_parent && base != sd) | ||
105 | sd = sd->s_parent; | ||
106 | |||
107 | if (base == sd) | ||
108 | break; | ||
109 | |||
110 | strcpy(s, "../"); | ||
111 | s += 3; | ||
112 | base = base->s_parent; | ||
113 | } | ||
114 | |||
115 | /* determine end of target string for reverse fillup */ | ||
116 | sd = target_sd; | ||
117 | while (sd->s_parent && sd != base) { | ||
118 | len += strlen(sd->s_name) + 1; | ||
119 | sd = sd->s_parent; | ||
120 | } | ||
132 | 121 | ||
133 | depth = object_depth(parent_sd); | 122 | /* check limits */ |
134 | size = object_path_length(target_sd) + depth * 3 - 1; | 123 | if (len < 2) |
135 | if (size > PATH_MAX) | 124 | return -EINVAL; |
125 | len--; | ||
126 | if ((s - path) + len > PATH_MAX) | ||
136 | return -ENAMETOOLONG; | 127 | return -ENAMETOOLONG; |
137 | 128 | ||
138 | pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); | 129 | /* reverse fillup of target string from target to base */ |
130 | sd = target_sd; | ||
131 | while (sd->s_parent && sd != base) { | ||
132 | int slen = strlen(sd->s_name); | ||
139 | 133 | ||
140 | for (s = path; depth--; s += 3) | 134 | len -= slen; |
141 | strcpy(s,"../"); | 135 | strncpy(s + len, sd->s_name, slen); |
136 | if (len) | ||
137 | s[--len] = '/'; | ||
142 | 138 | ||
143 | fill_object_path(target_sd, path, size); | 139 | sd = sd->s_parent; |
144 | pr_debug("%s: path = '%s'\n", __FUNCTION__, path); | 140 | } |
145 | 141 | ||
146 | return 0; | 142 | return 0; |
147 | } | 143 | } |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b9c8589e05c2..a49dd8d4b069 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -725,15 +725,15 @@ xfs_buf_associate_memory( | |||
725 | { | 725 | { |
726 | int rval; | 726 | int rval; |
727 | int i = 0; | 727 | int i = 0; |
728 | size_t ptr; | 728 | unsigned long pageaddr; |
729 | size_t end, end_cur; | 729 | unsigned long offset; |
730 | off_t offset; | 730 | size_t buflen; |
731 | int page_count; | 731 | int page_count; |
732 | 732 | ||
733 | page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; | 733 | pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; |
734 | offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK); | 734 | offset = (unsigned long)mem - pageaddr; |
735 | if (offset && (len > PAGE_CACHE_SIZE)) | 735 | buflen = PAGE_CACHE_ALIGN(len + offset); |
736 | page_count++; | 736 | page_count = buflen >> PAGE_CACHE_SHIFT; |
737 | 737 | ||
738 | /* Free any previous set of page pointers */ | 738 | /* Free any previous set of page pointers */ |
739 | if (bp->b_pages) | 739 | if (bp->b_pages) |
@@ -747,22 +747,15 @@ xfs_buf_associate_memory( | |||
747 | return rval; | 747 | return rval; |
748 | 748 | ||
749 | bp->b_offset = offset; | 749 | bp->b_offset = offset; |
750 | ptr = (size_t) mem & PAGE_CACHE_MASK; | 750 | |
751 | end = PAGE_CACHE_ALIGN((size_t) mem + len); | 751 | for (i = 0; i < bp->b_page_count; i++) { |
752 | end_cur = end; | 752 | bp->b_pages[i] = mem_to_page((void *)pageaddr); |
753 | /* set up first page */ | 753 | pageaddr += PAGE_CACHE_SIZE; |
754 | bp->b_pages[0] = mem_to_page(mem); | ||
755 | |||
756 | ptr += PAGE_CACHE_SIZE; | ||
757 | bp->b_page_count = ++i; | ||
758 | while (ptr < end) { | ||
759 | bp->b_pages[i] = mem_to_page((void *)ptr); | ||
760 | bp->b_page_count = ++i; | ||
761 | ptr += PAGE_CACHE_SIZE; | ||
762 | } | 754 | } |
763 | bp->b_locked = 0; | 755 | bp->b_locked = 0; |
764 | 756 | ||
765 | bp->b_count_desired = bp->b_buffer_length = len; | 757 | bp->b_count_desired = len; |
758 | bp->b_buffer_length = buflen; | ||
766 | bp->b_flags |= XBF_MAPPED; | 759 | bp->b_flags |= XBF_MAPPED; |
767 | 760 | ||
768 | return 0; | 761 | return 0; |
@@ -1032,7 +1025,7 @@ xfs_buf_ioend( | |||
1032 | xfs_buf_t *bp, | 1025 | xfs_buf_t *bp, |
1033 | int schedule) | 1026 | int schedule) |
1034 | { | 1027 | { |
1035 | bp->b_flags &= ~(XBF_READ | XBF_WRITE); | 1028 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); |
1036 | if (bp->b_error == 0) | 1029 | if (bp->b_error == 0) |
1037 | bp->b_flags |= XBF_DONE; | 1030 | bp->b_flags |= XBF_DONE; |
1038 | 1031 | ||
@@ -1750,6 +1743,8 @@ xfsbufd( | |||
1750 | 1743 | ||
1751 | current->flags |= PF_MEMALLOC; | 1744 | current->flags |= PF_MEMALLOC; |
1752 | 1745 | ||
1746 | set_freezable(); | ||
1747 | |||
1753 | do { | 1748 | do { |
1754 | if (unlikely(freezing(current))) { | 1749 | if (unlikely(freezing(current))) { |
1755 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); | 1750 | set_bit(XBT_FORCE_SLEEP, &target->bt_flags); |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index fb8dd34041eb..21a1c2b1c5fc 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -218,6 +218,15 @@ xfs_vm_fault( | |||
218 | } | 218 | } |
219 | #endif /* CONFIG_XFS_DMAPI */ | 219 | #endif /* CONFIG_XFS_DMAPI */ |
220 | 220 | ||
221 | /* | ||
222 | * Unfortunately we can't just use the clean and simple readdir implementation | ||
223 | * below, because nfs might call back into ->lookup from the filldir callback | ||
224 | * and that will deadlock the low-level btree code. | ||
225 | * | ||
226 | * Hopefully we'll find a better workaround that allows to use the optimal | ||
227 | * version at least for local readdirs for 2.6.25. | ||
228 | */ | ||
229 | #if 0 | ||
221 | STATIC int | 230 | STATIC int |
222 | xfs_file_readdir( | 231 | xfs_file_readdir( |
223 | struct file *filp, | 232 | struct file *filp, |
@@ -249,6 +258,126 @@ xfs_file_readdir( | |||
249 | return -error; | 258 | return -error; |
250 | return 0; | 259 | return 0; |
251 | } | 260 | } |
261 | #else | ||
262 | |||
263 | struct hack_dirent { | ||
264 | u64 ino; | ||
265 | loff_t offset; | ||
266 | int namlen; | ||
267 | unsigned int d_type; | ||
268 | char name[]; | ||
269 | }; | ||
270 | |||
271 | struct hack_callback { | ||
272 | char *dirent; | ||
273 | size_t len; | ||
274 | size_t used; | ||
275 | }; | ||
276 | |||
277 | STATIC int | ||
278 | xfs_hack_filldir( | ||
279 | void *__buf, | ||
280 | const char *name, | ||
281 | int namlen, | ||
282 | loff_t offset, | ||
283 | u64 ino, | ||
284 | unsigned int d_type) | ||
285 | { | ||
286 | struct hack_callback *buf = __buf; | ||
287 | struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used); | ||
288 | unsigned int reclen; | ||
289 | |||
290 | reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64)); | ||
291 | if (buf->used + reclen > buf->len) | ||
292 | return -EINVAL; | ||
293 | |||
294 | de->namlen = namlen; | ||
295 | de->offset = offset; | ||
296 | de->ino = ino; | ||
297 | de->d_type = d_type; | ||
298 | memcpy(de->name, name, namlen); | ||
299 | buf->used += reclen; | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | STATIC int | ||
304 | xfs_file_readdir( | ||
305 | struct file *filp, | ||
306 | void *dirent, | ||
307 | filldir_t filldir) | ||
308 | { | ||
309 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
310 | xfs_inode_t *ip = XFS_I(inode); | ||
311 | struct hack_callback buf; | ||
312 | struct hack_dirent *de; | ||
313 | int error; | ||
314 | loff_t size; | ||
315 | int eof = 0; | ||
316 | xfs_off_t start_offset, curr_offset, offset; | ||
317 | |||
318 | /* | ||
319 | * Try fairly hard to get memory | ||
320 | */ | ||
321 | buf.len = PAGE_CACHE_SIZE; | ||
322 | do { | ||
323 | buf.dirent = kmalloc(buf.len, GFP_KERNEL); | ||
324 | if (buf.dirent) | ||
325 | break; | ||
326 | buf.len >>= 1; | ||
327 | } while (buf.len >= 1024); | ||
328 | |||
329 | if (!buf.dirent) | ||
330 | return -ENOMEM; | ||
331 | |||
332 | curr_offset = filp->f_pos; | ||
333 | if (curr_offset == 0x7fffffff) | ||
334 | offset = 0xffffffff; | ||
335 | else | ||
336 | offset = filp->f_pos; | ||
337 | |||
338 | while (!eof) { | ||
339 | unsigned int reclen; | ||
340 | |||
341 | start_offset = offset; | ||
342 | |||
343 | buf.used = 0; | ||
344 | error = -xfs_readdir(ip, &buf, buf.len, &offset, | ||
345 | xfs_hack_filldir); | ||
346 | if (error || offset == start_offset) { | ||
347 | size = 0; | ||
348 | break; | ||
349 | } | ||
350 | |||
351 | size = buf.used; | ||
352 | de = (struct hack_dirent *)buf.dirent; | ||
353 | curr_offset = de->offset /* & 0x7fffffff */; | ||
354 | while (size > 0) { | ||
355 | if (filldir(dirent, de->name, de->namlen, | ||
356 | curr_offset & 0x7fffffff, | ||
357 | de->ino, de->d_type)) { | ||
358 | goto done; | ||
359 | } | ||
360 | |||
361 | reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen, | ||
362 | sizeof(u64)); | ||
363 | size -= reclen; | ||
364 | de = (struct hack_dirent *)((char *)de + reclen); | ||
365 | curr_offset = de->offset /* & 0x7fffffff */; | ||
366 | } | ||
367 | } | ||
368 | |||
369 | done: | ||
370 | if (!error) { | ||
371 | if (size == 0) | ||
372 | filp->f_pos = offset & 0x7fffffff; | ||
373 | else if (de) | ||
374 | filp->f_pos = curr_offset; | ||
375 | } | ||
376 | |||
377 | kfree(buf.dirent); | ||
378 | return error; | ||
379 | } | ||
380 | #endif | ||
252 | 381 | ||
253 | STATIC int | 382 | STATIC int |
254 | xfs_file_mmap( | 383 | xfs_file_mmap( |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 2b34bad48b07..98a56568bb24 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -1047,24 +1047,20 @@ xfs_ioc_bulkstat( | |||
1047 | if ((count = bulkreq.icount) <= 0) | 1047 | if ((count = bulkreq.icount) <= 0) |
1048 | return -XFS_ERROR(EINVAL); | 1048 | return -XFS_ERROR(EINVAL); |
1049 | 1049 | ||
1050 | if (bulkreq.ubuffer == NULL) | ||
1051 | return -XFS_ERROR(EINVAL); | ||
1052 | |||
1050 | if (cmd == XFS_IOC_FSINUMBERS) | 1053 | if (cmd == XFS_IOC_FSINUMBERS) |
1051 | error = xfs_inumbers(mp, &inlast, &count, | 1054 | error = xfs_inumbers(mp, &inlast, &count, |
1052 | bulkreq.ubuffer, xfs_inumbers_fmt); | 1055 | bulkreq.ubuffer, xfs_inumbers_fmt); |
1053 | else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) | 1056 | else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) |
1054 | error = xfs_bulkstat_single(mp, &inlast, | 1057 | error = xfs_bulkstat_single(mp, &inlast, |
1055 | bulkreq.ubuffer, &done); | 1058 | bulkreq.ubuffer, &done); |
1056 | else { /* XFS_IOC_FSBULKSTAT */ | 1059 | else /* XFS_IOC_FSBULKSTAT */ |
1057 | if (count == 1 && inlast != 0) { | 1060 | error = xfs_bulkstat(mp, &inlast, &count, |
1058 | inlast++; | 1061 | (bulkstat_one_pf)xfs_bulkstat_one, NULL, |
1059 | error = xfs_bulkstat_single(mp, &inlast, | 1062 | sizeof(xfs_bstat_t), bulkreq.ubuffer, |
1060 | bulkreq.ubuffer, &done); | 1063 | BULKSTAT_FG_QUICK, &done); |
1061 | } else { | ||
1062 | error = xfs_bulkstat(mp, &inlast, &count, | ||
1063 | (bulkstat_one_pf)xfs_bulkstat_one, NULL, | ||
1064 | sizeof(xfs_bstat_t), bulkreq.ubuffer, | ||
1065 | BULKSTAT_FG_QUICK, &done); | ||
1066 | } | ||
1067 | } | ||
1068 | 1064 | ||
1069 | if (error) | 1065 | if (error) |
1070 | return -error; | 1066 | return -error; |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0046bdd5b7f1..bf2a956b63c2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -291,6 +291,9 @@ xfs_ioc_bulkstat_compat( | |||
291 | if ((count = bulkreq.icount) <= 0) | 291 | if ((count = bulkreq.icount) <= 0) |
292 | return -XFS_ERROR(EINVAL); | 292 | return -XFS_ERROR(EINVAL); |
293 | 293 | ||
294 | if (bulkreq.ubuffer == NULL) | ||
295 | return -XFS_ERROR(EINVAL); | ||
296 | |||
294 | if (cmd == XFS_IOC_FSINUMBERS) | 297 | if (cmd == XFS_IOC_FSINUMBERS) |
295 | error = xfs_inumbers(mp, &inlast, &count, | 298 | error = xfs_inumbers(mp, &inlast, &count, |
296 | bulkreq.ubuffer, xfs_inumbers_fmt_compat); | 299 | bulkreq.ubuffer, xfs_inumbers_fmt_compat); |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index ac50f8a37582..5e8bb7f71b5a 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -117,7 +117,7 @@ xfs_ichgtime( | |||
117 | */ | 117 | */ |
118 | SYNCHRONIZE(); | 118 | SYNCHRONIZE(); |
119 | ip->i_update_core = 1; | 119 | ip->i_update_core = 1; |
120 | if (!(inode->i_state & I_SYNC)) | 120 | if (!(inode->i_state & I_NEW)) |
121 | mark_inode_dirty_sync(inode); | 121 | mark_inode_dirty_sync(inode); |
122 | } | 122 | } |
123 | 123 | ||
@@ -169,7 +169,7 @@ xfs_ichgtime_fast( | |||
169 | */ | 169 | */ |
170 | SYNCHRONIZE(); | 170 | SYNCHRONIZE(); |
171 | ip->i_update_core = 1; | 171 | ip->i_update_core = 1; |
172 | if (!(inode->i_state & I_SYNC)) | 172 | if (!(inode->i_state & I_NEW)) |
173 | mark_inode_dirty_sync(inode); | 173 | mark_inode_dirty_sync(inode); |
174 | } | 174 | } |
175 | 175 | ||
@@ -332,9 +332,7 @@ xfs_vn_mknod( | |||
332 | ASSERT(vp); | 332 | ASSERT(vp); |
333 | ip = vn_to_inode(vp); | 333 | ip = vn_to_inode(vp); |
334 | 334 | ||
335 | if (S_ISCHR(mode) || S_ISBLK(mode)) | 335 | if (S_ISDIR(mode)) |
336 | ip->i_rdev = rdev; | ||
337 | else if (S_ISDIR(mode)) | ||
338 | xfs_validate_fields(ip); | 336 | xfs_validate_fields(ip); |
339 | d_instantiate(dentry, ip); | 337 | d_instantiate(dentry, ip); |
340 | xfs_validate_fields(dir); | 338 | xfs_validate_fields(dir); |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index b5f91281b707..d488645f833d 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -1008,6 +1008,9 @@ xfs_qm_sync( | |||
1008 | boolean_t nowait; | 1008 | boolean_t nowait; |
1009 | int error; | 1009 | int error; |
1010 | 1010 | ||
1011 | if (! XFS_IS_QUOTA_ON(mp)) | ||
1012 | return 0; | ||
1013 | |||
1011 | restarts = 0; | 1014 | restarts = 0; |
1012 | /* | 1015 | /* |
1013 | * We won't block unless we are asked to. | 1016 | * We won't block unless we are asked to. |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index c171767e242a..a5f4f4fb8868 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -508,7 +508,7 @@ xfs_dir2_block_getdents( | |||
508 | continue; | 508 | continue; |
509 | 509 | ||
510 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 510 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
511 | ptr - (char *)block); | 511 | (char *)dep - (char *)block); |
512 | ino = be64_to_cpu(dep->inumber); | 512 | ino = be64_to_cpu(dep->inumber); |
513 | #if XFS_BIG_INUMS | 513 | #if XFS_BIG_INUMS |
514 | ino += mp->m_inoadd; | 514 | ino += mp->m_inoadd; |
@@ -519,9 +519,7 @@ xfs_dir2_block_getdents( | |||
519 | */ | 519 | */ |
520 | if (filldir(dirent, dep->name, dep->namelen, cook, | 520 | if (filldir(dirent, dep->name, dep->namelen, cook, |
521 | ino, DT_UNKNOWN)) { | 521 | ino, DT_UNKNOWN)) { |
522 | *offset = xfs_dir2_db_off_to_dataptr(mp, | 522 | *offset = cook; |
523 | mp->m_dirdatablk, | ||
524 | (char *)dep - (char *)block); | ||
525 | xfs_da_brelse(NULL, bp); | 523 | xfs_da_brelse(NULL, bp); |
526 | return 0; | 524 | return 0; |
527 | } | 525 | } |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index e7c12fa1303e..0ca0020ba09f 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -1091,7 +1091,7 @@ xfs_dir2_leaf_getdents( | |||
1091 | * Won't fit. Return to caller. | 1091 | * Won't fit. Return to caller. |
1092 | */ | 1092 | */ |
1093 | if (filldir(dirent, dep->name, dep->namelen, | 1093 | if (filldir(dirent, dep->name, dep->namelen, |
1094 | xfs_dir2_byte_to_dataptr(mp, curoff + length), | 1094 | xfs_dir2_byte_to_dataptr(mp, curoff), |
1095 | ino, DT_UNKNOWN)) | 1095 | ino, DT_UNKNOWN)) |
1096 | break; | 1096 | break; |
1097 | 1097 | ||
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 182c70315ad1..919d275a1cef 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -752,7 +752,7 @@ xfs_dir2_sf_getdents( | |||
752 | #if XFS_BIG_INUMS | 752 | #if XFS_BIG_INUMS |
753 | ino += mp->m_inoadd; | 753 | ino += mp->m_inoadd; |
754 | #endif | 754 | #endif |
755 | if (filldir(dirent, ".", 1, dotdot_offset, ino, DT_DIR)) { | 755 | if (filldir(dirent, ".", 1, dot_offset, ino, DT_DIR)) { |
756 | *offset = dot_offset; | 756 | *offset = dot_offset; |
757 | return 0; | 757 | return 0; |
758 | } | 758 | } |
@@ -762,13 +762,11 @@ xfs_dir2_sf_getdents( | |||
762 | * Put .. entry unless we're starting past it. | 762 | * Put .. entry unless we're starting past it. |
763 | */ | 763 | */ |
764 | if (*offset <= dotdot_offset) { | 764 | if (*offset <= dotdot_offset) { |
765 | off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | ||
766 | XFS_DIR2_DATA_FIRST_OFFSET); | ||
767 | ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); | 765 | ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); |
768 | #if XFS_BIG_INUMS | 766 | #if XFS_BIG_INUMS |
769 | ino += mp->m_inoadd; | 767 | ino += mp->m_inoadd; |
770 | #endif | 768 | #endif |
771 | if (filldir(dirent, "..", 2, off, ino, DT_DIR)) { | 769 | if (filldir(dirent, "..", 2, dotdot_offset, ino, DT_DIR)) { |
772 | *offset = dotdot_offset; | 770 | *offset = dotdot_offset; |
773 | return 0; | 771 | return 0; |
774 | } | 772 | } |
@@ -793,8 +791,7 @@ xfs_dir2_sf_getdents( | |||
793 | #endif | 791 | #endif |
794 | 792 | ||
795 | if (filldir(dirent, sfep->name, sfep->namelen, | 793 | if (filldir(dirent, sfep->name, sfep->namelen, |
796 | off + xfs_dir2_data_entsize(sfep->namelen), | 794 | off, ino, DT_UNKNOWN)) { |
797 | ino, DT_UNKNOWN)) { | ||
798 | *offset = off; | 795 | *offset = off; |
799 | return 0; | 796 | return 0; |
800 | } | 797 | } |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 488836e204a3..fb69ef180b27 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -267,7 +267,7 @@ finish_inode: | |||
267 | icl = NULL; | 267 | icl = NULL; |
268 | if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, | 268 | if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, |
269 | first_index, 1)) { | 269 | first_index, 1)) { |
270 | if ((iq->i_ino & mask) == first_index) | 270 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index) |
271 | icl = iq->i_cluster; | 271 | icl = iq->i_cluster; |
272 | } | 272 | } |
273 | 273 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index abf509a88915..344948082819 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1459,8 +1459,10 @@ xfs_itruncate_start( | |||
1459 | mp = ip->i_mount; | 1459 | mp = ip->i_mount; |
1460 | vp = XFS_ITOV(ip); | 1460 | vp = XFS_ITOV(ip); |
1461 | 1461 | ||
1462 | vn_iowait(ip); /* wait for the completion of any pending DIOs */ | 1462 | /* wait for the completion of any pending DIOs */ |
1463 | 1463 | if (new_size < ip->i_size) | |
1464 | vn_iowait(ip); | ||
1465 | |||
1464 | /* | 1466 | /* |
1465 | * Call toss_pages or flushinval_pages to get rid of pages | 1467 | * Call toss_pages or flushinval_pages to get rid of pages |
1466 | * overlapping the region being removed. We have to use | 1468 | * overlapping the region being removed. We have to use |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9972992fd3c3..9fc4c2886529 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -316,6 +316,8 @@ xfs_bulkstat_use_dinode( | |||
316 | return 1; | 316 | return 1; |
317 | } | 317 | } |
318 | 318 | ||
319 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) | ||
320 | |||
319 | /* | 321 | /* |
320 | * Return stat information in bulk (by-inode) for the filesystem. | 322 | * Return stat information in bulk (by-inode) for the filesystem. |
321 | */ | 323 | */ |
@@ -353,7 +355,7 @@ xfs_bulkstat( | |||
353 | xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ | 355 | xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ |
354 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ | 356 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ |
355 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ | 357 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ |
356 | xfs_ino_t lastino=0; /* last inode number returned */ | 358 | xfs_ino_t lastino; /* last inode number returned */ |
357 | int nbcluster; /* # of blocks in a cluster */ | 359 | int nbcluster; /* # of blocks in a cluster */ |
358 | int nicluster; /* # of inodes in a cluster */ | 360 | int nicluster; /* # of inodes in a cluster */ |
359 | int nimask; /* mask for inode clusters */ | 361 | int nimask; /* mask for inode clusters */ |
@@ -373,6 +375,7 @@ xfs_bulkstat( | |||
373 | * Get the last inode value, see if there's nothing to do. | 375 | * Get the last inode value, see if there's nothing to do. |
374 | */ | 376 | */ |
375 | ino = (xfs_ino_t)*lastinop; | 377 | ino = (xfs_ino_t)*lastinop; |
378 | lastino = ino; | ||
376 | dip = NULL; | 379 | dip = NULL; |
377 | agno = XFS_INO_TO_AGNO(mp, ino); | 380 | agno = XFS_INO_TO_AGNO(mp, ino); |
378 | agino = XFS_INO_TO_AGINO(mp, ino); | 381 | agino = XFS_INO_TO_AGINO(mp, ino); |
@@ -382,6 +385,9 @@ xfs_bulkstat( | |||
382 | *ubcountp = 0; | 385 | *ubcountp = 0; |
383 | return 0; | 386 | return 0; |
384 | } | 387 | } |
388 | if (!ubcountp || *ubcountp <= 0) { | ||
389 | return EINVAL; | ||
390 | } | ||
385 | ubcount = *ubcountp; /* statstruct's */ | 391 | ubcount = *ubcountp; /* statstruct's */ |
386 | ubleft = ubcount * statstruct_size; /* bytes */ | 392 | ubleft = ubcount * statstruct_size; /* bytes */ |
387 | *ubcountp = ubelem = 0; | 393 | *ubcountp = ubelem = 0; |
@@ -402,7 +408,8 @@ xfs_bulkstat( | |||
402 | * inode returned; 0 means start of the allocation group. | 408 | * inode returned; 0 means start of the allocation group. |
403 | */ | 409 | */ |
404 | rval = 0; | 410 | rval = 0; |
405 | while (ubleft >= statstruct_size && agno < mp->m_sb.sb_agcount) { | 411 | while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { |
412 | cond_resched(); | ||
406 | bp = NULL; | 413 | bp = NULL; |
407 | down_read(&mp->m_peraglock); | 414 | down_read(&mp->m_peraglock); |
408 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 415 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
@@ -499,6 +506,7 @@ xfs_bulkstat( | |||
499 | break; | 506 | break; |
500 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, | 507 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, |
501 | &tmp); | 508 | &tmp); |
509 | cond_resched(); | ||
502 | } | 510 | } |
503 | /* | 511 | /* |
504 | * If ran off the end of the ag either with an error, | 512 | * If ran off the end of the ag either with an error, |
@@ -542,6 +550,7 @@ xfs_bulkstat( | |||
542 | */ | 550 | */ |
543 | agino = gino + XFS_INODES_PER_CHUNK; | 551 | agino = gino + XFS_INODES_PER_CHUNK; |
544 | error = xfs_inobt_increment(cur, 0, &tmp); | 552 | error = xfs_inobt_increment(cur, 0, &tmp); |
553 | cond_resched(); | ||
545 | } | 554 | } |
546 | /* | 555 | /* |
547 | * Drop the btree buffers and the agi buffer. | 556 | * Drop the btree buffers and the agi buffer. |
@@ -555,12 +564,12 @@ xfs_bulkstat( | |||
555 | */ | 564 | */ |
556 | irbufend = irbp; | 565 | irbufend = irbp; |
557 | for (irbp = irbuf; | 566 | for (irbp = irbuf; |
558 | irbp < irbufend && ubleft >= statstruct_size; irbp++) { | 567 | irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { |
559 | /* | 568 | /* |
560 | * Now process this chunk of inodes. | 569 | * Now process this chunk of inodes. |
561 | */ | 570 | */ |
562 | for (agino = irbp->ir_startino, chunkidx = clustidx = 0; | 571 | for (agino = irbp->ir_startino, chunkidx = clustidx = 0; |
563 | ubleft > 0 && | 572 | XFS_BULKSTAT_UBLEFT(ubleft) && |
564 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; | 573 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; |
565 | chunkidx++, clustidx++, agino++) { | 574 | chunkidx++, clustidx++, agino++) { |
566 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); | 575 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); |
@@ -663,15 +672,13 @@ xfs_bulkstat( | |||
663 | ubleft, private_data, | 672 | ubleft, private_data, |
664 | bno, &ubused, dip, &fmterror); | 673 | bno, &ubused, dip, &fmterror); |
665 | if (fmterror == BULKSTAT_RV_NOTHING) { | 674 | if (fmterror == BULKSTAT_RV_NOTHING) { |
666 | if (error == EFAULT) { | 675 | if (error && error != ENOENT && |
667 | ubleft = 0; | 676 | error != EINVAL) { |
668 | rval = error; | ||
669 | break; | ||
670 | } | ||
671 | else if (error == ENOMEM) | ||
672 | ubleft = 0; | 677 | ubleft = 0; |
673 | else | 678 | rval = error; |
674 | lastino = ino; | 679 | break; |
680 | } | ||
681 | lastino = ino; | ||
675 | continue; | 682 | continue; |
676 | } | 683 | } |
677 | if (fmterror == BULKSTAT_RV_GIVEUP) { | 684 | if (fmterror == BULKSTAT_RV_GIVEUP) { |
@@ -686,6 +693,8 @@ xfs_bulkstat( | |||
686 | ubelem++; | 693 | ubelem++; |
687 | lastino = ino; | 694 | lastino = ino; |
688 | } | 695 | } |
696 | |||
697 | cond_resched(); | ||
689 | } | 698 | } |
690 | 699 | ||
691 | if (bp) | 700 | if (bp) |
@@ -694,11 +703,12 @@ xfs_bulkstat( | |||
694 | /* | 703 | /* |
695 | * Set up for the next loop iteration. | 704 | * Set up for the next loop iteration. |
696 | */ | 705 | */ |
697 | if (ubleft > 0) { | 706 | if (XFS_BULKSTAT_UBLEFT(ubleft)) { |
698 | if (end_of_ag) { | 707 | if (end_of_ag) { |
699 | agno++; | 708 | agno++; |
700 | agino = 0; | 709 | agino = 0; |
701 | } | 710 | } else |
711 | agino = XFS_INO_TO_AGINO(mp, lastino); | ||
702 | } else | 712 | } else |
703 | break; | 713 | break; |
704 | } | 714 | } |
@@ -707,6 +717,11 @@ xfs_bulkstat( | |||
707 | */ | 717 | */ |
708 | kmem_free(irbuf, irbsize); | 718 | kmem_free(irbuf, irbsize); |
709 | *ubcountp = ubelem; | 719 | *ubcountp = ubelem; |
720 | /* | ||
721 | * Found some inodes, return them now and return the error next time. | ||
722 | */ | ||
723 | if (ubelem) | ||
724 | rval = 0; | ||
710 | if (agno >= mp->m_sb.sb_agcount) { | 725 | if (agno >= mp->m_sb.sb_agcount) { |
711 | /* | 726 | /* |
712 | * If we ran out of filesystem, mark lastino as off | 727 | * If we ran out of filesystem, mark lastino as off |